Fix segmentation fault when running a model with LSTM nodes using NNAPI Delegate

PiperOrigin-RevId: 294644644
Change-Id: Id7e7b79f40ecd09dc1de35be74d57cf06589a831
Stefano Galarraga authored on 2020-02-12 05:19:54 -08:00; committed by TensorFlower Gardener
parent 43c49a53ff
commit d08a3cdb16
2 changed files with 84 additions and 29 deletions
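Reading the hunks below, the root cause appears to be an index-space mismatch: `feedback_loops_` used to store node-local operand positions (0, 1, 4), which `Invoke` then resolved through the inputs/outputs of whatever `node` was in scope there (the delegate partition's node, not the original LSTM node). Once the LSTM was not the only node handed to the delegate, those positions selected the wrong entries, or walked past the end of the partition node's I/O arrays, and the subsequent `memcpy` crashed. The fix resolves the positions to graph-level tensor indices once, at mapping time, where `mapping_args.node` is the actual LSTM node.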

tensorflow/lite/delegates/nnapi/nnapi_delegate.cc

@@ -2797,10 +2797,12 @@ TfLiteStatus NNAPIDelegateKernel::Map(
           // Configuring the copy from the activation, state outputs
           // to their associated inputs
           mapping_args.feedback_loops->push_back(std::make_tuple(
-              0 /*kOutputActivation*/, 1 /*kInputPrevActivation*/));
-          mapping_args.feedback_loops->push_back(
-              std::make_tuple(1 /*kOutputState*/, 4 /*kInputPrevState*/));
+              mapping_args.node->outputs->data[0 /*kOutputActivation*/],
+              mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
+          mapping_args.feedback_loops->push_back(std::make_tuple(
+              mapping_args.node->outputs->data[1 /*kOutputState*/],
+              mapping_args.node->inputs->data[4 /*kInputPrevState*/]));

           // OUTPUTS
           // Setting only the first two since the remaining ones are
@@ -2809,9 +2811,7 @@ TfLiteStatus NNAPIDelegateKernel::Map(
               mapping_args.node->outputs->data[1 /* kOutputState */], 0);
           mapping_args.builder->AddTensorOutput(
-              mapping_args.node->outputs
-                  ->data[0 /* kOutputkOutputActivationState */],
-              0);
+              mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);

           *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
         } else {
@@ -3389,12 +3389,10 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
     int output_tensor_idx;
     int input_tensor_idx;
     std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
-    TfLiteTensor* src =
-        &context->tensors[node->outputs->data[output_tensor_idx]];
-    TfLiteTensor* dest =
-        &context->tensors[node->inputs->data[input_tensor_idx]];
-    memcpy(dest->data.raw, src->data.raw, src->bytes);
+    TfLiteTensor& src = context->tensors[output_tensor_idx];
+    TfLiteTensor& dest = context->tensors[input_tensor_idx];
+    memcpy(dest.data.raw, src.data.raw, src.bytes);
   }

   return kTfLiteOk;
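To make the two index spaces concrete, here is a minimal, self-contained sketch; the `Tensor` and `Node` types are simplified stand-ins, not the real TfLite structs. The feedback tuple stores graph-level tensor ids captured from the LSTM node at mapping time, so the copy at invoke time can index the global tensor table directly, without consulting any node's I/O arrays:

    #include <cstring>
    #include <tuple>
    #include <vector>

    // Hypothetical, simplified stand-ins for TfLiteTensor / TfLiteNode,
    // only to illustrate the fix's index handling.
    struct Tensor { std::vector<char> data; };
    struct Node { std::vector<int> inputs, outputs; };  // graph-level tensor ids

    int main() {
      std::vector<Tensor> tensors(8, Tensor{std::vector<char>(4, 0)});
      // Suppose the LSTM's output operand 0 is graph tensor 6 and its
      // input operand 1 is graph tensor 3.
      Node lstm{/*inputs=*/{2, 3, 4, 5, 1}, /*outputs=*/{6, 7}};

      // Mapping time (the fix): resolve operand positions to graph ids once,
      // while the LSTM node itself is in scope.
      auto feedback = std::make_tuple(lstm.outputs[0], lstm.inputs[1]);

      // Invoke time: index the global tensor table directly. No node object
      // is consulted, so it no longer matters which node is in scope here.
      Tensor& src = tensors[std::get<0>(feedback)];
      Tensor& dst = tensors[std::get<1>(feedback)];
      std::memcpy(dst.data.data(), src.data.data(), src.data.size());
      return 0;
    }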
@@ -3674,11 +3672,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
     // Get op type and operands
     // Fails if the Validate function failed
     int nn_op_type;
-    TF_LITE_ENSURE_STATUS(Map(context, reg->builtin_code, reg->version,
-                              target_sdk_version,
-                              {context, &builder, node, &model_state_outputs_,
-                               &model_state_tfl_inputs_, &feedback_loops_},
-                              &nn_op_type));
+    TF_LITE_ENSURE_STATUS(
+        Map(context, reg->builtin_code, reg->version, target_sdk_version,
+            {context, &builder, node, &model_state_outputs_,
+             &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
+            &nn_op_type));

     // Map outputs to NN API tensor indices.
     int output_tensor_flags = 0;

tensorflow/lite/delegates/nnapi/quantized_lstm_test.cc
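The test change below reproduces the crash scenario by optionally prepending a no-op ADD (adding a constant zero tensor) in front of the LSTM, so that the LSTM's tensors no longer line up with the first operands of the graph handed to the delegate. The test is then parameterized over that flag, running the same golden check both with and without the extra node.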

@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <cstdint>
 #include <initializer_list>
 #include <memory>
 #include <vector>
@@ -29,16 +30,20 @@ namespace {
 using ::testing::ElementsAreArray;

-class QuantizedLSTMOpModel : public SingleOpModel {
+class QuantizedLSTMOpModel : public MultiOpModel {
  public:
   QuantizedLSTMOpModel(int numBatches, int inputSize, float weightsScale,
                        int32_t weightsZeroPoint, int outputSize,
                        std::initializer_list<uint8_t> weights,
-                       std::initializer_list<int32_t> biases) {
+                       std::initializer_list<int32_t> biases,
+                       // If true, the LSTM node will be preceded by a no-op
+                       // node (an add of a constant zero input).
+                       bool prepend_noop) {
     std::vector<uint32_t> inputs;
     input_size_ = inputSize;
     output_size_ = outputSize;
+    prepend_noop_ = prepend_noop;

     std::vector<int> input_shape{numBatches, inputSize};
     std::vector<int> output_shape{numBatches, outputSize};
@@ -46,8 +51,19 @@ class QuantizedLSTMOpModel : public SingleOpModel {
     std::vector<int> state_shape{numBatches, outputSize};
     std::vector<int> bias_shape{4 * outputSize};

-    input_ =
-        AddInput({TensorType_UINT8, input_shape, 0.0f, 0.0f, 1. / 128., 128});
+    std::vector<int> lstm_inputs;
+    const TensorData input_tensor_data{
+        TensorType_UINT8, input_shape, 0.0f, 0.0f, 1. / 128., 128};
+
+    if (prepend_noop) {
+      zero_input_ = AddInput(input_tensor_data);
+    } else {
+      zero_input_ = 0;
+    }
+
+    input_ = AddInput(input_tensor_data);
+
     prev_output_ =
         AddInput({TensorType_UINT8, output_shape, 0.0f, 0.0f, 1. / 128., 128});
     // Biases and Weights have to be constant in order to allow NNAPI
@@ -61,6 +77,8 @@ class QuantizedLSTMOpModel : public SingleOpModel {
     prev_cell_state_ =
         AddInput({TensorType_INT16, state_shape, 0.0f, 0.0f, 1. / 2048., 0});

+    sum_out_ = AddOutput(input_tensor_data);
+
     output_ =
         AddOutput({TensorType_UINT8, output_shape, 0.0f, 0.0f, 1. / 128., 128});
     cell_state_out_ =
@@ -70,15 +88,40 @@ class QuantizedLSTMOpModel : public SingleOpModel {
     output_activation_temp_ =
         AddOutput({TensorType_INT16, output_shape, 0.0f, 0.0f, 1. / 128., 128});

-    SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions,
+    if (prepend_noop) {
+      AddBuiltinOp(
+          BuiltinOperator_ADD, BuiltinOptions_AddOptions,
+          CreateAddOptions(builder_, ActivationFunctionType_NONE).Union(),
+          {zero_input_, input_}, {sum_out_});
+      lstm_inputs.push_back(sum_out_);
+    } else {
+      lstm_inputs.push_back(input_);
+    }
+
+    lstm_inputs.push_back(prev_output_);
+    lstm_inputs.push_back(weights_);
+    lstm_inputs.push_back(biases_);
+    lstm_inputs.push_back(prev_cell_state_);
+
+    std::vector<int> lstm_outputs{output_, cell_state_out_, output_concat_temp_,
+                                  output_activation_temp_};
+
+    AddBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions,
                  CreateLSTMOptions(builder_, ActivationFunctionType_TANH, 0.0,
                                    0.0, LSTMKernelType_BASIC)
-                     .Union());
-    BuildInterpreter({GetShape(input_), GetShape(prev_output_),
-                      GetShape(weights_), GetShape(biases_),
-                      GetShape(prev_cell_state_)});
+                     .Union(),
+                 lstm_inputs, lstm_outputs);
+
+    if (prepend_noop) {
+      BuildInterpreter({GetShape(input_), GetShape(zero_input_),
+                        GetShape(prev_output_), GetShape(weights_),
+                        GetShape(biases_), GetShape(prev_cell_state_)});
+    } else {
+      BuildInterpreter({GetShape(input_), GetShape(prev_output_),
+                        GetShape(weights_), GetShape(biases_),
+                        GetShape(prev_cell_state_)});
+    }

     // init feedback inputs to zero
     std::vector<int16_t> initial_state(GetTensorSize(cell_state_out_), 0);
     PopulateTensor(prev_cell_state_, initial_state);
@@ -92,6 +135,10 @@ class QuantizedLSTMOpModel : public SingleOpModel {
   void setInput(const std::vector<uint8_t>& input) {
     PopulateTensor(input_, input);
+    if (prepend_noop_) {
+      std::vector<uint8_t> zero(GetTensorSize(zero_input_), 128);
+      PopulateTensor(zero_input_, zero);
+    }
   }

   std::vector<uint8_t> getOutput() { return ExtractVector<uint8_t>(output_); }
@@ -111,9 +158,13 @@ class QuantizedLSTMOpModel : public SingleOpModel {
   int input_size_;
   int output_size_;
+  bool prepend_noop_;
+  int zero_input_;
+  int sum_out_;
 };

-class QuantizedLstmTest : public ::testing::Test {
+class QuantizedLstmTest : public ::testing::Test,
+                          public testing::WithParamInterface<bool> {
  protected:
   void VerifyGoldens(const std::vector<std::vector<uint8_t>>& input,
                      const std::vector<std::vector<uint8_t>>& output,
@@ -149,7 +200,7 @@ class QuantizedLstmTest : public ::testing::Test {
 // Inputs and weights in this test are random and the test only checks that the
 // outputs are equal to outputs obtained from running TF Lite version of
 // quantized LSTM on the same inputs.
-TEST_F(QuantizedLstmTest, BasicQuantizedLstmTest) {
+TEST_P(QuantizedLstmTest, BasicQuantizedLstmTest) {
   const int numBatches = 2;
   const int inputSize = 2;
   const int outputSize = 4;
@@ -157,6 +208,8 @@ TEST_F(QuantizedLstmTest, BasicQuantizedLstmTest) {
   float weightsScale = 0.00408021;
   int weightsZeroPoint = 100;

+  bool prepend_dummy_node = GetParam();
+
   QuantizedLSTMOpModel lstm(
       numBatches, inputSize, weightsScale, weightsZeroPoint, outputSize,
@@ -186,7 +239,8 @@ TEST_F(QuantizedLstmTest, BasicQuantizedLstmTest) {
       // forgetGateBias
       9206, -46884, -11693, -38724,
       // outputGateBias
-      -58999, -17050, -41852, -40538});
+      -58999, -17050, -41852, -40538},
+      prepend_dummy_node);
   // clang-format on

   // LSTM input is stored as numBatches x (sequenceLength x inputSize) vector.
@@ -226,5 +280,8 @@ TEST_F(QuantizedLstmTest, BasicQuantizedLstmTest) {
   VerifyGoldens(lstmInput, lstmGoldenOutput, &lstm);
 }

+INSTANTIATE_TEST_SUITE_P(QuantizedLstmTest, QuantizedLstmTest,
+                         testing::Values(false, true));
+
 }  // namespace
 }  // namespace tflite
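For reference, the TEST_P / INSTANTIATE_TEST_SUITE_P combination used above is googletest's value-parameterized test mechanism: the suite runs once per value passed to testing::Values. A minimal sketch of the pattern, with hypothetical names unrelated to the real test (link against gtest_main to get a main()):

    #include <gtest/gtest.h>

    // Fixture deriving from TestWithParam<bool>, mirroring how
    // QuantizedLstmTest mixes in testing::WithParamInterface<bool>.
    class PrependNoopTest : public ::testing::TestWithParam<bool> {};

    TEST_P(PrependNoopTest, RunsWithBothValues) {
      const bool prepend_noop = GetParam();  // false on one run, true on the other
      // A real test would build the model with or without the extra node here.
      SUCCEED() << "prepend_noop = " << prepend_noop;
    }

    // Instantiates the test twice: .../RunsWithBothValues/0 and /1.
    INSTANTIATE_TEST_SUITE_P(Both, PrependNoopTest, ::testing::Values(false, true));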