Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate

PiperOrigin-RevId: 317846923
Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3
Authored by Lev Proleev on 2020-06-23 05:29:56 -07:00; committed by TensorFlower Gardener
parent 7198070f4d
commit e071e66f03
8 changed files with 1154 additions and 80 deletions


@@ -190,6 +190,32 @@ cc_test(
],
)
cc_test(
name = "nnapi_delegate_signed_quantization_test",
size = "small",
srcs = [
"nnapi_delegate_signed_quantization_test.cc",
],
tags = [
"no_mac",
"no_windows",
"tflite_not_portable_ios",
],
deps = [
":nnapi_delegate",
":nnapi_delegate_mock_test",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:test_util",
"//tensorflow/lite/nnapi:nnapi_implementation",
"//tensorflow/lite/nnapi:nnapi_lib",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "quant_lstm_sup_test",
size = "small",


@@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30
FloatActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwishBias
QuantizedActivationsOpTest/Relu*
QuantizedActivationsOpTest/PRelu,29
QuantizedActivationsOpTest/PReluSameShapes,29
QuantizedActivationsOpTest/PReluInt8.+,30
# add_test
FloatAddOpModel/.+
@@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+
# dequantize_test
DequantizeOpTest/Uint8
DequantizeOpTest/Int8,30
# depth_to_space_test
DepthToSpaceOpModel/Float32
@@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin
QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29
HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29
HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29
HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29
FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29
@@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29
TypesGatherOpTest/Float32Int32,29
TypesGatherOpTest/Int32Int32,29
TypesGatherOpTest/Uint8Int32,29
TypesGatherOpTest/Int8Int32,29
# hashtable_lookup_test
# All tests except the string one should be accelerated
@@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
# quantize_test
QuantizeOpTest/UINT8,29
QuantizeOpTest/INT8,30
# rank
# reduce_test
-Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+
-ConstUint8(Mean|Sum)OpTest/.+
-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale
-ConstInt8MeanOpTest.QuantizedDifferentScale
ConstUint8(Max|Min)OpTest/.+,29
ConstUint8(Mean)OpTest/.+
Constint8(Mean|Max|Min)OpTest/.+
ConstInt8(Mean|Max|Min)OpTest/.+,29
ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29
ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29
ConstFloat(Mean|Any)OpTest/NotKeepDims


@@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
case kTfLiteBuiltinConcatenation:
case kTfLiteBuiltinEqual:
case kTfLiteBuiltinExpandDims:
case kTfLiteBuiltinGather:
case kTfLiteBuiltinGreater:
case kTfLiteBuiltinGreaterEqual:
case kTfLiteBuiltinHardSwish:
@@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) {
enum {
NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
};
// Returns the SDK level to target when delegating to the given devices.
@@ -1065,6 +1067,8 @@ class NNAPIOpBuilder {
tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
const bool need_int8_conversion =
tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
const bool use_int8_asymm_signed =
tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
if (ann_tensor_index != -1) {
indices->push_back(ann_tensor_index);
@@ -1095,12 +1099,25 @@ class NNAPIOpBuilder {
nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
break;
case kTfLiteUInt8:
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
scale = tensor->params.scale;
zeroPoint = tensor->params.zero_point;
if (scale == 0) {
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
// NNAPI.
scale = 1;
}
break;
case kTfLiteInt8:
// If explicit int8 conversion is needed, we still need
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion)
? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
: ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
if (use_int8_asymm_signed) {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
} else if (need_int8_conversion) {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
} else {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
}
scale = tensor->params.scale;
zeroPoint = tensor->params.zero_point;
if (tensor->quantization.type == kTfLiteAffineQuantization) {
@@ -1130,8 +1147,7 @@ class NNAPIOpBuilder {
operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
}
if (scale == 0) {
// TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
// with zero scale are not valid in NNAPI.
// QUANT8 tensors with zero scale are not valid in NNAPI.
scale = 1;
}
}
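The two hunks above boil down to a simple type-selection rule for int8 tensors. A minimal standalone sketch (the helper is illustrative, not part of the real NNAPIOpBuilder API, but the constants are the ones defined in this change):

// Sketch: which NNAPI operand type a kTfLiteInt8 tensor now maps to.
int32_t NnTypeForInt8Tensor(uint32_t tensor_flags) {
  if (tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;  // NNAPI 1.3 (SDK 30+)
  }
  if (tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;  // data/zero point shifted +128
  }
  return ANEURALNETWORKS_TENSOR_QUANT8_SYMM;  // requires zero_point == 0
}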
@@ -1248,7 +1264,6 @@ class NNAPIOpBuilder {
"setting new operand value", nnapi_errno_);
}
}
indices->push_back(ann_tensor_index);
return kTfLiteOk;
}
@@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate(
bool is_accelerator_specified,
std::vector<NNAPIValidationFailure>* map_failures) {
OpValidationContext val_ctx{true, map_failures};
switch (builtin_code) {
case kTfLiteBuiltinAdd: {
ExpectMaxOpVersion(version, 2, &val_ctx);
@@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate(
"Supported op versions are 1 and 2 only", &val_ctx);
const auto& input = context->tensors[node->inputs->data[0]];
Expect(input.type != kTfLiteFloat16,
NNAPIValidationFailureType::kUnsupportedInputType,
"kTfLiteFloat16 not supported as input", &val_ctx);
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
} else {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
const auto zero_point = input.params.zero_point;
Expect(input.type != kTfLiteInt8 ||
(zero_point == 0 &&
android_sdk_version >= kMinSdkVersionForNNAPI12),
NNAPIValidationFailureType::kUnsupportedInputType,
"NN API supports int8 type since version 1.2 but only for "
"symmetric quantization.",
&val_ctx);
if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
input.type == kTfLiteInt8) {
const auto zero_point = input.params.zero_point;
Expect(zero_point == 0,
NNAPIValidationFailureType::kUnsupportedInputType,
"NN API supports int8 type since version 1.2 but only for "
"symmetric quantization.",
&val_ctx);
}
}
} break;
case kTfLiteBuiltinFloor: {
ExpectOpVersion(version, 1, &val_ctx);
@@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
const TfLiteType input_type =
context->tensors[node->inputs->data[0]].type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8);
const TfLiteType output_type =
context->tensors[node->outputs->data[0]].type;
ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8.",
&val_ctx);
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8, kTfLiteInt8);
ExpectTypeIn(
output_type,
{kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8, kTfLiteInt8.",
&val_ctx);
} else {
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8);
ExpectTypeIn(
output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8.",
&val_ctx);
}
} break;
case kTfLiteBuiltinPrelu: {
ExpectOpVersion(version, 1, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
const auto input_type = context->tensors[node->inputs->data[0]].type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8);
} break;
case kTfLiteBuiltinTile: {
ExpectOpVersion(version, 1, &val_ctx);
@@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
} break;
case kTfLiteBuiltinGather: {
ExpectOpVersion(version, 1, &val_ctx);
ExpectOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
const auto input_type = context->tensors[node->inputs->data[0]].type;
const auto& positions = context->tensors[node->inputs->data[1]];
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
kTfLiteInt32, kTfLiteUInt8);
ExpectTypeIn(positions.type,
{kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedInputType,
"Positions type should be one of kTfLiteFloat32, "
"kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8",
&val_ctx);
kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
Expect(positions.type == kTfLiteInt32,
NNAPIValidationFailureType::kUnsupportedInputType,
"Positions type should be one of kTfLiteInt32", &val_ctx);
Expect(positions.dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOperandRank,
"0-dimension args are not supported by NNAPI.", &val_ctx);
@@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
// Tensor indices: split_dim: 0, value: 1
const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt32);
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8, kTfLiteInt32);
} else {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt32);
}
const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
NNAPIValidationFailureType::kUnsupportedInputType,
@@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate(
NNAPIValidationFailureType::kUnsupportedInputType,
"Value should be Float32.", &val_ctx);
const auto output_type = context->tensors[node->outputs->data[0]].type;
Expect(output_type == kTfLiteUInt8,
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
if (android_sdk_version < kMinSdkVersionForNNAPI13) {
Expect(output_type == kTfLiteUInt8,
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
} else {
ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
}
const auto quantization_params =
context->tensors[node->outputs->data[0]].params;
Expect(quantization_params.scale > 0.f,
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
"Quantization scale should be > 0.", &val_ctx);
} break;
case kTfLiteBuiltinReduceAny:
case kTfLiteBuiltinReduceMin:
case kTfLiteBuiltinReduceMax: {
ExpectOpVersion(version, 1, &val_ctx);
case kTfLiteBuiltinReduceAny: {
ExpectOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOutputType,
"NNAPI does not support generating a scalar as output.", &val_ctx);
if (builtin_code == kTfLiteBuiltinReduceProd) {
const auto input_type = context->tensors[node->inputs->data[0]].type;
Expect(input_type == kTfLiteFloat32,
NNAPIValidationFailureType::kUnsupportedInputType,
"NNAPI only supports floating point REDUCE_PROD.", &val_ctx);
}
} break;
case kTfLiteBuiltinReduceMin:
case kTfLiteBuiltinReduceMax: {
ExpectMaxOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
const auto input_tensor = context->tensors[node->inputs->data[0]];
const auto input_type = input_tensor.type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8);
Expect(input_tensor.dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOutputType,
"NNAPI does not support generating a scalar as output.", &val_ctx);
} break;
case kTfLiteBuiltinDepthToSpace: {
const TfLiteType input_type =
@@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map(
case kTfLiteBuiltinGather: {
auto builtin = reinterpret_cast<TfLiteGatherParams*>(
mapping_args.node->builtin_data);
mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0],
/* hybrid_op */ false,
/* scalar_as_tensor */ false);
mapping_args.builder->AddScalarInt32Operand(builtin->axis);
mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
/* hybrid_op */ false,
/* scalar_as_tensor */ false);
/* tensor_flags */ 0);
*nn_op_type = ANEURALNETWORKS_GATHER;
} break;
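// Note (sketch, inferred from this diff): ANEURALNETWORKS_GATHER takes its
// operands in the order (input, axis, indices), while the TFLite node
// carries (params, indices) and keeps axis in builtin_data. Input 0 is
// therefore added from AddOpsAndTensors (see the kTfLiteBuiltinGather
// branch further below), and Map() appends the remaining two operands:
//   builder->AddScalarInt32Operand(builtin->axis);                // axis
//   builder->AddTensorInput(inputs->data[1], false, /*flags=*/0); // indices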
case kTfLiteBuiltinBidirectionalSequenceLstm: {
@@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
// absolute indices but NN api indices inputs by relative indices.
int relative_input_index = 0;
const bool use_int8_asymm_signed =
target_sdk_version_ >= kMinSdkVersionForNNAPI13;
size_t input_offset = 0;
for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
if (absolute_input_index == kTfLiteOptionalTensor) {
@@ -3472,9 +3518,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
}
} else if (tensor->type == kTfLiteInt8 &&
ann_type_equivalent == kTfLiteInt32) {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]) + 128;
if (use_int8_asymm_signed) {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]);
}
} else {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]) + 128;
}
}
} else {
context->ReportError(
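For context, the +128 shift in the pre-NNAPI-1.3 path works because real_value = scale * (q - zero_point): shifting both the data and the zero point by 128 turns a signed asymmetric tensor into the equivalent unsigned one. A small sketch (illustrative, not delegate code):

// int8  q = -3,  zero_point = -1,  scale = 0.5  ->  0.5 * (-3 - (-1)) = -1.0
// uint8 q = 125, zero_point = 127, scale = 0.5  ->  0.5 * (125 - 127) = -1.0
int32_t WidenInt8(int8_t v, bool use_int8_asymm_signed) {
  // On NNAPI 1.3+ the tensor is QUANT8_ASYMM_SIGNED, so no shift is needed.
  return use_int8_asymm_signed ? static_cast<int32_t>(v)
                               : static_cast<int32_t>(v) + 128;
}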
@@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
&dequantize_mapping, &allocation_memory_mapping_,
&nnapi_to_tflite_op_mapping_, nn_model_.get(),
nnapi_errno);
// If we have target accelerators the target SDK version might be
// different than the current android version.
target_sdk_version_ = nnapi_->android_sdk_version;
if (!nnapi_devices_.empty()) {
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
}
// Add Tensors.
for (auto node_index : nodes_) {
// Obtain the op and registration.
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
const bool need_int8_conversion =
target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
NeedInt8Conversion(context, reg->builtin_code, node);
const bool use_int8_asymm_signed =
target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
int input_tensor_flags = 0;
if (scalar_as_tensor) {
input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
}
if (use_int8_asymm_signed) {
input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
}
// On SDK level less than 30, h_swish will be lowered into supported NNAPI
// operations. Since SDK level 30, h_swish is supported as a single
@@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
break;
case kTfLiteInt8:
if (constant_value.allocation_type == kTfLiteMmapRo) {
builder.AddScalarInt32Operand(
static_cast<int32_t>(*constant_value.data.int8) + 128);
if (need_int8_conversion) {
builder.AddScalarInt32Operand(
static_cast<int32_t>(*constant_value.data.int8) + 128);
} else {
builder.AddScalarInt32Operand(*constant_value.data.int8);
}
} else {
builder.AddSingleValueTensorAsScalarOperand(
constant_value_id, ANEURALNETWORKS_INT32);
@@ -3836,7 +3909,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// specifying the output height and width, is not added and
// instead the height and width will be added individually as
// scalars by the mapping function returned by Map().
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
}
} else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
// The K parameter tensor is not handled here but by the functor
@@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// the else clause below
continue;
} else if (reg->builtin_code == kTfLiteBuiltinGather) {
// Everything is added during Map since input tensors
// Everything else is added during Map since input tensors
// have different order.
if (input_pos == 0) {
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
}
continue;
} else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
input_pos == 1) {
@@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// the axis, needs to be converted to a scalar since TFLite uses a
// tensor but NNAPI uses a scalar as the axis.
if (input_pos == 0) {
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
} else {
const int axis_id = node->inputs->data[1];
const TfLiteTensor& axis_tensor = context->tensors[axis_id];
@@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
operand_tensor.params, &tensor_index));
break;
case kTfLiteInt8:
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1},
std::vector<int8_t>(1, operand_tensor.data.int8[0]),
operand_tensor.params, &tensor_index));
break;
case kTfLiteInt8: {
auto params = operand_tensor.params;
if (params.scale == 0.0) {
params.scale = 1.0;
}
if (use_int8_asymm_signed) {
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
operand_tensor.type, {1},
std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
&tensor_index));
} else {
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
{1},
std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
params, &tensor_index));
}
} break;
case kTfLiteInt32:
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
@@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
}
}
// If we have target accelerators the target SDK version might be
// different than the current android version.
int target_sdk_version = nnapi_->android_sdk_version;
if (!nnapi_devices_.empty()) {
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
}
// Get op type and operands
// Fails if the Validate function failed
int nn_op_type;
TF_LITE_ENSURE_STATUS(
Map(context, reg->builtin_code, reg->version, target_sdk_version,
Map(context, reg->builtin_code, reg->version, target_sdk_version_,
{context, &builder, node, &model_state_outputs_,
&model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
&nn_op_type));
@@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
if (need_int8_conversion) {
output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
}
if (use_int8_asymm_signed) {
output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
}
for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
const auto output_index = node->outputs->data[output_pos];


@@ -341,6 +341,9 @@ class NNAPIDelegateKernel {
std::vector<int> nnapi_to_tflite_op_mapping_;
// Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors
int target_sdk_version_ = 27; // kMinSdkVersionForNNAPI
void AddDequantizeOperatorsWhereNeeded(
const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);


@@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>();
EventWaitReturns<ANEURALNETWORKS_NO_ERROR>();
SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>();
SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>();
SetNnapiSupportedDevice("test-device", android_sdk_version);
}


@@ -0,0 +1,920 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/fully_connected.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
namespace tflite {
namespace ops {
namespace builtin {
TfLiteRegistration* Register_CONVOLUTION_REF();
TfLiteRegistration* Register_DEQUANTIZE();
} // namespace builtin
} // namespace ops
namespace {
class SingleOpModelWithNNAPI : public SingleOpModel {
public:
SingleOpModelWithNNAPI() = default;
void Init(const NnApi* nnapi) {
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
SetDelegate(stateful_delegate_.get());
}
StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
void SetBufferHandle(int index, TfLiteBufferHandle handle) {
interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
}
TfLiteStatus GetCompilationStatus() { return compilation_status_; }
protected:
std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
TfLiteStatus compilation_status_;
};
class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
public:
HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
const TensorData& input,
const TensorData& weights,
const TensorData& output = {TensorType_FLOAT32},
bool asymmetric_inputs = false)
: batches_(batches), units_(units) {
SingleOpModelWithNNAPI::Init(nnapi);
int total_input_size = 1;
for (size_t i = 0; i < input.shape.size(); ++i) {
total_input_size *= input.shape[i];
}
input_size_ = total_input_size / batches_;
input_ = AddInput(input);
weights_ = AddInput(weights);
TensorData bias{TensorType_FLOAT32, {units_}};
bias_ = AddInput(bias);
output_ = AddOutput(output);
auto options = CreateFullyConnectedOptions(
builder_, ActivationFunctionType_RELU,
tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
false, asymmetric_inputs)
.Union();
SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
BuiltinOptions_FullyConnectedOptions, options);
resolver_ = absl::make_unique<SingleOpResolver>(
BuiltinOperator_FULLY_CONNECTED,
ops::builtin::Register_FULLY_CONNECTED_PIE());
BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
/*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
void SetWeights(const std::vector<float>& data) {
SymmetricQuantizeAndPopulate(weights_, data);
}
void SetSignedWeights(std::initializer_list<float> f) {
SignedSymmetricQuantizeAndPopulate(weights_, f);
}
void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
int input_size() { return input_size_; }
int num_units() { return units_; }
int num_batches() { return batches_; }
protected:
int input_;
int weights_;
int bias_;
int output_;
int batches_;
int units_;
int input_size_;
};
struct NnApiSignedQuantizationTest
: ::tflite::delegate::nnapi::NnApiDelegateMockTest {
static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
void SetUp() override {
::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
nnapi_mock_->StubAddOperandWith(
[](ANeuralNetworksModel* model,
const ANeuralNetworksOperandType* type) -> int {
const auto nn_tensor_type = type->type;
if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
tensors_count->insert({nn_tensor_type, 0});
}
tensors_count->at(nn_tensor_type)++;
return ANEURALNETWORKS_NO_ERROR;
});
}
void TearDown() override { tensors_count->clear(); }
static void TearDownTestSuite() {
delete tensors_count;
tensors_count = nullptr;
}
static std::map<int, int>* tensors_count;
};
std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;
TEST_F(NnApiSignedQuantizationTest,
HybridFullyConnectedMapsToSignedSymmOnSdk29) {
nnapi_mock_->SetAndroidSdkVersion(29);
HybridFullyConnectedOpModel m(
nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
/*input=*/{TensorType_FLOAT32, {2, 10}},
/*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
m.SetSignedWeights({
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2
});
m.SetBias({1, 2, 3});
m.SetInput({
1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0
1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1
});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
4); // fc_input, fc_weights, fc_bias, fc_output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // dequantize_weights_input
}
TEST_F(NnApiSignedQuantizationTest,
HybridFullyConnectedMapsToSignedSymmOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
HybridFullyConnectedOpModel m(
nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
/*input=*/{TensorType_FLOAT32, {2, 10}},
/*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
m.SetSignedWeights({
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2
});
m.SetBias({1, 2, 3});
m.SetInput({
1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0
1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1
});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
4); // fc_input, fc_weights, fc_bias, fc_output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // dequantize_weights_input
}
template <typename FilterType>
class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
public:
BaseConvolutionOpModel(
const NnApi* nnapi, TfLiteRegistration* registration,
const TensorData& input, const TensorData& filter,
const TensorData& output, int stride_width = 2, int stride_height = 2,
enum Padding padding = Padding_VALID,
enum ActivationFunctionType activation = ActivationFunctionType_NONE,
int dilation_width_factor = 1, int dilation_height_factor = 1,
std::initializer_list<FilterType> filter_data = {}) {
SingleOpModelWithNNAPI::Init(nnapi);
input_ = AddInput(input);
if (filter_data.size()) {
filter_ = AddConstInput(filter, filter_data);
} else {
filter_ = AddInput(filter);
}
int bias_size = GetShape(filter_)[0];
if (input.type == TensorType_FLOAT32) {
bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
} else {
// This is a quantized version. The scale of 'bias' depends on the scales
// of input and filter. Supposedly this is correctly set during quantized
// training.
if (filter.per_channel_quantization) {
// per channel quantization.
std::vector<float> bias_scale(
filter.per_channel_quantization_scales.size());
std::vector<int64_t> bias_zero_points(
filter.per_channel_quantization_scales.size());
for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
++i) {
bias_scale[i] =
input.scale * filter.per_channel_quantization_scales[i];
bias_zero_points[i] = 0;
}
tflite::TensorType bias_type = TensorType_INT32;
if (input.type == TensorType_INT16) {
// In case of 16-bit, the bias type is set to be int 64.
bias_type = TensorType_INT64;
}
TensorData bias{bias_type,
{bias_size},
/*min=*/0,
/*max=*/0,
/*scale=*/0,
/*zero_point=*/0,
true,
/*per_channel_quantization_scales=*/bias_scale,
/*per_channel_quantization_offsets=*/bias_zero_points,
/*channel_index=*/0};
bias_ = AddInput(bias);
} else {
// per tensor quantization.
auto bias_scale = GetScale(input_) * GetScale(filter_);
TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
bias_ = AddInput(bias);
}
}
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
CreateConv2DOptions(
builder_, padding, stride_width, stride_height, activation,
dilation_width_factor, dilation_height_factor)
.Union());
resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
registration);
BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
/*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
protected:
int input_;
int filter_;
int bias_;
int output_;
};
class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
public:
using BaseConvolutionOpModel::BaseConvolutionOpModel;
void SetInput(std::initializer_list<float> data) {
QuantizeAndPopulate<uint8_t>(input_, data);
}
void SetFilter(std::initializer_list<float> data) {
QuantizeAndPopulate<uint8_t>(filter_, data);
}
void SetBias(std::initializer_list<float> data) {
QuantizeAndPopulate<int32_t>(bias_, data);
}
std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
std::vector<float> GetDequantizedOutput() {
return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
GetScale(output_), GetZeroPoint(output_));
}
};
TEST_F(NnApiSignedQuantizationTest,
Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) {
QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
{TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
{TensorType_UINT8, {}, -127, 128});
m.SetInput({
// First batch
1, 1, 1, 1, // row = 1
2, 2, 2, 2, // row = 2
// Second batch
1, 2, 3, 4, // row = 1
1, 2, 3, 4, // row = 2
});
m.SetFilter({
1, 2, 3, 4, // first 2x2 filter
-1, 1, -1, 1, // second 2x2 filter
-1, -1, 1, 1, // third 2x2 filter
});
m.SetBias({1, 2, 3});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
{TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
{TensorType_UINT8, {}, -127, 128});
m.SetInput({
// First batch
1, 1, 1, 1, // row = 1
2, 2, 2, 2, // row = 2
// Second batch
1, 2, 3, 4, // row = 1
1, 2, 3, 4, // row = 2
});
m.SetFilter({
1, 2, 3, 4, // first 2x2 filter
-1, 1, -1, 1, // second 2x2 filter
-1, -1, 1, 1, // third 2x2 filter
});
m.SetBias({1, 2, 3});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
class PerChannelQuantizedConvolutionOpModel
: public BaseConvolutionOpModel<int8_t> {
public:
using BaseConvolutionOpModel::BaseConvolutionOpModel;
void SetInput(std::initializer_list<float> data) {
QuantizeAndPopulate<int8_t>(input_, data);
}
void SetFilter(std::initializer_list<float> data) {
PerChannelSymmetricQuantizeAndPopulate(filter_, data);
}
void SetBias(std::initializer_list<float> data) {
PerChannelQuantizeBias(bias_, data);
}
std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
std::vector<float> GetDequantizedOutput() {
return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
GetZeroPoint(output_));
}
};
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
nnapi_mock_->SetAndroidSdkVersion(29);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1},
/*per_channel_quantization_offsets=*/{0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerTensorMapsToUnsignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1},
/*per_channel_quantization_offsets=*/{0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1, 2},
/*per_channel_quantization_offsets=*/{0, 0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 4);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
2); // input, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
1); // filter
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1, 2},
/*per_channel_quantization_offsets=*/{0, 0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 4);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
2); // input, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
1); // filter
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
class QuantizeOpModel : public SingleOpModelWithNNAPI {
public:
QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
const TensorData& output) {
SingleOpModelWithNNAPI::Init(nnapi);
input_ = AddInput(input);
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
CreateQuantizeOptions(builder_).Union());
BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
void SetInput(std::initializer_list<float> data) {
PopulateTensor(input_, data);
}
template <typename T>
void SetInputAndQuantize(std::initializer_list<float> data) {
QuantizeAndPopulate<T>(input_, data);
}
template <typename T>
std::vector<T> GetOutput() {
return ExtractVector<T>(output_);
}
private:
int input_;
int output_;
};
TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // output
}
TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // output
}
// Quantize with Int8 output is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=-1 for INT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
1); // output
}
class DequantizeOpModel : public SingleOpModelWithNNAPI {
public:
DequantizeOpModel(const NnApi* nnapi, TensorType type,
std::initializer_list<int> shape, float scale,
int32_t zero_point, int version) {
SingleOpModelWithNNAPI::Init(nnapi);
const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
input_ = AddInput(input_tensor_data);
output_ = AddOutput({TensorType_FLOAT32, shape});
SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
CreateDequantizeOptions(builder_).Union());
resolver_ = absl::make_unique<SingleOpResolver>(
BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
version);
BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
template <typename T>
void SetInput(std::initializer_list<T> data) {
PopulateTensor(input_, data);
}
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
private:
int input_;
int output_;
};
TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
127, 1);
m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
127, 1);
m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
// Dequantize with Int8 input is only supported for symmetric quantization on
// SDK level 29
TEST_F(NnApiSignedQuantizationTest,
DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
// [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
2);
m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
// Dequantize with Int8 input is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
2);
m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
} // namespace
} // namespace tflite
int main(int argc, char** argv) {
::tflite::LogToStderr();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}


@@ -46,6 +46,7 @@ enum {
ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
};
/**

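For reference, a sketch of describing an operand with the new type through the NNAPI C API; the ANeuralNetworksOperandType fields come from the NNAPI headers, and the concrete values are illustrative:

uint32_t dims[2] = {2, 5};
ANeuralNetworksOperandType operand = {
    /*type=*/ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,  // = 14
    /*dimensionCount=*/2,
    /*dimensions=*/dims,
    /*scale=*/0.5f,
    /*zeroPoint=*/-1};  // signed asymmetric zero point may lie in [-128, 127]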

@@ -118,6 +118,11 @@ class NnApiHandler {
const ANeuralNetworksOperandType* type) { return Value; };
}
void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model,
const ANeuralNetworksOperandType* type)) {
nnapi_->ANeuralNetworksModel_addOperand = stub;
}
template <int Value>
void SetOperandValueReturns() {
nnapi_->ANeuralNetworksModel_setOperandValue =
@@ -268,6 +273,23 @@ class NnApiHandler {
};
}
template <int Value>
void SetPriorityReturns() {
nnapi_->ANeuralNetworksCompilation_setPriority =
[](ANeuralNetworksCompilation* compilation, int priority) -> int {
return Value;
};
}
template <int Value>
void SetOperandSymmPerChannelQuantParamsReturns() {
nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams =
[](ANeuralNetworksModel* model, int32_t index,
const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) {
return Value;
};
}
/*
* Sets the SDK Version in the nnapi structure.
* If set_unsupported_ops_to_null is set to true, all the functions not
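A usage sketch of the new StubAddOperandWith hook, mirroring the signed-quantization test above (it assumes a static std::map<int, int>* tensors_count, as in that test fixture):

nnapi_mock_->StubAddOperandWith(
    [](ANeuralNetworksModel* model,
       const ANeuralNetworksOperandType* type) -> int {
      (*tensors_count)[type->type]++;  // tally operands per NNAPI type
      return ANEURALNETWORKS_NO_ERROR;
    });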