From e071e66f03eb6ae234212400f34061f25e79a699 Mon Sep 17 00:00:00 2001
From: Lev Proleev <levp@google.com>
Date: Tue, 23 Jun 2020 05:29:56 -0700
Subject: [PATCH] Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate
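
NNAPI 1.3 (Android API level 30) introduces the signed
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED operand type. When the target
SDK version is at least 30, the delegate maps kTfLiteInt8 tensors to it
directly instead of rebiasing them by 128 into the unsigned
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type. Validation is relaxed
accordingly for several ops (e.g. GATHER, PRELU, SPLIT, QUANTIZE,
DEQUANTIZE, REDUCE_MIN/MAX), the target SDK version is resolved once in
AddOpsAndTensors and cached in target_sdk_version_, and a new mock-based
test suite checks which operand types the delegate registers per SDK
level.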

PiperOrigin-RevId: 317846923
Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3
---
 tensorflow/lite/delegates/nnapi/BUILD         |  26 +
 .../delegates/nnapi/acceleration_test_list.cc |  14 +-
 .../lite/delegates/nnapi/nnapi_delegate.cc    | 246 +++--
 .../delegates/nnapi/nnapi_delegate_kernel.h   |   3 +
 .../nnapi/nnapi_delegate_mock_test.h          |   2 +
 ...nnapi_delegate_signed_quantization_test.cc | 920 ++++++++++++++++++
 tensorflow/lite/nnapi/NeuralNetworksTypes.h   |   1 +
 tensorflow/lite/nnapi/nnapi_handler.h         |  22 +
 8 files changed, 1154 insertions(+), 80 deletions(-)
 create mode 100644 tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc

diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
index ec9f6907f21..beeaff1b99d 100644
--- a/tensorflow/lite/delegates/nnapi/BUILD
+++ b/tensorflow/lite/delegates/nnapi/BUILD
@@ -190,6 +190,32 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "nnapi_delegate_signed_quantization_test",
+    size = "small",
+    srcs = [
+        "nnapi_delegate_signed_quantization_test.cc",
+    ],
+    tags = [
+        "no_mac",
+        "no_windows",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":nnapi_delegate",
+        ":nnapi_delegate_mock_test",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:kernel_api",
+        "//tensorflow/lite:minimal_logging",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/kernels:test_util",
+        "//tensorflow/lite/nnapi:nnapi_implementation",
+        "//tensorflow/lite/nnapi:nnapi_lib",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "quant_lstm_sup_test",
     size = "small",
diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc
index b20628016f0..31bdc5f8b99 100644
--- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc
+++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc
@@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30
 FloatActivationsOpTest/HardSwish
 QuantizedActivationsOpTest/HardSwish
 QuantizedActivationsOpTest/HardSwishBias
+QuantizedActivationsOpTest/Relu*
+QuantizedActivationsOpTest/PRelu,29
+QuantizedActivationsOpTest/PReluSameShapes,29
+QuantizedActivationsOpTest/PReluInt8.+,30
 
 # add_test
 FloatAddOpModel/.+
@@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+
 
 # dequantize_test
 DequantizeOpTest/Uint8
+DequantizeOpTest/Int8,30
 
 # depth_to_space_test
 DepthToSpaceOpModel/Float32
@@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin
 QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29
 HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29
 HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29
+HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29
 FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+
 QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+
 QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29
@@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29
 TypesGatherOpTest/Float32Int32,29
 TypesGatherOpTest/Int32Int32,29
 TypesGatherOpTest/Uint8Int32,29
+TypesGatherOpTest/Int8Int32,29
 
 # hashtable_lookup_test
 # All tests except the string one should be accelerated
@@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
 
 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/INT8,30
+
+# rank
 
 # reduce_test
 -Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+
 -ConstUint8(Mean|Sum)OpTest/.+
+-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale
+-ConstInt8MeanOpTest.QuantizedDifferentScale
 ConstUint8(Max|Min)OpTest/.+,29
 ConstUint8(Mean)OpTest/.+
-Constint8(Mean|Max|Min)OpTest/.+
+ConstInt8(Mean|Max|Min)OpTest/.+,29
 ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29
 ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29
 ConstFloat(Mean|Any)OpTest/NotKeepDims
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index 1c35ee370c2..58ab13ab657 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
     case kTfLiteBuiltinConcatenation:
     case kTfLiteBuiltinEqual:
     case kTfLiteBuiltinExpandDims:
+    case kTfLiteBuiltinGather:
     case kTfLiteBuiltinGreater:
     case kTfLiteBuiltinGreaterEqual:
     case kTfLiteBuiltinHardSwish:
@@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) {
 enum {
   NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
   NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
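+  // Maps kTfLiteInt8 to ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED
+  // (available from NNAPI 1.3) instead of converting to an unsigned type.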
+  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
 };
 
 // Returns the SDK level to target when delegating to the given devices.
@@ -1065,6 +1067,8 @@ class NNAPIOpBuilder {
         tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
     const bool need_int8_conversion =
         tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
+    const bool use_int8_asymm_signed =
+        tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
     int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
     if (ann_tensor_index != -1) {
       indices->push_back(ann_tensor_index);
@@ -1095,12 +1099,25 @@ class NNAPIOpBuilder {
         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
         break;
       case kTfLiteUInt8:
+        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
+        scale = tensor->params.scale;
+        zeroPoint = tensor->params.zero_point;
+        if (scale == 0) {
+          // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
+          // NNAPI.
+          scale = 1;
+        }
+        break;
       case kTfLiteInt8:
         // If explicit int8 conversion is needed, we still need
         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
-        nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion)
-                      ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
-                      : ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
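+        // Prefer the signed asymmetric type on NNAPI 1.3+; otherwise fall
+        // back to unsigned asymmetric (with conversion) or symmetric.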
+        if (use_int8_asymm_signed) {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
+        } else if (need_int8_conversion) {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
+        } else {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
+        }
         scale = tensor->params.scale;
         zeroPoint = tensor->params.zero_point;
         if (tensor->quantization.type == kTfLiteAffineQuantization) {
@@ -1130,8 +1147,7 @@ class NNAPIOpBuilder {
             operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
           }
           if (scale == 0) {
-            // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
-            // with zero scale are not valid in NNAPI.
+            // QUANT8 tensors with zero scale are not valid in NNAPI.
             scale = 1;
           }
         }
@@ -1248,7 +1264,6 @@ class NNAPIOpBuilder {
             "setting new operand value", nnapi_errno_);
       }
     }
-
     indices->push_back(ann_tensor_index);
     return kTfLiteOk;
   }
@@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate(
     bool is_accelerator_specified,
     std::vector<NNAPIValidationFailure>* map_failures) {
   OpValidationContext val_ctx{true, map_failures};
-
   switch (builtin_code) {
     case kTfLiteBuiltinAdd: {
       ExpectMaxOpVersion(version, 2, &val_ctx);
@@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate(
              "Supported op versions are 1 and 2 only", &val_ctx);
 
       const auto& input = context->tensors[node->inputs->data[0]];
-      Expect(input.type != kTfLiteFloat16,
-             NNAPIValidationFailureType::kUnsupportedInputType,
-             "kTfLiteFloat16 not supported as input", &val_ctx);
+      if (android_sdk_version < kMinSdkVersionForNNAPI12) {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
 
-      const auto zero_point = input.params.zero_point;
-      Expect(input.type != kTfLiteInt8 ||
-                 (zero_point == 0 &&
-                  android_sdk_version >= kMinSdkVersionForNNAPI12),
-             NNAPIValidationFailureType::kUnsupportedInputType,
-             "NN API supports int8 type since version 1.2 but only for "
-             "symmetric quantization.",
-             &val_ctx);
+        if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
+            input.type == kTfLiteInt8) {
+          const auto zero_point = input.params.zero_point;
+          Expect(zero_point == 0,
+                 NNAPIValidationFailureType::kUnsupportedInputType,
+                 "NN API supports int8 type since version 1.2 but only for "
+                 "symmetric quantization.",
+                 &val_ctx);
+        }
+      }
     } break;
     case kTfLiteBuiltinFloor: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate(
                                  &val_ctx);
       const TfLiteType input_type =
           context->tensors[node->inputs->data[0]].type;
-      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
-                           kTfLiteUInt8);
       const TfLiteType output_type =
           context->tensors[node->outputs->data[0]].type;
-      ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
-                   NNAPIValidationFailureType::kUnsupportedOutputType,
-                   "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
-                   "kTfLiteUInt8.",
-                   &val_ctx);
+      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
+                             kTfLiteUInt8, kTfLiteInt8);
+
+        ExpectTypeIn(
+            output_type,
+            {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
+            NNAPIValidationFailureType::kUnsupportedOutputType,
+            "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
+            "kTfLiteUInt8, kTfLiteInt8.",
+            &val_ctx);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
+                             kTfLiteUInt8);
+
+        ExpectTypeIn(
+            output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
+            NNAPIValidationFailureType::kUnsupportedOutputType,
+            "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
+            "kTfLiteUInt8.",
+            &val_ctx);
+      }
     } break;
     case kTfLiteBuiltinPrelu: {
       ExpectOpVersion(version, 1, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
-      ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
+      const auto input_type = context->tensors[node->inputs->data[0]].type;
+      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
+                           kTfLiteInt8);
     } break;
     case kTfLiteBuiltinTile: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate(
              &val_ctx);
     } break;
     case kTfLiteBuiltinGather: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto input_type = context->tensors[node->inputs->data[0]].type;
       const auto& positions = context->tensors[node->inputs->data[1]];
+
       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
-                           kTfLiteInt32, kTfLiteUInt8);
-      ExpectTypeIn(positions.type,
-                   {kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8},
-                   NNAPIValidationFailureType::kUnsupportedInputType,
-                   "Positions type should be one of kTfLiteFloat32, "
-                   "kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8",
-                   &val_ctx);
+                           kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
+
+      Expect(positions.type == kTfLiteInt32,
+             NNAPIValidationFailureType::kUnsupportedInputType,
+             "Positions type should be kTfLiteInt32", &val_ctx);
       Expect(positions.dims->size != 0,
              NNAPIValidationFailureType::kUnsupportedOperandRank,
              "0-dimension args are not supported by NNAPI.", &val_ctx);
@@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate(
                                  &val_ctx);
       // Tensor indices: split_dim: 0, value: 1
       const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
-      EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
-                           kTfLiteInt32);
+      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
+                             kTfLiteInt8, kTfLiteInt32);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
+                             kTfLiteInt32);
+      }
       const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
              NNAPIValidationFailureType::kUnsupportedInputType,
@@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate(
              NNAPIValidationFailureType::kUnsupportedInputType,
              "Value should be Float32.", &val_ctx);
       const auto output_type = context->tensors[node->outputs->data[0]].type;
-      Expect(output_type == kTfLiteUInt8,
-             NNAPIValidationFailureType::kUnsupportedOutputType,
-             "Output should be kTfLiteUInt8.", &val_ctx);
+      if (android_sdk_version < kMinSdkVersionForNNAPI13) {
+        Expect(output_type == kTfLiteUInt8,
+               NNAPIValidationFailureType::kUnsupportedOutputType,
+               "Output should be kTfLiteUInt8.", &val_ctx);
+      } else {
+        ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
+                     NNAPIValidationFailureType::kUnsupportedOutputType,
+                     "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
+      }
       const auto quantization_params =
           context->tensors[node->outputs->data[0]].params;
       Expect(quantization_params.scale > 0.f,
              NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
              "Quantization scale should be > 0.", &val_ctx);
     } break;
-    case kTfLiteBuiltinReduceAny:
-    case kTfLiteBuiltinReduceMin:
-    case kTfLiteBuiltinReduceMax: {
-      ExpectOpVersion(version, 1, &val_ctx);
+    case kTfLiteBuiltinReduceAny: {
+      ExpectOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
              NNAPIValidationFailureType::kUnsupportedOutputType,
              "NNAPI does not support generating a scalar as output.", &val_ctx);
-      if (builtin_code == kTfLiteBuiltinReduceProd) {
-        const auto input_type = context->tensors[node->inputs->data[0]].type;
-        Expect(input_type == kTfLiteFloat32,
-               NNAPIValidationFailureType::kUnsupportedInputType,
-               "NNAPI only supports floating point REDUCE_PROD.", &val_ctx);
-      }
+    } break;
+    case kTfLiteBuiltinReduceMin:
+    case kTfLiteBuiltinReduceMax: {
+      ExpectMaxOpVersion(version, 2, &val_ctx);
+      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
+                                 &val_ctx);
+      const auto input_tensor = context->tensors[node->inputs->data[0]];
+      const auto input_type = input_tensor.type;
+      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
+                           kTfLiteInt8);
+      Expect(input_tensor.dims->size != 0,
+             NNAPIValidationFailureType::kUnsupportedOutputType,
+             "NNAPI does not support generating a scalar as output.", &val_ctx);
     } break;
     case kTfLiteBuiltinDepthToSpace: {
       const TfLiteType input_type =
@@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map(
     case kTfLiteBuiltinGather: {
       auto builtin = reinterpret_cast<TfLiteGatherParams*>(
           mapping_args.node->builtin_data);
-      mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0],
-                                           /* hybrid_op */ false,
-                                           /* scalar_as_tensor */ false);
-
       mapping_args.builder->AddScalarInt32Operand(builtin->axis);
-
       mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
                                            /* hybrid_op */ false,
-                                           /* scalar_as_tensor */ false);
-
+                                           /* tensor_flags */ 0);
       *nn_op_type = ANEURALNETWORKS_GATHER;
     } break;
     case kTfLiteBuiltinBidirectionalSequenceLstm: {
@@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
   // absolute indices but NN api indices inputs by relative indices.
   int relative_input_index = 0;
 
+  const bool use_int8_asymm_signed =
+      target_sdk_version_ >= kMinSdkVersionForNNAPI13;
+
   size_t input_offset = 0;
   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
     if (absolute_input_index == kTfLiteOptionalTensor) {
@@ -3472,9 +3518,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
           }
         } else if (tensor->type == kTfLiteInt8 &&
                    ann_type_equivalent == kTfLiteInt32) {
-          for (int i = 0; i < num_elements; ++i) {
-            reinterpret_cast<int32_t*>(input_ptr)[i] =
-                static_cast<const int32_t>(tensor->data.int8[i]) + 128;
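+          // Widen int8 values to int32. Before NNAPI 1.3 the data must be
+          // rebiased by 128 to match the unsigned representation.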
+          if (use_int8_asymm_signed) {
+            for (int i = 0; i < num_elements; ++i) {
+              reinterpret_cast<int32_t*>(input_ptr)[i] =
+                  static_cast<const int32_t>(tensor->data.int8[i]);
+            }
+          } else {
+            for (int i = 0; i < num_elements; ++i) {
+              reinterpret_cast<int32_t*>(input_ptr)[i] =
+                  static_cast<const int32_t>(tensor->data.int8[i]) + 128;
+            }
           }
         } else {
           context->ReportError(
@@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
                          &dequantize_mapping, &allocation_memory_mapping_,
                          &nnapi_to_tflite_op_mapping_, nn_model_.get(),
                          nnapi_errno);
+
+  // If we have target accelerators, the target SDK version might differ
+  // from the current Android version.
+  target_sdk_version_ = nnapi_->android_sdk_version;
+  if (!nnapi_devices_.empty()) {
+    TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
+        context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
+  }
+
   // Add Tensors.
   for (auto node_index : nodes_) {
     // Obtain the op and registration.
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
     const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
     const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
     const bool need_int8_conversion =
+        target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
         NeedInt8Conversion(context, reg->builtin_code, node);
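+    // Hybrid ops keep float inputs/outputs with symmetric int8 weights, so
+    // the signed asymmetric mapping applies only to non-hybrid ops.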
+    const bool use_int8_asymm_signed =
+        target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
+
     int input_tensor_flags = 0;
     if (scalar_as_tensor) {
       input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
     }
+    if (use_int8_asymm_signed) {
+      input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
+    }
 
     // On SDK level less than 30, h_swish will be lowered into supported NNAPI
     // operations. Since SDK level 30, h_swish is supported as a single
@@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
             break;
           case kTfLiteInt8:
             if (constant_value.allocation_type == kTfLiteMmapRo) {
-              builder.AddScalarInt32Operand(
-                  static_cast<int32_t>(*constant_value.data.int8) + 128);
+              if (need_int8_conversion) {
+                builder.AddScalarInt32Operand(
+                    static_cast<int32_t>(*constant_value.data.int8) + 128);
+              } else {
+                builder.AddScalarInt32Operand(*constant_value.data.int8);
+              }
             } else {
               builder.AddSingleValueTensorAsScalarOperand(
                   constant_value_id, ANEURALNETWORKS_INT32);
@@ -3836,7 +3909,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
           // specifying the output height and width, is not added and
           // instead the height and width will be added individually as
           // scalars by the mapping function returned by Map().
-          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
+                                                       input_tensor_flags));
         }
       } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
         // The K parameter tensor is not handled here but by the functor
@@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
         // the else clause below
         continue;
       } else if (reg->builtin_code == kTfLiteBuiltinGather) {
-        // Everything is added during Map since input tensors
+        // Everything else is added during Map since input tensors
         // have different order.
+        if (input_pos == 0) {
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
+                                                       input_tensor_flags));
+        }
         continue;
       } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
                  input_pos == 1) {
@@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
         // the axis, needs to be converted to a scalar since TFLite uses a
         // tensor but NNAPI uses a scalar as the axis.
         if (input_pos == 0) {
-          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
+                                                       input_tensor_flags));
         } else {
           const int axis_id = node->inputs->data[1];
           const TfLiteTensor& axis_tensor = context->tensors[axis_id];
@@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
                   std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
                   operand_tensor.params, &tensor_index));
               break;
-            case kTfLiteInt8:
-              TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
-                  ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1},
-                  std::vector<int8_t>(1, operand_tensor.data.int8[0]),
-                  operand_tensor.params, &tensor_index));
-              break;
+            case kTfLiteInt8: {
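+              // A zero scale is invalid for quantized NNAPI operands, so
+              // default to 1.0 for this single-element constant.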
+              auto params = operand_tensor.params;
+              if (params.scale == 0.0) {
+                params.scale = 1.0;
+              }
+
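+              // On NNAPI 1.3+ the constant is added as signed asymmetric
+              // as-is; otherwise it is rebiased by 128 and added as the
+              // unsigned asymmetric type.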
+              if (use_int8_asymm_signed) {
+                TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
+                    ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
+                    operand_tensor.type, {1},
+                    std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
+                    &tensor_index));
+              } else {
+                TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
+                    ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
+                    {1},
+                    std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
+                    params, &tensor_index));
+              }
+            } break;
             case kTfLiteInt32:
               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
                   ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
@@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
       }
     }
 
-    // If we have target accelerators the target SDK version might be
-    // different than the current android version.
-    int target_sdk_version = nnapi_->android_sdk_version;
-    if (!nnapi_devices_.empty()) {
-      TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
-          context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
-    }
-
     // Get op type and operands
     // Fails if the Validate function failed
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
-        Map(context, reg->builtin_code, reg->version, target_sdk_version,
+        Map(context, reg->builtin_code, reg->version, target_sdk_version_,
             {context, &builder, node, &model_state_outputs_,
              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
             &nn_op_type));
@@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
     if (need_int8_conversion) {
       output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
     }
+    if (use_int8_asymm_signed) {
+      output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
+    }
     for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
       const auto output_index = node->outputs->data[output_pos];
 
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
index 26822c011e3..9aa0f303cc2 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
@@ -341,6 +341,9 @@ class NNAPIDelegateKernel {
 
   std::vector<int> nnapi_to_tflite_op_mapping_;
 
+  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors.
+  int target_sdk_version_ = 27;  // kMinSdkVersionForNNAPI
+
   void AddDequantizeOperatorsWhereNeeded(
       const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
       int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
index fa7ff9dd1f1..5dbe4110131 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
@@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
     ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
     ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>();
     EventWaitReturns<ANEURALNETWORKS_NO_ERROR>();
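+    // Stub the compilation priority and per-channel quantization setters so
+    // delegate setup succeeds under the mock.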
+    SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>();
+    SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>();
     SetNnapiSupportedDevice("test-device", android_sdk_version);
   }
 
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc
new file mode 100644
index 00000000000..b9d702015c2
--- /dev/null
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc
@@ -0,0 +1,920 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/lite/builtin_ops.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/fully_connected.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/minimal_logging.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
+#include "tensorflow/lite/nnapi/nnapi_implementation.h"
+
+namespace tflite {
+
+namespace ops {
+namespace builtin {
+
+TfLiteRegistration* Register_CONVOLUTION_REF();
+TfLiteRegistration* Register_DEQUANTIZE();
+
+}  // namespace builtin
+}  // namespace ops
+
+namespace {
+
+class SingleOpModelWithNNAPI : public SingleOpModel {
+ public:
+  SingleOpModelWithNNAPI() = default;
+  void Init(const NnApi* nnapi) {
+    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
+    SetDelegate(stateful_delegate_.get());
+  }
+
+  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
+
+  void SetBufferHandle(int index, TfLiteBufferHandle handle) {
+    interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
+  }
+  TfLiteStatus GetCompilationStatus() { return compilation_status_; }
+
+ protected:
+  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
+  TfLiteStatus compilation_status_;
+};
+
+class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
+ public:
+  HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
+                              const TensorData& input,
+                              const TensorData& weights,
+                              const TensorData& output = {TensorType_FLOAT32},
+                              bool asymmetric_inputs = false)
+      : batches_(batches), units_(units) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+    int total_input_size = 1;
+    for (size_t i = 0; i < input.shape.size(); ++i) {
+      total_input_size *= input.shape[i];
+    }
+    input_size_ = total_input_size / batches_;
+
+    input_ = AddInput(input);
+    weights_ = AddInput(weights);
+
+    TensorData bias{TensorType_FLOAT32, {units_}};
+    bias_ = AddInput(bias);
+
+    output_ = AddOutput(output);
+
+    auto options = CreateFullyConnectedOptions(
+                       builder_, ActivationFunctionType_RELU,
+                       tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+                       false, asymmetric_inputs)
+                       .Union();
+    SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
+                 BuiltinOptions_FullyConnectedOptions, options);
+    resolver_ = absl::make_unique<SingleOpResolver>(
+        BuiltinOperator_FULLY_CONNECTED,
+        ops::builtin::Register_FULLY_CONNECTED_PIE());
+    BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
+                     /*num_threads=*/-1,
+                     /* allow_fp32_relax_to_fp16 */ false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
+  void SetWeights(const std::vector<float>& data) {
+    SymmetricQuantizeAndPopulate(weights_, data);
+  }
+  void SetSignedWeights(std::initializer_list<float> f) {
+    SignedSymmetricQuantizeAndPopulate(weights_, f);
+  }
+
+  void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+  int input_size() { return input_size_; }
+  int num_units() { return units_; }
+  int num_batches() { return batches_; }
+
+ protected:
+  int input_;
+  int weights_;
+  int bias_;
+  int output_;
+
+  int batches_;
+  int units_;
+  int input_size_;
+};
+
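+// Test fixture that stubs ANeuralNetworksModel_addOperand to count how many
+// operands of each NNAPI type the delegate creates, so tests can assert
+// which quantized representation is chosen at a given SDK level.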
+struct NnApiSignedQuantizationTest
+    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
+  static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
+  void SetUp() override {
+    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
+    nnapi_mock_->StubAddOperandWith(
+        [](ANeuralNetworksModel* model,
+           const ANeuralNetworksOperandType* type) -> int {
+          const auto nn_tensor_type = type->type;
+          if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
+            tensors_count->insert({nn_tensor_type, 0});
+          }
+          tensors_count->at(nn_tensor_type)++;
+          return ANEURALNETWORKS_NO_ERROR;
+        });
+  }
+  void TearDown() override { tensors_count->clear(); }
+  static void TearDownTestSuite() {
+    delete tensors_count;
+    tensors_count = nullptr;
+  }
+  static std::map<int, int>* tensors_count;
+};
+std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;
+
+TEST_F(NnApiSignedQuantizationTest,
+       HybridFullyConnectedMapsToSignedSymmOnSdk29) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+
+  HybridFullyConnectedOpModel m(
+      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
+      /*input=*/{TensorType_FLOAT32, {2, 10}},
+      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
+  m.SetSignedWeights({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+  });
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            4);  // fc_input, fc_weights, fc_bias, fc_output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            1);  // dequantize_weights_input
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       HybridFullyConnectedMapsToSignedSymmOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+
+  HybridFullyConnectedOpModel m(
+      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
+      /*input=*/{TensorType_FLOAT32, {2, 10}},
+      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
+  m.SetSignedWeights({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+  });
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            4);  // fc_input, fc_weights, fc_bias, fc_output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            1);  // dequantize_weights_input
+}
+
+template <typename FilterType>
+class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
+ public:
+  BaseConvolutionOpModel(
+      const NnApi* nnapi, TfLiteRegistration* registration,
+      const TensorData& input, const TensorData& filter,
+      const TensorData& output, int stride_width = 2, int stride_height = 2,
+      enum Padding padding = Padding_VALID,
+      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
+      int dilation_width_factor = 1, int dilation_height_factor = 1,
+      std::initializer_list<FilterType> filter_data = {}) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+
+    input_ = AddInput(input);
+
+    if (filter_data.size()) {
+      filter_ = AddConstInput(filter, filter_data);
+    } else {
+      filter_ = AddInput(filter);
+    }
+
+    int bias_size = GetShape(filter_)[0];
+    if (input.type == TensorType_FLOAT32) {
+      bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+    } else {
+      // This is a quantized version. The scale of 'bias' depends on the
+      // scales of the input and filter; it is assumed to be set correctly
+      // during quantized training.
+      if (filter.per_channel_quantization) {
+        // per channel quantization.
+        std::vector<float> bias_scale(
+            filter.per_channel_quantization_scales.size());
+        std::vector<int64_t> bias_zero_points(
+            filter.per_channel_quantization_scales.size());
+        for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
+             ++i) {
+          bias_scale[i] =
+              input.scale * filter.per_channel_quantization_scales[i];
+          bias_zero_points[i] = 0;
+        }
+        tflite::TensorType bias_type = TensorType_INT32;
+        if (input.type == TensorType_INT16) {
+          // In the 16-bit case, the bias type is set to int64.
+          bias_type = TensorType_INT64;
+        }
+        TensorData bias{bias_type,
+                        {bias_size},
+                        /*min=*/0,
+                        /*max=*/0,
+                        /*scale=*/0,
+                        /*zero_point=*/0,
+                        true,
+                        /*per_channel_quantization_scales=*/bias_scale,
+                        /*per_channel_quantization_offsets=*/bias_zero_points,
+                        /*channel_index=*/0};
+        bias_ = AddInput(bias);
+      } else {
+        // per tensor quantization.
+        auto bias_scale = GetScale(input_) * GetScale(filter_);
+        TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
+        bias_ = AddInput(bias);
+      }
+    }
+
+    output_ = AddOutput(output);
+
+    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
+                 CreateConv2DOptions(
+                     builder_, padding, stride_width, stride_height, activation,
+                     dilation_width_factor, dilation_height_factor)
+                     .Union());
+
+    resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
+                                                    registration);
+    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
+                     /*num_threads=*/-1,
+                     /* allow_fp32_relax_to_fp16 */ false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+
+ protected:
+  int input_;
+  int filter_;
+  int bias_;
+  int output_;
+};
+
+class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
+ public:
+  using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    QuantizeAndPopulate<uint8_t>(input_, data);
+  }
+
+  void SetFilter(std::initializer_list<float> data) {
+    QuantizeAndPopulate<uint8_t>(filter_, data);
+  }
+
+  void SetBias(std::initializer_list<float> data) {
+    QuantizeAndPopulate<int32_t>(bias_, data);
+  }
+
+  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
+                               GetScale(output_), GetZeroPoint(output_));
+  }
+};
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dUnsignedPerTensorMapsToUnsignedOnSdk29) {
+  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
+                                ops::builtin::Register_CONVOLUTION_REF(),
+                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
+                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
+                                {TensorType_UINT8, {}, -127, 128});
+  m.SetInput({
+      // First batch
+      1, 1, 1, 1,  // row = 1
+      2, 2, 2, 2,  // row = 2
+      // Second batch
+      1, 2, 3, 4,  // row = 1
+      1, 2, 3, 4,  // row = 2
+  });
+  m.SetFilter({
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
+                                ops::builtin::Register_CONVOLUTION_REF(),
+                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
+                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
+                                {TensorType_UINT8, {}, -127, 128});
+  m.SetInput({
+      // First batch
+      1, 1, 1, 1,  // row = 1
+      2, 2, 2, 2,  // row = 2
+      // Second batch
+      1, 2, 3, 4,  // row = 1
+      1, 2, 3, 4,  // row = 2
+  });
+  m.SetFilter({
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+class PerChannelQuantizedConvolutionOpModel
+    : public BaseConvolutionOpModel<int8_t> {
+ public:
+  using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    QuantizeAndPopulate<int8_t>(input_, data);
+  }
+
+  void SetFilter(std::initializer_list<float> data) {
+    PerChannelSymmetricQuantizeAndPopulate(filter_, data);
+  }
+
+  void SetBias(std::initializer_list<float> data) {
+    PerChannelQuantizeBias(bias_, data);
+  }
+
+  std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
+                              GetZeroPoint(output_));
+  }
+};
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+  PerChannelQuantizedConvolutionOpModel m(
+      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
+      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
+      {TensorType_INT8,
+       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+       {2, 2, 2, 2},
+       0,
+       0,
+       0,
+       0,
+       /*per_channel_quantization=*/true,
+       /*per_channel_quantization_scales=*/{1},
+       /*per_channel_quantization_offsets=*/{0},
+       /*channel_index=*/0},
+      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
+      /*stride_width=*/1, /*stride_height=*/1);
+  m.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  m.SetFilter(
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      {
+          1, 2,  // out channel = 0, y = 0, x = 0
+          3, 4,  // out channel = 0, y = 0, x = 1
+          3, 4,  // out channel = 0, y = 1, x = 0
+          5, 6,  // out channel = 0, y = 1, x = 1
+          7, 8,  // out channel = 1, y = 0, x = 0
+          5, 6,  // out channel = 1, y = 0, x = 1
+          3, 4,  // out channel = 1, y = 1, x = 0
+          1, 2,  // out channel = 1, y = 1, x = 1
+      });
+  m.SetBias({3, -2});
+
+  // Invoke and verify output.
+  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dSignedPerTensorMapsToSignedOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  PerChannelQuantizedConvolutionOpModel m(
+      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
+      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
+      {TensorType_INT8,
+       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+       {2, 2, 2, 2},
+       0,
+       0,
+       0,
+       0,
+       /*per_channel_quantization=*/true,
+       /*per_channel_quantization_scales=*/{1},
+       /*per_channel_quantization_offsets=*/{0},
+       /*channel_index=*/0},
+      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
+      /*stride_width=*/1, /*stride_height=*/1);
+  m.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  m.SetFilter(
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      {
+          1, 2,  // out channel = 0, y = 0, x = 0
+          3, 4,  // out channel = 0, y = 0, x = 1
+          3, 4,  // out channel = 0, y = 1, x = 0
+          5, 6,  // out channel = 0, y = 1, x = 1
+          7, 8,  // out channel = 1, y = 0, x = 0
+          5, 6,  // out channel = 1, y = 0, x = 1
+          3, 4,  // out channel = 1, y = 1, x = 0
+          1, 2,  // out channel = 1, y = 1, x = 1
+      });
+  m.SetBias({3, -2});
+
+  // Invoke and verify output.
+  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
+  PerChannelQuantizedConvolutionOpModel m(
+      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
+      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
+      {TensorType_INT8,
+       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+       {2, 2, 2, 2},
+       0,
+       0,
+       0,
+       0,
+       /*per_channel_quantization=*/true,
+       /*per_channel_quantization_scales=*/{1, 2},
+       /*per_channel_quantization_offsets=*/{0, 0},
+       /*channel_index=*/0},
+      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
+      /*stride_width=*/1, /*stride_height=*/1);
+  m.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  m.SetFilter(
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      {
+          1, 2,  // out channel = 0, y = 0, x = 0
+          3, 4,  // out channel = 0, y = 0, x = 1
+          3, 4,  // out channel = 0, y = 1, x = 0
+          5, 6,  // out channel = 0, y = 1, x = 1
+          7, 8,  // out channel = 1, y = 0, x = 0
+          5, 6,  // out channel = 1, y = 0, x = 1
+          3, 4,  // out channel = 1, y = 1, x = 0
+          1, 2,  // out channel = 1, y = 1, x = 1
+      });
+  m.SetBias({3, -2});
+
+  // Invoke and verify output.
+  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 4);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            2);  // input, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
+            1);                                                   // filter
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  PerChannelQuantizedConvolutionOpModel m(
+      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
+      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
+      {TensorType_INT8,
+       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+       {2, 2, 2, 2},
+       0,
+       0,
+       0,
+       0,
+       /*per_channel_quantization=*/true,
+       /*per_channel_quantization_scales=*/{1, 2},
+       /*per_channel_quantization_offsets=*/{0, 0},
+       /*channel_index=*/0},
+      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
+      /*stride_width=*/1, /*stride_height=*/1);
+  m.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  m.SetFilter(
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      {
+          1, 2,  // out channel = 0, y = 0, x = 0
+          3, 4,  // out channel = 0, y = 0, x = 1
+          3, 4,  // out channel = 0, y = 1, x = 0
+          5, 6,  // out channel = 0, y = 1, x = 1
+          7, 8,  // out channel = 1, y = 0, x = 0
+          5, 6,  // out channel = 1, y = 0, x = 1
+          3, 4,  // out channel = 1, y = 1, x = 0
+          1, 2,  // out channel = 1, y = 1, x = 1
+      });
+  m.SetBias({3, -2});
+
+  // Invoke and verify output.
+  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 4);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            2);  // input, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
+            1);                                                   // filter
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  //  padding, stride_width, stride_height, activation
+}
+
+class QuantizeOpModel : public SingleOpModelWithNNAPI {
+ public:
+  QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
+                  const TensorData& output) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+    input_ = AddInput(input);
+    output_ = AddOutput(output);
+    SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
+                 CreateQuantizeOptions(builder_).Union());
+
+    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
+                     /* allow_fp32_relax_to_fp16 */ false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+
+  void SetInput(std::initializer_list<float> data) {
+    PopulateTensor(input_, data);
+  }
+
+  template <typename T>
+  void SetInputAndQuantize(std::initializer_list<float> data) {
+    QuantizeAndPopulate<T>(input_, data);
+  }
+
+  template <typename T>
+  std::vector<T> GetOutput() {
+    return ExtractVector<T>(output_);
+  }
+
+ private:
+  int input_;
+  int output_;
+};
+
+TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
+  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
+  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
+                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
+
+  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            1);  // output
+}
+
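+// Quantize with Uint8 output still maps to TENSOR_QUANT8_ASYMM on SDK
+// level 30.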
+TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
+  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
+                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
+
+  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            1);  // output
+}
+
+// Quantize with Int8 output is only supported since SDK level 30.
+TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
+  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
+                    {TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});
+
+  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            1);  // output
+}
+
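+// Single-op model with a DEQUANTIZE node; `version` selects which kernel
+// version of the op is registered with the resolver.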
+class DequantizeOpModel : public SingleOpModelWithNNAPI {
+ public:
+  DequantizeOpModel(const NnApi* nnapi, TensorType type,
+                    std::initializer_list<int> shape, float scale,
+                    int32_t zero_point, int version) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+    const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
+    input_ = AddInput(input_tensor_data);
+    output_ = AddOutput({TensorType_FLOAT32, shape});
+    SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
+                 CreateDequantizeOptions(builder_).Union());
+
+    resolver_ = absl::make_unique<SingleOpResolver>(
+        BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
+        version);
+
+    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
+                     /*allow_fp32_relax_to_fp16=*/false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+
+  template <typename T>
+  void SetInput(std::initializer_list<T> data) {
+    PopulateTensor(input_, data);
+  }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ private:
+  int input_;
+  int output_;
+};
+
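+// Dequantize with Uint8 input maps to TENSOR_QUANT8_ASYMM on SDK level 29.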
+TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
+  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
+  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
+                      127, 1);
+
+  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // output
+}
+
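+// Dequantize with Uint8 input still maps to TENSOR_QUANT8_ASYMM on SDK
+// level 30.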
+TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  // [-63.5, 64] -> scale=0.5, zero_point=127 for UINT8
+  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
+                      127, 1);
+
+  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // output
+}
+
+// On SDK level 29, Dequantize with Int8 input is only supported for symmetric
+// quantization (zero_point == 0); it maps to TENSOR_QUANT8_SYMM.
+TEST_F(NnApiSignedQuantizationTest,
+       DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
+  // [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
+  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
+                      2);
+
+  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // output
+}
+
+// Dequantize with asymmetric Int8 input (non-zero zero point) is only
+// supported since SDK level 30.
+TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
+  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
+                      2);
+
+  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 2);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
+            1);  // input
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            1);  // output
+}
+
+}  // namespace
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h
index 3c30a0479fa..8415df58b8b 100644
--- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h
+++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h
@@ -46,6 +46,7 @@ enum {
   ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
   ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
   ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
+  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
 };
 
 /**
diff --git a/tensorflow/lite/nnapi/nnapi_handler.h b/tensorflow/lite/nnapi/nnapi_handler.h
index 00c0b23e3cf..16e1e9fea10 100644
--- a/tensorflow/lite/nnapi/nnapi_handler.h
+++ b/tensorflow/lite/nnapi/nnapi_handler.h
@@ -118,6 +118,11 @@ class NnApiHandler {
            const ANeuralNetworksOperandType* type) { return Value; };
   }
 
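+  // Replaces the ANeuralNetworksModel_addOperand stub with a caller-provided
+  // function, e.g. so tests can record the operand types a model registers.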
+  void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model,
+                                    const ANeuralNetworksOperandType* type)) {
+    nnapi_->ANeuralNetworksModel_addOperand = stub;
+  }
+
   template <int Value>
   void SetOperandValueReturns() {
     nnapi_->ANeuralNetworksModel_setOperandValue =
@@ -268,6 +273,23 @@ class NnApiHandler {
     };
   }
 
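+  // Makes ANeuralNetworksCompilation_setPriority return the given value.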
+  template <int Value>
+  void SetPriorityReturns() {
+    nnapi_->ANeuralNetworksCompilation_setPriority =
+        [](ANeuralNetworksCompilation* compilation, int priority) -> int {
+      return Value;
+    };
+  }
+
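+  // Makes ANeuralNetworksModel_setOperandSymmPerChannelQuantParams return
+  // the given value, so tests can simulate (lack of) support for per-channel
+  // quantized operands.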
+  template <int Value>
+  void SetOperandSymmPerChannelQuantParamsReturns() {
+    nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams =
+        [](ANeuralNetworksModel* model, int32_t index,
+           const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) {
+          return Value;
+        };
+  }
+
   /*
    * Sets the SDK Version in the nnapi structure.
    * If set_unsupported_ops_to_null is set to true, all the functions not