Add NNAPI Delegation support for requantization use cases (transforming a quantized tensor into another quantized one with different quantization parameters) by converting the operation into a dequantize-quantize pair.
PiperOrigin-RevId: 325816400 Change-Id: I55f8726f0478e9795c667a9cf4eddda084ed95a7
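For context, the decomposition this commit relies on reduces to simple arithmetic: requantization is a dequantize with the input parameters followed by a quantize with the output parameters. A minimal illustrative sketch, assuming per-tensor affine quantization; the function and parameter names are hypothetical, not code from the delegate:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Requantize q_in from (in_scale, in_zero_point) to (out_scale,
    // out_zero_point) by dequantizing to float and quantizing back.
    int8_t Requantize(int8_t q_in, float in_scale, int32_t in_zero_point,
                      float out_scale, int32_t out_zero_point) {
      const float real_value = in_scale * (q_in - in_zero_point);  // dequantize
      const int32_t q_out =
          static_cast<int32_t>(std::round(real_value / out_scale)) +
          out_zero_point;                                          // quantize
      return static_cast<int8_t>(std::min(127, std::max(-128, q_out)));
    }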
parent 2c6f7e24dd
commit 4ed4c14e4c
RELEASE.md: record the new capability under the TF Lite release notes.

@@ -128,6 +128,7 @@
   * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`).
   * Deprecate `Interpreter::UseNNAPI(bool)` C++ API
     * Prefer using `NnApiDelegate()` and related delegate configuration methods directly.
+  * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair.
   * <ADD RELEASE NOTES HERE>
 * `tf.random`:
   * <ADD RELEASE NOTES HERE>
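The deprecation note above points at explicit delegate construction. One way to do this with the C++ `StatefulNnApiDelegate` is sketched below; the helper function is illustrative, not part of the library:

    #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
    #include "tensorflow/lite/interpreter.h"

    // Attach the NNAPI delegate explicitly instead of calling the
    // deprecated Interpreter::UseNNAPI(bool).
    TfLiteStatus ApplyNnApi(tflite::Interpreter* interpreter) {
      tflite::StatefulNnApiDelegate::Options options;
      options.execution_preference =
          tflite::StatefulNnApiDelegate::Options::kSustainedSpeed;
      // The delegate must outlive the interpreter; a static instance is a
      // simple way to guarantee that in a sketch.
      static tflite::StatefulNnApiDelegate delegate(options);
      return interpreter->ModifyGraphWithDelegate(&delegate);
    }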
tensorflow/lite/delegates/nnapi/acceleration_test_list.cc: enable the new requantization tests. The trailing number is the minimum Android API level at which the test is expected to run accelerated; 29 corresponds to NNAPI 1.2 and 30 to NNAPI 1.3, which is when signed int8 support arrived.

@@ -309,6 +309,8 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
 
 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/UInt8UInt8.+,29
+QuantizeOpTest/Int8Int8.+,30
 QuantizeOpTest/INT8,30
 
 # rank
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, Validate(): accept quantized inputs to QUANTIZE (now up to op version 2) and require a positive input scale (a sketch of the IsQuantized helper follows the hunk).

@@ -2436,13 +2436,20 @@ bool NNAPIDelegateKernel::Validate(
              "Input should be Float32.", &val_ctx);
     } break;
     case kTfLiteBuiltinQuantize: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectMaxOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto value_type = context->tensors[node->inputs->data[0]].type;
-      Expect(value_type == kTfLiteFloat32,
+      Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
              NNAPIValidationFailureType::kUnsupportedInputType,
-             "Value should be Float32.", &val_ctx);
+             "Value should be quantized or Float32.", &val_ctx);
+      if (IsQuantized(value_type)) {
+        const auto quantization_params =
+            context->tensors[node->inputs->data[0]].params;
+        Expect(quantization_params.scale > 0.f,
+               NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
+               "Quantization scale should be > 0.", &val_ctx);
+      }
       const auto output_type = context->tensors[node->outputs->data[0]].type;
       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
         Expect(output_type == kTfLiteUInt8,
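IsQuantized is an existing helper in the delegate. As a sketch of what the check above admits, assuming it covers the two 8-bit quantized types NNAPI handles on this path:

    #include "tensorflow/lite/c/common.h"

    // Sketch of the helper relied on above: QUANTIZE inputs may now be
    // float or one of the 8-bit quantized types.
    bool IsQuantized(TfLiteType type) {
      switch (type) {
        case kTfLiteUInt8:
        case kTfLiteInt8:
          return true;
        default:
          // kTfLiteInt16 and other types are not accepted here.
          return false;
      }
    }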
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, Map(): when the QUANTIZE input is already quantized, prepend a Dequantize so NNAPI only ever quantizes from float (the resulting op sequence is sketched after the hunk).

@@ -3284,6 +3291,15 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
     } break;
     case kTfLiteBuiltinQuantize: {
+      auto input_index = mapping_args.node->inputs->data[0];
+      // NNAPI supports only quantization from float, not requantization.
+      // Dequantize the input first by adding a Dequantize node before this
+      // one.
+      if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
+        mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
+                                            mapping_args.node_index);
+      }
+
       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
     } break;
     case kTfLiteBuiltinReduceAny: {
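The net effect on the emitted NNAPI graph, sketched as comments (tensor names and parameters are illustrative):

    // Before this change, a QUANTIZE with an already-quantized input failed
    // validation, since ANEURALNETWORKS_QUANTIZE expects a float input.
    // With the rewrite, the delegate emits:
    //
    //   t0 (uint8, scale_a, zp_a)
    //       --ANEURALNETWORKS_DEQUANTIZE--> t_float (float32)   // inserted
    //   t_float (float32)
    //       --ANEURALNETWORKS_QUANTIZE----> t1 (uint8, scale_b, zp_b)

Trading a float round-trip for compatibility keeps the op delegated to the accelerator instead of forcing a fallback to the CPU.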
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, AddOpsAndTensors(): thread the node index through to Map().

@@ -4254,7 +4270,7 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
         Map(context, reg->builtin_code, reg->version, target_sdk_version_,
-            {context, &builder, node, &model_state_outputs_,
+            {context, &builder, node, node_index, &model_state_outputs_,
              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
             &nn_op_type));
 
tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h: add the node_index field consumed by the mapping above (a toy example of the aggregate-initialization constraint follows the hunk).

@@ -111,6 +111,7 @@ struct NNAPIOpMappingArgs {
   TfLiteContext* context;
   NNAPIOpBuilder* builder;
   TfLiteNode* node;
+  int node_index;
   std::vector<int>* model_state_outputs;
   std::vector<int>* model_state_tfl_inputs;
   std::vector<std::tuple<int, int>>* feedback_loops;
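Because Map() receives this struct via aggregate initialization (see the AddOpsAndTensors hunk above), the new field must be inserted at the matching position in every brace-init list. A self-contained toy illustration with hypothetical names:

    #include <cstdio>

    // Minimal illustration of why NNAPIOpMappingArgs' field order matters:
    // callers use aggregate initialization, so fields bind positionally.
    struct MappingArgsDemo {
      int context_id;
      int node_id;
      int node_index;  // newly inserted field
      int error_code;
    };

    int main() {
      // Mirrors the updated call site: node_index added in positional order.
      MappingArgsDemo args{/*context_id=*/1, /*node_id=*/7, /*node_index=*/3,
                           /*error_code=*/0};
      std::printf("node_index=%d\n", args.node_index);
      return 0;
    }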