From e071e66f03eb6ae234212400f34061f25e79a699 Mon Sep 17 00:00:00 2001 From: Lev Proleev <levp@google.com> Date: Tue, 23 Jun 2020 05:29:56 -0700 Subject: [PATCH] Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate PiperOrigin-RevId: 317846923 Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3 --- tensorflow/lite/delegates/nnapi/BUILD | 26 + .../delegates/nnapi/acceleration_test_list.cc | 14 +- .../lite/delegates/nnapi/nnapi_delegate.cc | 246 +++-- .../delegates/nnapi/nnapi_delegate_kernel.h | 3 + .../nnapi/nnapi_delegate_mock_test.h | 2 + ...nnapi_delegate_signed_quantization_test.cc | 920 ++++++++++++++++++ tensorflow/lite/nnapi/NeuralNetworksTypes.h | 1 + tensorflow/lite/nnapi/nnapi_handler.h | 22 + 8 files changed, 1154 insertions(+), 80 deletions(-) create mode 100644 tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD index ec9f6907f21..beeaff1b99d 100644 --- a/tensorflow/lite/delegates/nnapi/BUILD +++ b/tensorflow/lite/delegates/nnapi/BUILD @@ -190,6 +190,32 @@ cc_test( ], ) +cc_test( + name = "nnapi_delegate_signed_quantization_test", + size = "small", + srcs = [ + "nnapi_delegate_signed_quantization_test.cc", + ], + tags = [ + "no_mac", + "no_windows", + "tflite_not_portable_ios", + ], + deps = [ + ":nnapi_delegate", + ":nnapi_delegate_mock_test", + "//tensorflow/lite:framework", + "//tensorflow/lite:kernel_api", + "//tensorflow/lite:minimal_logging", + "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/kernels:test_util", + "//tensorflow/lite/nnapi:nnapi_implementation", + "//tensorflow/lite/nnapi:nnapi_lib", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "quant_lstm_sup_test", size = "small", diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index b20628016f0..31bdc5f8b99 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30 FloatActivationsOpTest/HardSwish QuantizedActivationsOpTest/HardSwish QuantizedActivationsOpTest/HardSwishBias +QuantizedActivationsOpTest/Relu* +QuantizedActivationsOpTest/PRelu,29 +QuantizedActivationsOpTest/PReluSameShapes,29 +QuantizedActivationsOpTest/PReluInt8.+,30 # add_test FloatAddOpModel/.+ @@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+ # dequantize_test DequantizeOpTest/Uint8 +DequantizeOpTest/Int8,30 # depth_to_space_test DepthToSpaceOpModel/Float32 @@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29 HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29 HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29 +HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29 FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+ QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+ QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29 @@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29 TypesGatherOpTest/Float32Int32,29 TypesGatherOpTest/Int32Int32,29 TypesGatherOpTest/Uint8Int32,29 +TypesGatherOpTest/Int8Int32,29 # hashtable_lookup_test # All test excepted the string one should 
be accelerated @@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29 # quantize_test QuantizeOpTest/UINT8,29 +QuantizeOpTest/INT8,30 + +# rank # reduce_test -Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+ -ConstUint8(Mean|Sum)OpTest/.+ +-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale +-ConstInt8MeanOpTest.QuantizedDifferentScale ConstUint8(Max|Min)OpTest/.+,29 ConstUint8(Mean)OpTest/.+ -Constint8(Mean|Max|Min)OpTest/.+ +ConstInt8(Mean|Max|Min)OpTest/.+,29 ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29 ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29 ConstFloat(Mean|Any)OpTest/NotKeepDims diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 1c35ee370c2..58ab13ab657 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code, case kTfLiteBuiltinConcatenation: case kTfLiteBuiltinEqual: case kTfLiteBuiltinExpandDims: + case kTfLiteBuiltinGather: case kTfLiteBuiltinGreater: case kTfLiteBuiltinGreaterEqual: case kTfLiteBuiltinHardSwish: @@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) { enum { NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0, NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1, + NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2, }; // Returns the SDK level to target when delegating to the given devices. @@ -1065,6 +1067,8 @@ class NNAPIOpBuilder { tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR; const bool need_int8_conversion = tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION; + const bool use_int8_asymm_signed = + tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index); if (ann_tensor_index != -1) { indices->push_back(ann_tensor_index); @@ -1095,12 +1099,25 @@ class NNAPIOpBuilder { nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; break; case kTfLiteUInt8: + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + scale = tensor->params.scale; + zeroPoint = tensor->params.zero_point; + if (scale == 0) { + // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in + // NNAPI. + scale = 1; + } + break; case kTfLiteInt8: // If explicit int8 conversion is needed, we still need // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type. - nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion) - ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM - : ANEURALNETWORKS_TENSOR_QUANT8_SYMM; + if (use_int8_asymm_signed) { + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED; + } else if (need_int8_conversion) { + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + } else { + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM; + } scale = tensor->params.scale; zeroPoint = tensor->params.zero_point; if (tensor->quantization.type == kTfLiteAffineQuantization) { @@ -1130,8 +1147,7 @@ class NNAPIOpBuilder { operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8); } if (scale == 0) { - // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM - // with zero scale are not valid in NNAPI. + // QUANT8 tensors with zero scale are not valid in NNAPI. 
scale = 1; } } @@ -1248,7 +1264,6 @@ class NNAPIOpBuilder { "setting new operand value", nnapi_errno_); } } - indices->push_back(ann_tensor_index); return kTfLiteOk; } @@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate( bool is_accelerator_specified, std::vector<NNAPIValidationFailure>* map_failures) { OpValidationContext val_ctx{true, map_failures}; - switch (builtin_code) { case kTfLiteBuiltinAdd: { ExpectMaxOpVersion(version, 2, &val_ctx); @@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate( "Supported op versions are 1 and 2 only", &val_ctx); const auto& input = context->tensors[node->inputs->data[0]]; - Expect(input.type != kTfLiteFloat16, - NNAPIValidationFailureType::kUnsupportedInputType, - "kTfLiteFloat16 not supported as input", &val_ctx); + if (android_sdk_version < kMinSdkVersionForNNAPI12) { + EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8); + } else { + EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8); - const auto zero_point = input.params.zero_point; - Expect(input.type != kTfLiteInt8 || - (zero_point == 0 && - android_sdk_version >= kMinSdkVersionForNNAPI12), - NNAPIValidationFailureType::kUnsupportedInputType, - "NN API supports int8 type since version 1.2 but only for " - "symmetric quantization.", - &val_ctx); + if (android_sdk_version == kMinSdkVersionForNNAPI12 && + input.type == kTfLiteInt8) { + const auto zero_point = input.params.zero_point; + Expect(zero_point == 0, + NNAPIValidationFailureType::kUnsupportedInputType, + "NN API supports int8 type since version 1.2 but only for " + "symmetric quantization.", + &val_ctx); + } + } } break; case kTfLiteBuiltinFloor: { ExpectOpVersion(version, 1, &val_ctx); @@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate( &val_ctx); const TfLiteType input_type = context->tensors[node->inputs->data[0]].type; - EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, - kTfLiteUInt8); const TfLiteType output_type = context->tensors[node->outputs->data[0]].type; - ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8}, - NNAPIValidationFailureType::kUnsupportedOutputType, - "Output type should be one of kTfLiteFloat32, kTfLiteInt32, " - "kTfLiteUInt8.", - &val_ctx); + if (android_sdk_version >= kMinSdkVersionForNNAPI13) { + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, + kTfLiteUInt8, kTfLiteInt8); + + ExpectTypeIn( + output_type, + {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8}, + NNAPIValidationFailureType::kUnsupportedOutputType, + "Output type should be one of kTfLiteFloat32, kTfLiteInt32, " + "kTfLiteUInt8, kTfLiteInt8.", + &val_ctx); + } else { + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32, + kTfLiteUInt8); + + ExpectTypeIn( + output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8}, + NNAPIValidationFailureType::kUnsupportedOutputType, + "Output type should be one of kTfLiteFloat32, kTfLiteInt32, " + "kTfLiteUInt8.", + &val_ctx); + } } break; case kTfLiteBuiltinPrelu: { ExpectOpVersion(version, 1, &val_ctx); ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, &val_ctx); - ExpectIsFloatOrUint8Operator(context, node, &val_ctx); + const auto input_type = context->tensors[node->inputs->data[0]].type; + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, + kTfLiteInt8); } break; case kTfLiteBuiltinTile: { ExpectOpVersion(version, 1, &val_ctx); @@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate( &val_ctx); } break; case kTfLiteBuiltinGather: { - ExpectOpVersion(version, 1, &val_ctx); + 
ExpectOpVersion(version, 2, &val_ctx); ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, &val_ctx); const auto input_type = context->tensors[node->inputs->data[0]].type; const auto& positions = context->tensors[node->inputs->data[1]]; + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16, - kTfLiteInt32, kTfLiteUInt8); - ExpectTypeIn(positions.type, - {kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8}, - NNAPIValidationFailureType::kUnsupportedInputType, - "Positions type should be one of kTfLiteFloat32, " - "kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8", - &val_ctx); + kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8); + + Expect(positions.type == kTfLiteInt32, + NNAPIValidationFailureType::kUnsupportedInputType, + "Positions type should be kTfLiteInt32.", &val_ctx); Expect(positions.dims->size != 0, NNAPIValidationFailureType::kUnsupportedOperandRank, "0-dimension args are not supported by NNAPI.", &val_ctx); @@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate( &val_ctx); // Tensor indices: split_dim: 0, value: 1 const TfLiteTensor& input = context->tensors[node->inputs->data[1]]; - EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8, - kTfLiteInt32); + if (android_sdk_version >= kMinSdkVersionForNNAPI13) { + EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8, + kTfLiteInt8, kTfLiteInt32); + } else { + EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8, + kTfLiteInt32); + } const TfLiteTensor& axis = context->tensors[node->inputs->data[0]]; Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo, NNAPIValidationFailureType::kUnsupportedInputType, @@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate( NNAPIValidationFailureType::kUnsupportedInputType, "Value should be Float32.", &val_ctx); const auto output_type = context->tensors[node->outputs->data[0]].type; - Expect(output_type == kTfLiteUInt8, - NNAPIValidationFailureType::kUnsupportedOutputType, - "Output should be kTfLiteUInt8.", &val_ctx); + if (android_sdk_version < kMinSdkVersionForNNAPI13) { + Expect(output_type == kTfLiteUInt8, + NNAPIValidationFailureType::kUnsupportedOutputType, + "Output should be kTfLiteUInt8.", &val_ctx); + } else { + ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8}, + NNAPIValidationFailureType::kUnsupportedOutputType, + "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx); + } const auto quantization_params = context->tensors[node->outputs->data[0]].params; Expect(quantization_params.scale > 0.f, NNAPIValidationFailureType::kUnsupportedQuantizationParameters, "Quantization scale should be > 0.", &val_ctx); } break; - case kTfLiteBuiltinReduceAny: - case kTfLiteBuiltinReduceMin: - case kTfLiteBuiltinReduceMax: { - ExpectOpVersion(version, 1, &val_ctx); + case kTfLiteBuiltinReduceAny: { + ExpectOpVersion(version, 2, &val_ctx); ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, &val_ctx); Expect(context->tensors[node->outputs->data[0]].dims->size != 0, NNAPIValidationFailureType::kUnsupportedOutputType, "NNAPI does not support generating a scalar as output.", &val_ctx); - if (builtin_code == kTfLiteBuiltinReduceProd) { - const auto input_type = context->tensors[node->inputs->data[0]].type; - Expect(input_type == kTfLiteFloat32, - NNAPIValidationFailureType::kUnsupportedInputType, - "NNAPI only supports floating point REDUCE_PROD.", &val_ctx); - } + } break; + case kTfLiteBuiltinReduceMin: + case kTfLiteBuiltinReduceMax: { + ExpectMaxOpVersion(version, 2, &val_ctx); +
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, + &val_ctx); + const auto input_tensor = context->tensors[node->inputs->data[0]]; + const auto input_type = input_tensor.type; + EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, + kTfLiteInt8); + Expect(input_tensor.dims->size != 0, + NNAPIValidationFailureType::kUnsupportedOutputType, + "NNAPI does not support generating a scalar as output.", &val_ctx); } break; case kTfLiteBuiltinDepthToSpace: { const TfLiteType input_type = @@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map( case kTfLiteBuiltinGather: { auto builtin = reinterpret_cast<TfLiteGatherParams*>( mapping_args.node->builtin_data); - mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0], - /* hybrid_op */ false, - /* scalar_as_tensor */ false); - mapping_args.builder->AddScalarInt32Operand(builtin->axis); - mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1], /* hybrid_op */ false, - /* scalar_as_tensor */ false); - + /* tensor_flags */ 0); *nn_op_type = ANEURALNETWORKS_GATHER; } break; case kTfLiteBuiltinBidirectionalSequenceLstm: { @@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, // absolute indices but NN api indices inputs by relative indices. int relative_input_index = 0; + const bool use_int8_asymm_signed = + target_sdk_version_ >= kMinSdkVersionForNNAPI13; + size_t input_offset = 0; for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) { if (absolute_input_index == kTfLiteOptionalTensor) { @@ -3472,9 +3518,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, } } else if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteInt32) { - for (int i = 0; i < num_elements; ++i) { - reinterpret_cast<int32_t*>(input_ptr)[i] = - static_cast<const int32_t>(tensor->data.int8[i]) + 128; + if (use_int8_asymm_signed) { + for (int i = 0; i < num_elements; ++i) { + reinterpret_cast<int32_t*>(input_ptr)[i] = + static_cast<const int32_t>(tensor->data.int8[i]); + } + } else { + for (int i = 0; i < num_elements; ++i) { + reinterpret_cast<int32_t*>(input_ptr)[i] = + static_cast<const int32_t>(tensor->data.int8[i]) + 128; + } } } else { context->ReportError( @@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, &dequantize_mapping, &allocation_memory_mapping_, &nnapi_to_tflite_op_mapping_, nn_model_.get(), nnapi_errno); + + // If we have target accelerators the target SDK version might be + // different than the current android version. + target_sdk_version_ = nnapi_->android_sdk_version; + if (!nnapi_devices_.empty()) { + TF_LITE_ENSURE_STATUS(GetTargetSdkVersion( + context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno)); + } + // Add Tensors. for (auto node_index : nodes_) { // Obtain the op and registration. 
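The tensor flags introduced in this patch decide which NNAPI operand type a TfLite int8 tensor is mapped to. A minimal standalone sketch of that selection follows; the helper function and constant names are illustrative only, not part of the delegate, and the numeric type codes are taken from NeuralNetworksTypes.h:

#include <cstdint>

enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
};

// Operand type codes as defined in NeuralNetworksTypes.h.
constexpr int32_t kTensorQuant8Asymm = 5;         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
constexpr int32_t kTensorQuant8Symm = 13;         // ANEURALNETWORKS_TENSOR_QUANT8_SYMM
constexpr int32_t kTensorQuant8AsymmSigned = 14;  // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED

// Illustrative helper mirroring the kTfLiteInt8 branch of AddTensor above.
int32_t NnTypeForInt8Tensor(uint32_t tensor_flags) {
  if (tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED) {
    // NNAPI 1.3 (SDK 30+): int8 data is passed through natively.
    return kTensorQuant8AsymmSigned;
  }
  if (tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION) {
    // Pre-1.3: data is converted to uint8 by adding 128 at the I/O boundary.
    return kTensorQuant8Asymm;
  }
  // Otherwise only symmetric int8 (zero_point == 0) is representable.
  return kTensorQuant8Symm;
}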
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node); const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code); const bool need_int8_conversion = + target_sdk_version_ < kMinSdkVersionForNNAPI13 && NeedInt8Conversion(context, reg->builtin_code, node); + const bool use_int8_asymm_signed = + target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op; + int input_tensor_flags = 0; if (scalar_as_tensor) { input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR; } + if (use_int8_asymm_signed) { + input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; + } // On SDK level less than 30, h_swish will be lowered into supported NNAPI // operations. Since SDK level 30, h_swish is supported as a single @@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, break; case kTfLiteInt8: if (constant_value.allocation_type == kTfLiteMmapRo) { - builder.AddScalarInt32Operand( - static_cast<int32_t>(*constant_value.data.int8) + 128); + if (need_int8_conversion) { + builder.AddScalarInt32Operand( + static_cast<int32_t>(*constant_value.data.int8) + 128); + } else { + builder.AddScalarInt32Operand(*constant_value.data.int8); + } } else { builder.AddSingleValueTensorAsScalarOperand( constant_value_id, ANEURALNETWORKS_INT32); @@ -3836,7 +3909,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, // specifying the output height and width, is not added and // instead the height and width will be added individually as // scalars by the mapping function returned by Map(). - TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op)); + TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, + input_tensor_flags)); } } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) { // The K parameter tensor is not handled here but by the functor @@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, // the else clause below continue; } else if (reg->builtin_code == kTfLiteBuiltinGather) { - // Everything is added during Map since input tensors + // Everything else is added during Map since input tensors // have different order. + if (input_pos == 0) { + TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, + input_tensor_flags)); + } continue; } else if (reg->builtin_code == kTfLiteBuiltinExpandDims && input_pos == 1) { @@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, // the axis, needs to be converted to a scalar since TFLite uses a // tensor but NNAPI uses a scalar as the axis. 
if (input_pos == 0) { - TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op)); + TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op, + input_tensor_flags)); } else { const int axis_id = node->inputs->data[1]; const TfLiteTensor& axis_tensor = context->tensors[axis_id]; @@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, std::vector<uint8_t>(1, operand_tensor.data.uint8[0]), operand_tensor.params, &tensor_index)); break; - case kTfLiteInt8: - TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( - ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1}, - std::vector<int8_t>(1, operand_tensor.data.int8[0]), - operand_tensor.params, &tensor_index)); - break; + case kTfLiteInt8: { + auto params = operand_tensor.params; + if (params.scale == 0.0) { + params.scale = 1.0; + } + + if (use_int8_asymm_signed) { + TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( + ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, + operand_tensor.type, {1}, + std::vector<int8_t>(1, operand_tensor.data.int8[0]), params, + &tensor_index)); + } else { + TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( + ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, + {1}, + std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128), + params, &tensor_index)); + } + } break; case kTfLiteInt32: TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor( ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1}, @@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, } } - // If we have target accelerators the target SDK version might be - // different than the current android version. - int target_sdk_version = nnapi_->android_sdk_version; - if (!nnapi_devices_.empty()) { - TF_LITE_ENSURE_STATUS(GetTargetSdkVersion( - context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno)); - } - // Get op type and operands // Fails if the Validate function failed int nn_op_type; TF_LITE_ENSURE_STATUS( - Map(context, reg->builtin_code, reg->version, target_sdk_version, + Map(context, reg->builtin_code, reg->version, target_sdk_version_, {context, &builder, node, &model_state_outputs_, &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno}, &nn_op_type)); @@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, if (need_int8_conversion) { output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION; } + if (use_int8_asymm_signed) { + output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED; + } for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) { const auto output_index = node->outputs->data[output_pos]; diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h index 26822c011e3..9aa0f303cc2 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h @@ -341,6 +341,9 @@ class NNAPIDelegateKernel { std::vector<int> nnapi_to_tflite_op_mapping_; + // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors + int target_sdk_version_ = 27; // kMinSdkVersionForNNAPI + void AddDequantizeOperatorsWhereNeeded( const TfLiteContext* context, int builtin_code, const TfLiteNode* node, int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno); diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h index fa7ff9dd1f1..5dbe4110131
100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h @@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler { ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>(); ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>(); EventWaitReturns<ANEURALNETWORKS_NO_ERROR>(); + SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>(); + SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>(); SetNnapiSupportedDevice("test-device", android_sdk_version); } diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc new file mode 100644 index 00000000000..b9d702015c2 --- /dev/null +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc @@ -0,0 +1,920 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include <gtest/gtest.h> +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h" +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/fully_connected.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/minimal_logging.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h" +#include "tensorflow/lite/nnapi/nnapi_implementation.h" + +namespace tflite { + +namespace ops { +namespace builtin { + +TfLiteRegistration* Register_CONVOLUTION_REF(); +TfLiteRegistration* Register_DEQUANTIZE(); + +} // namespace builtin +} // namespace ops + +namespace { + +class SingleOpModelWithNNAPI : public SingleOpModel { + public: + SingleOpModelWithNNAPI() = default; + void Init(const NnApi* nnapi) { + stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi)); + SetDelegate(stateful_delegate_.get()); + } + + StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); } + + void SetBufferHandle(int index, TfLiteBufferHandle handle) { + interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get()); + } + TfLiteStatus GetCompilationStatus() { return compilation_status_; } + + protected: + std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_; + TfLiteStatus compilation_status_; +}; + +class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI { + public: + HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches, + const TensorData& input, + const TensorData& weights, + const TensorData& output = {TensorType_FLOAT32}, + bool asymmetric_inputs = false) + : batches_(batches), units_(units) { + SingleOpModelWithNNAPI::Init(nnapi); + int total_input_size = 1; + for (size_t i = 0; i < input.shape.size(); ++i) { + total_input_size *= input.shape[i]; + } + input_size_ = total_input_size / 
batches_; + + input_ = AddInput(input); + weights_ = AddInput(weights); + + TensorData bias{TensorType_FLOAT32, {units_}}; + bias_ = AddInput(bias); + + output_ = AddOutput(output); + + auto options = CreateFullyConnectedOptions( + builder_, ActivationFunctionType_RELU, + tflite::FullyConnectedOptionsWeightsFormat_DEFAULT, + false, asymmetric_inputs) + .Union(); + SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED, + BuiltinOptions_FullyConnectedOptions, options); + resolver_ = absl::make_unique<SingleOpResolver>( + BuiltinOperator_FULLY_CONNECTED, + ops::builtin::Register_FULLY_CONNECTED_PIE()); + BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)}, + /*num_threads=*/-1, + /* allow_fp32_relax_to_fp16 */ false, + /*apply_delegate=*/false); + compilation_status_ = ApplyDelegate(); + } + void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); } + void SetWeights(const std::vector<float>& data) { + SymmetricQuantizeAndPopulate(weights_, data); + } + void SetSignedWeights(std::initializer_list<float> f) { + SignedSymmetricQuantizeAndPopulate(weights_, f); + } + + void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); } + std::vector<float> GetOutput() { return ExtractVector<float>(output_); } + std::vector<int> GetOutputShape() { return GetTensorShape(output_); } + + int input_size() { return input_size_; } + int num_units() { return units_; } + int num_batches() { return batches_; } + + protected: + int input_; + int weights_; + int bias_; + int output_; + + int batches_; + int units_; + int input_size_; +}; + +struct NnApiSignedQuantizationTest + : ::tflite::delegate::nnapi::NnApiDelegateMockTest { + static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); } + void SetUp() override { + ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp(); + nnapi_mock_->StubAddOperandWith( + [](ANeuralNetworksModel* model, + const ANeuralNetworksOperandType* type) -> int { + const auto nn_tensor_type = type->type; + if (tensors_count->find(nn_tensor_type) == tensors_count->end()) { + tensors_count->insert({nn_tensor_type, 0}); + } + tensors_count->at(nn_tensor_type)++; + return ANEURALNETWORKS_NO_ERROR; + }); + } + void TearDown() override { tensors_count->clear(); } + static void TearDownTestSuite() { + delete tensors_count; + tensors_count = nullptr; + } + static std::map<int, int>* tensors_count; +}; +std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr; + +TEST_F(NnApiSignedQuantizationTest, + HybridFullyConnectedMapsToSignedSymmOnSdk29) { + nnapi_mock_->SetAndroidSdkVersion(29); + + HybridFullyConnectedOpModel m( + nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2, + /*input=*/{TensorType_FLOAT32, {2, 10}}, + /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}); + m.SetSignedWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }); + m.SetBias({1, 2, 3}); + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }); + + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 4); // fc_input, fc_weights, 
fc_bias, fc_output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + 1); // dequantize_weights_input +} + +TEST_F(NnApiSignedQuantizationTest, + HybridFullyConnectedMapsToSignedSymmOnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + + HybridFullyConnectedOpModel m( + nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2, + /*input=*/{TensorType_FLOAT32, {2, 10}}, + /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0}); + m.SetSignedWeights({ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2 + }); + m.SetBias({1, 2, 3}); + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1 + }); + + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 4); // fc_input, fc_weights, fc_bias, fc_output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + 1); // dequantize_weights_input +} + +template <typename FilterType> +class BaseConvolutionOpModel : public SingleOpModelWithNNAPI { + public: + BaseConvolutionOpModel( + const NnApi* nnapi, TfLiteRegistration* registration, + const TensorData& input, const TensorData& filter, + const TensorData& output, int stride_width = 2, int stride_height = 2, + enum Padding padding = Padding_VALID, + enum ActivationFunctionType activation = ActivationFunctionType_NONE, + int dilation_width_factor = 1, int dilation_height_factor = 1, + std::initializer_list<FilterType> filter_data = {}) { + SingleOpModelWithNNAPI::Init(nnapi); + + input_ = AddInput(input); + + if (filter_data.size()) { + filter_ = AddConstInput(filter, filter_data); + } else { + filter_ = AddInput(filter); + } + + int bias_size = GetShape(filter_)[0]; + if (input.type == TensorType_FLOAT32) { + bias_ = AddInput({TensorType_FLOAT32, {bias_size}}); + } else { + // This is a quantized version. The scale of 'bias' depends on the scales + // of input and filter. Supposedly this is correctly set during quantized + // training. + if (filter.per_channel_quantization) { + // per channel quantization. + std::vector<float> bias_scale( + filter.per_channel_quantization_scales.size()); + std::vector<int64_t> bias_zero_points( + filter.per_channel_quantization_scales.size()); + for (size_t i = 0; i < filter.per_channel_quantization_scales.size(); + ++i) { + bias_scale[i] = + input.scale * filter.per_channel_quantization_scales[i]; + bias_zero_points[i] = 0; + } + tflite::TensorType bias_type = TensorType_INT32; + if (input.type == TensorType_INT16) { + // In case of 16-bit, the bias type is set to be int64. + bias_type = TensorType_INT64; + } + TensorData bias{bias_type, + {bias_size}, + /*min=*/0, + /*max=*/0, + /*scale=*/0, + /*zero_point=*/0, + true, + /*per_channel_quantization_scales=*/bias_scale, + /*per_channel_quantization_offsets=*/bias_zero_points, + /*channel_index=*/0}; + bias_ = AddInput(bias); + } else { + // per tensor quantization.
+ auto bias_scale = GetScale(input_) * GetScale(filter_); + TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale}; + bias_ = AddInput(bias); + } + } + + output_ = AddOutput(output); + + SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions, + CreateConv2DOptions( + builder_, padding, stride_width, stride_height, activation, + dilation_width_factor, dilation_height_factor) + .Union()); + + resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D, + registration); + BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)}, + /*num_threads=*/-1, + /* allow_fp32_relax_to_fp16 */ false, + /*apply_delegate=*/false); + compilation_status_ = ApplyDelegate(); + } + + protected: + int input_; + int filter_; + int bias_; + int output_; +}; + +class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> { + public: + using BaseConvolutionOpModel::BaseConvolutionOpModel; + + void SetInput(std::initializer_list<float> data) { + QuantizeAndPopulate<uint8_t>(input_, data); + } + + void SetFilter(std::initializer_list<float> data) { + QuantizeAndPopulate<uint8_t>(filter_, data); + } + + void SetBias(std::initializer_list<float> data) { + QuantizeAndPopulate<int32_t>(bias_, data); + } + + std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); } + std::vector<float> GetDequantizedOutput() { + return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST_F(NnApiSignedQuantizationTest, + Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) { + QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(), + ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64}, + {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, + {TensorType_UINT8, {}, -127, 128}); + m.SetInput({ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }); + m.SetFilter({ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }); + m.SetBias({1, 2, 3}); + + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 3); // input, filter, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +TEST_F(NnApiSignedQuantizationTest, + Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(), + ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64}, + {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64}, + {TensorType_UINT8, {}, -127, 128}); + m.SetInput({ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }); + m.SetFilter({ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }); + m.SetBias({1, 2, 3}); + + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + 
ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 3); // input, filter, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +class PerChannelQuantizedConvolutionOpModel + : public BaseConvolutionOpModel<int8_t> { + public: + using BaseConvolutionOpModel::BaseConvolutionOpModel; + + void SetInput(std::initializer_list<float> data) { + QuantizeAndPopulate<int8_t>(input_, data); + } + + void SetFilter(std::initializer_list<float> data) { + PerChannelSymmetricQuantizeAndPopulate(filter_, data); + } + + void SetBias(std::initializer_list<float> data) { + PerChannelQuantizeBias(bias_, data); + } + + std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); } + std::vector<float> GetDequantizedOutput() { + return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_), + GetZeroPoint(output_)); + } +}; + +TEST_F(NnApiSignedQuantizationTest, + Conv2dSignedPerTensorMapsToUnsignedOnSdk29) { + nnapi_mock_->SetAndroidSdkVersion(29); + PerChannelQuantizedConvolutionOpModel m( + nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, + {TensorType_INT8, + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + {2, 2, 2, 2}, + 0, + 0, + 0, + 0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/{1}, + /*per_channel_quantization_offsets=*/{0}, + /*channel_index=*/0}, + {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, + /*stride_width=*/1, /*stride_height=*/1); + m.SetInput({ + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 + }); + m.SetFilter( + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + { + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 + }); + m.SetBias({3, -2}); + + // Invoke and verify output. 
+ // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 3); // input, filter, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +TEST_F(NnApiSignedQuantizationTest, + Conv2dSignedPerTensorMapsToSignedOnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + PerChannelQuantizedConvolutionOpModel m( + nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, + {TensorType_INT8, + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + {2, 2, 2, 2}, + 0, + 0, + 0, + 0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/{1}, + /*per_channel_quantization_offsets=*/{0}, + /*channel_index=*/0}, + {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, + /*stride_width=*/1, /*stride_height=*/1); + m.SetInput({ + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 + }); + m.SetFilter( + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + { + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 + }); + m.SetBias({3, -2}); + + // Invoke and verify output.
+ // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 3); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + 3); // input, filter, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +TEST_F(NnApiSignedQuantizationTest, + Conv2dSignedPerChannelMapsToUnsignedOnSdk29) { + PerChannelQuantizedConvolutionOpModel m( + nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, + {TensorType_INT8, + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + {2, 2, 2, 2}, + 0, + 0, + 0, + 0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/{1, 2}, + /*per_channel_quantization_offsets=*/{0, 0}, + /*channel_index=*/0}, + {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, + /*stride_width=*/1, /*stride_height=*/1); + m.SetInput({ + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 + }); + m.SetFilter( + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + { + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 + }); + m.SetBias({3, -2}); + + // Invoke and verify output. 
+ // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 4); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 2); // input, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL), + 1); // filter + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + PerChannelQuantizedConvolutionOpModel m( + nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(), + {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, + {TensorType_INT8, + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + {2, 2, 2, 2}, + 0, + 0, + 0, + 0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/{1, 2}, + /*per_channel_quantization_offsets=*/{0, 0}, + /*channel_index=*/0}, + {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, + /*stride_width=*/1, /*stride_height=*/1); + m.SetInput({ + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 + }); + m.SetFilter( + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + { + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 7, 8, // out channel = 1, y = 0, x = 0 + 5, 6, // out channel = 1, y = 0, x = 1 + 3, 4, // out channel = 1, y = 1, x = 0 + 1, 2, // out channel = 1, y = 1, x = 1 + }); + m.SetBias({3, -2}); + + // Invoke and verify output. 
+ // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 4); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + 2); // input, output + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL), + 1); // filter + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), + 4); // padding, stride_width, stride_height, activation +} + +class QuantizeOpModel : public SingleOpModelWithNNAPI { + public: + QuantizeOpModel(const NnApi* nnapi, const TensorData& input, + const TensorData& output) { + SingleOpModelWithNNAPI::Init(nnapi); + input_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions, + CreateQuantizeOptions(builder_).Union()); + + BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1, + /* allow_fp32_relax_to_fp16 */ false, + /*apply_delegate=*/false); + compilation_status_ = ApplyDelegate(); + } + + void SetInput(std::initializer_list<float> data) { + PopulateTensor(input_, data); + } + + template <typename T> + void SetInputAndQuantize(std::initializer_list<float> data) { + QuantizeAndPopulate<T>(input_, data); + } + + template <typename T> + std::vector<T> GetOutput() { + return ExtractVector<T>(output_); + } + + private: + int input_; + int output_; +}; + +TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) { + // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8 + QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}}, + {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127}); + + m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 1); // output +} + +TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8 + QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}}, + {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127}); + + m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 1); // output +} + +// Quantize with Int8 output is only supported since SDK level 30. 
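The (scale, zero_point) pairs asserted in the comments of the quantize and dequantize tests around here follow from the standard TfLite affine mapping real = scale * (q - zero_point). A small self-contained check of the ranges used in these tests (the helper function is illustrative only, not part of the test file):

#include <cmath>
#include <cstdio>

// Derives asymmetric quantization parameters for a float range [rmin, rmax]
// mapped onto an integer range [qmin, qmax].
void PrintQuantParams(float rmin, float rmax, int qmin, int qmax) {
  const float scale = (rmax - rmin) / static_cast<float>(qmax - qmin);
  const int zero_point = static_cast<int>(std::round(qmin - rmin / scale));
  std::printf("scale=%g zero_point=%d\n", scale, zero_point);
}

int main() {
  PrintQuantParams(-63.5f, 64.0f, 0, 255);     // uint8: scale=0.5 zero_point=127
  PrintQuantParams(-63.5f, 64.0f, -128, 127);  // int8:  scale=0.5 zero_point=-1
  return 0;
}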
+TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + // [-63.5, 64] -> scale=0.5 zero_point=-1 for INT8 + QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}}, + {TensorType_INT8, {2, 5}, 0, 0, 0.5, -1}); + + m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + 1); // output +} + +class DequantizeOpModel : public SingleOpModelWithNNAPI { + public: + DequantizeOpModel(const NnApi* nnapi, TensorType type, + std::initializer_list<int> shape, float scale, + int32_t zero_point, int version) { + SingleOpModelWithNNAPI::Init(nnapi); + const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point}; + input_ = AddInput(input_tensor_data); + output_ = AddOutput({TensorType_FLOAT32, shape}); + SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions, + CreateDequantizeOptions(builder_).Union()); + + resolver_ = absl::make_unique<SingleOpResolver>( + BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(), + version); + + BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1, + /* allow_fp32_relax_to_fp16 */ false, + /*apply_delegate=*/false); + compilation_status_ = ApplyDelegate(); + } + + template <typename T> + void SetInput(std::initializer_list<T> data) { + PopulateTensor(input_, data); + } + + std::vector<float> GetOutput() { return ExtractVector<float>(output_); } + + private: + int input_; + int output_; +}; + +TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) { + // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8 + DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5, + 127, 1); + + m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // output +} + +TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8 + DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5, + 127, 1); + + m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // output +} + +// Dequantize with Int8 input is only supported for symmetric quantization on +// SDK level 29.
+TEST_F(NnApiSignedQuantizationTest, + DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) { + // [-63.5, 64] -> scale=0.5, zero_point=0 for INT8 + DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0, + 2); + + m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // output +} + +// Dequantize with Int8 input is only supported since SDK level 30. +TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) { + nnapi_mock_->SetAndroidSdkVersion(30); + // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8 + DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1, + 2); + + m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}); + m.Invoke(); + EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk); + + ASSERT_EQ(tensors_count->size(), 2); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + tensors_count->end()); + ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32), + tensors_count->end()); + + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED), + 1); // input + EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), + 1); // output +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h index 3c30a0479fa..8415df58b8b 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h +++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h @@ -46,6 +46,7 @@ enum { ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7, ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11, ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13, + ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14, }; /** diff --git a/tensorflow/lite/nnapi/nnapi_handler.h b/tensorflow/lite/nnapi/nnapi_handler.h index 00c0b23e3cf..16e1e9fea10 100644 --- a/tensorflow/lite/nnapi/nnapi_handler.h +++ b/tensorflow/lite/nnapi/nnapi_handler.h @@ -118,6 +118,11 @@ class NnApiHandler { const ANeuralNetworksOperandType* type) { return Value; }; } + void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model, + const ANeuralNetworksOperandType* type)) { + nnapi_->ANeuralNetworksModel_addOperand = stub; + } + template <int Value> void SetOperandValueReturns() { nnapi_->ANeuralNetworksModel_setOperandValue = @@ -268,6 +273,23 @@ class NnApiHandler { }; } + template <int Value> + void SetPriorityReturns() { + nnapi_->ANeuralNetworksCompilation_setPriority = + [](ANeuralNetworksCompilation* compilation, int priority) -> int { + return Value; + }; + } + + template <int Value> + void SetOperandSymmPerChannelQuantParamsReturns() { + nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams = + [](ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) { + return Value; + }; + } + /* * Sets the SDK Version in the nnapi structure. * If set_unsupported_ops_to_null is set to true, all the functions not
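For reference, the int8 input handling added in NNAPIDelegateKernel::Invoke() earlier in this patch reduces to the following conversion: on NNAPI 1.3+ (SDK 30) signed values pass through unchanged, while older versions re-bias them into the uint8 range. This is a sketch only; the helper name is hypothetical and not part of the delegate:

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch of the int8 input conversion in NNAPIDelegateKernel::Invoke when the
// NNAPI-equivalent operand type is int32: NNAPI 1.3+ keeps the signed values,
// earlier versions re-bias them into the uint8 range by adding 128.
std::vector<int32_t> ConvertInt8Input(const std::vector<int8_t>& data,
                                      bool use_int8_asymm_signed) {
  std::vector<int32_t> converted(data.size());
  for (std::size_t i = 0; i < data.size(); ++i) {
    converted[i] =
        static_cast<int32_t>(data[i]) + (use_int8_asymm_signed ? 0 : 128);
  }
  return converted;
}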