Add NNAPI Delegation support for requantization use cases (transforming a quantized tensor into another quantized one with different quantization parameters) by converting the operation into a dequantize-quantize pair.
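
Conceptually the pair computes, per element, real = in_scale * (q_in - in_zero_point) followed by q_out = round(real / out_scale) + out_zero_point. A minimal standalone sketch of that arithmetic for uint8 (the helper name and explicit clamping are illustrative, not taken from the TFLite sources):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Requantize one uint8 value: dequantize to a real value with the input
// parameters, then quantize with the output parameters.
uint8_t Requantize(uint8_t q_in, float in_scale, int32_t in_zero_point,
                   float out_scale, int32_t out_zero_point) {
  // Dequantize: real = in_scale * (q_in - in_zero_point).
  const float real = in_scale * (static_cast<int32_t>(q_in) - in_zero_point);
  // Quantize: q_out = round(real / out_scale) + out_zero_point,
  // clamped to the uint8 range.
  const int32_t q_out =
      static_cast<int32_t>(std::lround(real / out_scale)) + out_zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q_out)));
}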

PiperOrigin-RevId: 325816400
Change-Id: I55f8726f0478e9795c667a9cf4eddda084ed95a7
Author: Stefano Galarraga, committed by TensorFlower Gardener
Date: 2020-08-10 09:09:59 -07:00
Parent: 2c6f7e24dd
Commit: 4ed4c14e4c
4 changed files with 24 additions and 4 deletions


@@ -128,6 +128,7 @@
   * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`).
   * Deprecate `Interpreter::UseNNAPI(bool)` C++ API
     * Prefer using `NnApiDelegate()` and related delegate configuration methods directly.
+  * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair.
   * <ADD RELEASE NOTES HERE>
 * `tf.random`:
   * <ADD RELEASE NOTES HERE>
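
As a usage note for the deprecation above, a minimal sketch of attaching the NNAPI delegate directly (assuming `interpreter` is an already-built `tflite::Interpreter`; error handling abbreviated):

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

// Configure the stateful NNAPI delegate instead of calling
// Interpreter::UseNNAPI(true).
tflite::StatefulNnApiDelegate::Options options;
options.execution_preference =
    tflite::StatefulNnApiDelegate::Options::kSustainedSpeed;
tflite::StatefulNnApiDelegate delegate(options);
if (interpreter->ModifyGraphWithDelegate(&delegate) != kTfLiteOk) {
  // Delegation failed; the interpreter keeps running on the CPU kernels.
}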


@@ -309,6 +309,8 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/UInt8UInt8.+,29
+QuantizeOpTest/Int8Int8.+,30
 QuantizeOpTest/INT8,30
 # rank
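
(In this acceleration test list, the number after each test name is the minimum Android API level at which NNAPI is expected to accelerate the test: 29 for the uint8 requantization cases, 30 for int8-to-int8, matching the kMinSdkVersionForNNAPI13 check in the validation change below.)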


@@ -2436,13 +2436,20 @@ bool NNAPIDelegateKernel::Validate(
              "Input should be Float32.", &val_ctx);
     } break;
     case kTfLiteBuiltinQuantize: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectMaxOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto value_type = context->tensors[node->inputs->data[0]].type;
-      Expect(value_type == kTfLiteFloat32,
+      Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
              NNAPIValidationFailureType::kUnsupportedInputType,
-             "Value should be Float32.", &val_ctx);
+             "Value should be quantized or Float32.", &val_ctx);
+      if (IsQuantized(value_type)) {
+        const auto quantization_params =
+            context->tensors[node->inputs->data[0]].params;
+        Expect(quantization_params.scale > 0.f,
+               NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
+               "Quantization scale should be > 0.", &val_ctx);
+      }
       const auto output_type = context->tensors[node->outputs->data[0]].type;
       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
         Expect(output_type == kTfLiteUInt8,
@@ -3284,6 +3291,15 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
     } break;
     case kTfLiteBuiltinQuantize: {
+      auto input_index = mapping_args.node->inputs->data[0];
+      // NNAPI supports only quantization from float, not requantization.
+      // If the input is already quantized, dequantize it by adding a
+      // Dequantize node before this one.
+      if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
+        mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
+                                            mapping_args.node_index);
+      }
       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
     } break;
     case kTfLiteBuiltinReduceAny: {
@@ -4254,7 +4270,7 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
         Map(context, reg->builtin_code, reg->version, target_sdk_version_,
-            {context, &builder, node, &model_state_outputs_,
+            {context, &builder, node, node_index, &model_state_outputs_,
              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
            &nn_op_type));
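
For illustration, here is a sketch of the graph fragment the delegation produces, expressed against the raw NNAPI C API rather than the delegate's NNAPIOpBuilder; the helper name, operand-index bookkeeping, and omitted error details are assumptions for the example, not TFLite code.

#include <android/NeuralNetworks.h>

// Requantization becomes DEQUANTIZE into a float32 intermediate, followed by
// QUANTIZE into the output operand, whose scale/zero point carry the new
// quantization parameters. NNAPI assigns operand indices sequentially, so the
// caller passes the index the intermediate operand will receive.
bool AddRequantizePair(ANeuralNetworksModel* model, uint32_t input,
                       uint32_t output, uint32_t next_operand_index,
                       const uint32_t* dims, uint32_t rank) {
  ANeuralNetworksOperandType float_tensor = {};
  float_tensor.type = ANEURALNETWORKS_TENSOR_FLOAT32;
  float_tensor.dimensionCount = rank;
  float_tensor.dimensions = dims;
  if (ANeuralNetworksModel_addOperand(model, &float_tensor) !=
      ANEURALNETWORKS_NO_ERROR) {
    return false;
  }
  const uint32_t intermediate = next_operand_index;
  // DEQUANTIZE: quantized input -> float32 intermediate.
  const uint32_t deq_in[] = {input};
  const uint32_t deq_out[] = {intermediate};
  if (ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_DEQUANTIZE, 1,
                                        deq_in, 1, deq_out) !=
      ANEURALNETWORKS_NO_ERROR) {
    return false;
  }
  // QUANTIZE: float32 intermediate -> requantized output.
  const uint32_t q_in[] = {intermediate};
  const uint32_t q_out[] = {output};
  return ANeuralNetworksModel_addOperation(model, ANEURALNETWORKS_QUANTIZE, 1,
                                           q_in, 1, q_out) ==
         ANEURALNETWORKS_NO_ERROR;
}

ANEURALNETWORKS_QUANTIZE itself was introduced in NNAPI 1.2 (API level 29), which is why the Validate change above keeps the kMinSdkVersionForNNAPI12 check.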


@@ -111,6 +111,7 @@ struct NNAPIOpMappingArgs {
   TfLiteContext* context;
   NNAPIOpBuilder* builder;
   TfLiteNode* node;
+  int node_index;
   std::vector<int>* model_state_outputs;
   std::vector<int>* model_state_tfl_inputs;
   std::vector<std::tuple<int, int>>* feedback_loops;