Add NNAPI Delegation support for requantization use cases (transforming a quantized tensor into another quantized one with different quantization parameters) by converting the operation into a dequantize-quantize pair.
PiperOrigin-RevId: 325816400 Change-Id: I55f8726f0478e9795c667a9cf4eddda084ed95a7
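For context, the decomposition this commit relies on reduces to simple arithmetic: requantization is a dequantize with the input parameters followed by a quantize with the output parameters. A minimal illustrative sketch, assuming per-tensor affine quantization; the function and parameter names are hypothetical, not code from the delegate:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Requantize q_in from (in_scale, in_zero_point) to (out_scale,
    // out_zero_point) by dequantizing to float and quantizing back.
    int8_t Requantize(int8_t q_in, float in_scale, int32_t in_zero_point,
                      float out_scale, int32_t out_zero_point) {
      const float real_value = in_scale * (q_in - in_zero_point);  // dequantize
      const int32_t q_out =
          static_cast<int32_t>(std::round(real_value / out_scale)) +
          out_zero_point;                                          // quantize
      return static_cast<int8_t>(std::min(127, std::max(-128, q_out)));
    }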
parent 2c6f7e24dd
commit 4ed4c14e4c
RELEASE.md: record the new capability under the TF Lite release notes.

@@ -128,6 +128,7 @@
   * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`).
   * Deprecate `Interpreter::UseNNAPI(bool)` C++ API
     * Prefer using `NnApiDelegate()` and related delegate configuration methods directly.
+  * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair.
   * <ADD RELEASE NOTES HERE>
 * `tf.random`:
   * <ADD RELEASE NOTES HERE>
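The deprecation note above points at explicit delegate construction. One way to do this with the C++ `StatefulNnApiDelegate` is sketched below; the helper function is illustrative, not part of the library:

    #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
    #include "tensorflow/lite/interpreter.h"

    // Attach the NNAPI delegate explicitly instead of calling the
    // deprecated Interpreter::UseNNAPI(bool).
    TfLiteStatus ApplyNnApi(tflite::Interpreter* interpreter) {
      tflite::StatefulNnApiDelegate::Options options;
      options.execution_preference =
          tflite::StatefulNnApiDelegate::Options::kSustainedSpeed;
      // The delegate must outlive the interpreter; a static instance is a
      // simple way to guarantee that in a sketch.
      static tflite::StatefulNnApiDelegate delegate(options);
      return interpreter->ModifyGraphWithDelegate(&delegate);
    }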
tensorflow/lite/delegates/nnapi/acceleration_test_list.cc: enable the new requantization tests. The trailing number is the minimum Android API level at which the test is expected to run accelerated; 29 corresponds to NNAPI 1.2 and 30 to NNAPI 1.3, which is when signed int8 support arrived.

@@ -309,6 +309,8 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
 
 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/UInt8UInt8.+,29
+QuantizeOpTest/Int8Int8.+,30
 QuantizeOpTest/INT8,30
 
 # rank
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, Validate(): accept quantized inputs to QUANTIZE (now up to op version 2) and require a positive input scale (a sketch of the IsQuantized helper follows the hunk).

@@ -2436,13 +2436,20 @@ bool NNAPIDelegateKernel::Validate(
              "Input should be Float32.", &val_ctx);
     } break;
     case kTfLiteBuiltinQuantize: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectMaxOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto value_type = context->tensors[node->inputs->data[0]].type;
-      Expect(value_type == kTfLiteFloat32,
+      Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
              NNAPIValidationFailureType::kUnsupportedInputType,
-             "Value should be Float32.", &val_ctx);
+             "Value should be quantized or Float32.", &val_ctx);
+      if (IsQuantized(value_type)) {
+        const auto quantization_params =
+            context->tensors[node->inputs->data[0]].params;
+        Expect(quantization_params.scale > 0.f,
+               NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
+               "Quantization scale should be > 0.", &val_ctx);
+      }
       const auto output_type = context->tensors[node->outputs->data[0]].type;
       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
         Expect(output_type == kTfLiteUInt8,
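IsQuantized is an existing helper in the delegate. As a sketch of what the check above admits, assuming it covers the two 8-bit quantized types NNAPI handles on this path:

    #include "tensorflow/lite/c/common.h"

    // Sketch of the helper relied on above: QUANTIZE inputs may now be
    // float or one of the 8-bit quantized types.
    bool IsQuantized(TfLiteType type) {
      switch (type) {
        case kTfLiteUInt8:
        case kTfLiteInt8:
          return true;
        default:
          // kTfLiteInt16 and other types are not accepted here.
          return false;
      }
    }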
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, Map(): when the QUANTIZE input is already quantized, prepend a Dequantize so NNAPI only ever quantizes from float (the resulting op sequence is sketched after the hunk).

@@ -3284,6 +3291,15 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
     } break;
     case kTfLiteBuiltinQuantize: {
+      auto input_index = mapping_args.node->inputs->data[0];
+      // NNAPI supports only quantization from float, not requantization.
+      // Dequantize the input first by adding a Dequantize node before this
+      // one.
+      if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
+        mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
+                                            mapping_args.node_index);
+      }
+
       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
     } break;
     case kTfLiteBuiltinReduceAny: {
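The net effect on the emitted NNAPI graph, sketched as comments (tensor names and parameters are illustrative):

    // Before this change, a QUANTIZE with an already-quantized input failed
    // validation, since ANEURALNETWORKS_QUANTIZE expects a float input.
    // With the rewrite, the delegate emits:
    //
    //   t0 (uint8, scale_a, zp_a)
    //       --ANEURALNETWORKS_DEQUANTIZE--> t_float (float32)   // inserted
    //   t_float (float32)
    //       --ANEURALNETWORKS_QUANTIZE----> t1 (uint8, scale_b, zp_b)

Trading a float round-trip for compatibility keeps the op delegated to the accelerator instead of forcing a fallback to the CPU.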
tensorflow/lite/delegates/nnapi/nnapi_delegate.cc, AddOpsAndTensors(): thread the node index through to Map().

@@ -4254,7 +4270,7 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
         Map(context, reg->builtin_code, reg->version, target_sdk_version_,
-            {context, &builder, node, &model_state_outputs_,
+            {context, &builder, node, node_index, &model_state_outputs_,
              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
             &nn_op_type));
 
tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h: add the node_index field consumed by the mapping above (a toy example of the aggregate-initialization constraint follows the hunk).

@@ -111,6 +111,7 @@ struct NNAPIOpMappingArgs {
   TfLiteContext* context;
   NNAPIOpBuilder* builder;
   TfLiteNode* node;
+  int node_index;
   std::vector<int>* model_state_outputs;
   std::vector<int>* model_state_tfl_inputs;
   std::vector<std::tuple<int, int>>* feedback_loops;
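Because Map() receives this struct via aggregate initialization (see the AddOpsAndTensors hunk above), the new field must be inserted at the matching position in every brace-init list. A self-contained toy illustration with hypothetical names:

    #include <cstdio>

    // Minimal illustration of why NNAPIOpMappingArgs' field order matters:
    // callers use aggregate initialization, so fields bind positionally.
    struct MappingArgsDemo {
      int context_id;
      int node_id;
      int node_index;  // newly inserted field
      int error_code;
    };

    int main() {
      // Mirrors the updated call site: node_index added in positional order.
      MappingArgsDemo args{/*context_id=*/1, /*node_id=*/7, /*node_index=*/3,
                           /*error_code=*/0};
      std::printf("node_index=%d\n", args.node_index);
      return 0;
    }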