Add NNAPI Delegation support for requantization use cases (transforming a quantized tensor into another quantized one with different quantization parameters) by converting the operation into a dequantize-quantize pair.
PiperOrigin-RevId: 325816400
Change-Id: I55f8726f0478e9795c667a9cf4eddda084ed95a7
parent 2c6f7e24dd
commit 4ed4c14e4c
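For readers skimming the diff: requantization re-expresses a tensor quantized with one (scale, zero_point) pair in terms of another pair. Since NNAPI only quantizes from float, the delegate decomposes the operation into a Dequantize followed by a Quantize. A minimal standalone sketch of that decomposition (hypothetical helper, not part of this commit; fixed uint8-to-int8 types assumed for brevity):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Dequantize-quantize pair:
//   real = in_scale * (q - in_zero)            (Dequantize)
//   q'   = round(real / out_scale) + out_zero  (Quantize)
int8_t RequantizeViaFloat(uint8_t q_in, float in_scale, int32_t in_zero,
                          float out_scale, int32_t out_zero) {
  const float real = in_scale * static_cast<float>(q_in - in_zero);  // Dequantize
  const int32_t q_out =
      static_cast<int32_t>(std::round(real / out_scale)) + out_zero;  // Quantize
  return static_cast<int8_t>(std::clamp(q_out, -128, 127));  // clamp to int8 range
}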
@@ -128,6 +128,7 @@
     * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`).
   * Deprecate `Interpreter::UseNNAPI(bool)` C++ API
     * Prefer using `NnApiDelegate()` and related delegate configuration methods directly.
+  * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair.
   * <ADD RELEASE NOTES HERE>
 * `tf.random`:
   * <ADD RELEASE NOTES HERE>
@@ -309,6 +309,8 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29

 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/UInt8UInt8.+,29
+QuantizeOpTest/Int8Int8.+,30
 QuantizeOpTest/INT8,30

 # rank
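Reading note on the list above: each entry appears to pair a test-name pattern with the minimum Android API level at which the test is expected to be accelerated. The new UInt8UInt8 requantization entries run from API 29 (NNAPI 1.2), while Int8Int8 requires API 30 (NNAPI 1.3), matching the output-type gate on kMinSdkVersionForNNAPI13 in the Validate change below.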
@@ -2436,13 +2436,20 @@ bool NNAPIDelegateKernel::Validate(
              "Input should be Float32.", &val_ctx);
     } break;
     case kTfLiteBuiltinQuantize: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectMaxOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto value_type = context->tensors[node->inputs->data[0]].type;
-      Expect(value_type == kTfLiteFloat32,
+      Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
              NNAPIValidationFailureType::kUnsupportedInputType,
-             "Value should be Float32.", &val_ctx);
+             "Value should be quantized or Float32.", &val_ctx);
+      if (IsQuantized(value_type)) {
+        const auto quantization_params =
+            context->tensors[node->inputs->data[0]].params;
+        Expect(quantization_params.scale > 0.f,
+               NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
+               "Quantization scale should be > 0.", &val_ctx);
+      }
       const auto output_type = context->tensors[node->outputs->data[0]].type;
       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
         Expect(output_type == kTfLiteUInt8,
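The new scale check exists because the synthesized Dequantize computes real = scale * (q - zero_point): with scale == 0 every input collapses to zero, and with scale < 0 the value ordering flips, so such nodes are better left on the CPU. A sketch of the invariant as a standalone predicate (hypothetical name; TfLiteQuantizationParams is the real TFLite struct):

#include "tensorflow/lite/c/common.h"

// Delegable requantization input: strictly positive scale, so that
// real = scale * (q - zero_point) is a faithful, order-preserving mapping.
inline bool HasDelegableRequantScale(const TfLiteQuantizationParams& params) {
  return params.scale > 0.f;
}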
@@ -3284,6 +3291,15 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
     } break;
     case kTfLiteBuiltinQuantize: {
+      auto input_index = mapping_args.node->inputs->data[0];
+      // NNAPI supports only quantization from float, not requantization.
+      // If the input is already quantized, dequantize it by adding a
+      // Dequantize node before this one.
+      if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
+        mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
+                                            mapping_args.node_index);
+      }
+
       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
     } break;
     case kTfLiteBuiltinReduceAny: {
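To make the rewrite concrete, here is a self-contained walk-through of what the inserted Dequantize plus NNAPI's QUANTIZE compute for one element (parameter values are illustrative, not taken from the commit):

#include <cmath>
#include <cstdio>

int main() {
  const int q_in = 130;           // uint8 input element
  const float in_scale = 0.5f;    // input quantization params (made up)
  const int in_zero = 128;
  const float out_scale = 0.25f;  // output quantization params (made up)
  const int out_zero = 0;

  const float real = in_scale * (q_in - in_zero);  // inserted Dequantize: 1.0
  const int q_out =
      static_cast<int>(std::round(real / out_scale)) + out_zero;  // QUANTIZE: 4
  std::printf("real=%.2f q_out=%d\n", real, q_out);
  return 0;
}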
@@ -4254,7 +4270,7 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
         Map(context, reg->builtin_code, reg->version, target_sdk_version_,
-            {context, &builder, node, &model_state_outputs_,
+            {context, &builder, node, node_index, &model_state_outputs_,
              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
             &nn_op_type));

@@ -111,6 +111,7 @@ struct NNAPIOpMappingArgs {
   TfLiteContext* context;
   NNAPIOpBuilder* builder;
   TfLiteNode* node;
+  int node_index;
   std::vector<int>* model_state_outputs;
   std::vector<int>* model_state_tfl_inputs;
   std::vector<std::tuple<int, int>>* feedback_loops;