Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate
PiperOrigin-RevId: 317846923
Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3
Parent: 7198070f4d
Commit: e071e66f03
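In short, this change teaches the NNAPI delegate to hand TFLite int8 tensors to NNAPI 1.3 (Android API 30) as TENSOR_QUANT8_ASYMM_SIGNED instead of converting them to unsigned operands first. A minimal sketch of the two encodings follows (helper names are illustrative, not part of this change): both satisfy real_value = scale * (q - zero_point); pre-1.3 delegation shifts int8 data and zero point by +128 to fit TENSOR_QUANT8_ASYMM, while 1.3+ passes them through unchanged.

```cpp
#include <cstdint>

// Sketch only: how the same real value is encoded in the two operand types.
// real_value = scale * (q - zero_point) holds for both encodings.
struct QuantParams {
  float scale;
  int32_t zero_point;
};

// Pre-NNAPI-1.3 path: int8 -> uint8 requires shifting data and zero point.
inline uint8_t ToUnsignedQuant(int8_t q) {
  return static_cast<uint8_t>(static_cast<int32_t>(q) + 128);
}
inline QuantParams ToUnsignedParams(QuantParams p) {
  return {p.scale, p.zero_point + 128};  // scale is unchanged
}
// NNAPI 1.3+ path: TENSOR_QUANT8_ASYMM_SIGNED accepts the int8 data and its
// original zero point directly, so no per-element conversion is needed.
```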
@@ -190,6 +190,32 @@ cc_test(
    ],
)

cc_test(
    name = "nnapi_delegate_signed_quantization_test",
    size = "small",
    srcs = [
        "nnapi_delegate_signed_quantization_test.cc",
    ],
    tags = [
        "no_mac",
        "no_windows",
        "tflite_not_portable_ios",
    ],
    deps = [
        ":nnapi_delegate",
        ":nnapi_delegate_mock_test",
        "//tensorflow/lite:framework",
        "//tensorflow/lite:kernel_api",
        "//tensorflow/lite:minimal_logging",
        "//tensorflow/lite/c:common",
        "//tensorflow/lite/kernels:builtin_ops",
        "//tensorflow/lite/kernels:test_util",
        "//tensorflow/lite/nnapi:nnapi_implementation",
        "//tensorflow/lite/nnapi:nnapi_lib",
        "@com_google_googletest//:gtest",
    ],
)

cc_test(
    name = "quant_lstm_sup_test",
    size = "small",
@@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30
FloatActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwishBias
QuantizedActivationsOpTest/Relu*
QuantizedActivationsOpTest/PRelu,29
QuantizedActivationsOpTest/PReluSameShapes,29
QuantizedActivationsOpTest/PReluInt8.+,30

# add_test
FloatAddOpModel/.+
@@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+

# dequantize_test
DequantizeOpTest/Uint8
DequantizeOpTest/Int8,30

# depth_to_space_test
DepthToSpaceOpModel/Float32
@@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin
QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29
HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29
HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29
HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29
FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29
@@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29
TypesGatherOpTest/Float32Int32,29
TypesGatherOpTest/Int32Int32,29
TypesGatherOpTest/Uint8Int32,29
TypesGatherOpTest/Int8Int32,29

# hashtable_lookup_test
# All tests except the string one should be accelerated
@@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29

# quantize_test
QuantizeOpTest/UINT8,29
QuantizeOpTest/INT8,30

# rank

# reduce_test
-Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+
-ConstUint8(Mean|Sum)OpTest/.+
-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale
-ConstInt8MeanOpTest.QuantizedDifferentScale
ConstUint8(Max|Min)OpTest/.+,29
ConstUint8(Mean)OpTest/.+
Constint8(Mean|Max|Min)OpTest/.+
ConstInt8(Mean|Max|Min)OpTest/.+,29
ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29
ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29
ConstFloat(Mean|Any)OpTest/NotKeepDims
@@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
    case kTfLiteBuiltinConcatenation:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinExpandDims:
    case kTfLiteBuiltinGather:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinHardSwish:
@@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) {
enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
};

// Returns the SDK level to target when delegating to the given devices.
@@ -1065,6 +1067,8 @@ class NNAPIOpBuilder {
        tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
    const bool need_int8_conversion =
        tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
    const bool use_int8_asymm_signed =
        tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
    int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
    if (ann_tensor_index != -1) {
      indices->push_back(ann_tensor_index);
@@ -1095,12 +1099,25 @@ class NNAPIOpBuilder {
        nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
        break;
      case kTfLiteUInt8:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (scale == 0) {
          // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
          // NNAPI.
          scale = 1;
        }
        break;
      case kTfLiteInt8:
        // If explicit int8 conversion is needed, we still need
        // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
        nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion)
                      ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
                      : ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
        if (use_int8_asymm_signed) {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
        } else if (need_int8_conversion) {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        } else {
          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
        }
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (tensor->quantization.type == kTfLiteAffineQuantization) {
@@ -1130,8 +1147,7 @@ class NNAPIOpBuilder {
          operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
        }
        if (scale == 0) {
          // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
          // with zero scale are not valid in NNAPI.
          // QUANT8 tensors with zero scale are not valid in NNAPI.
          scale = 1;
        }
      }
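The int8 branch above now encodes a three-way decision. A standalone restatement of that mapping, as a hypothetical helper written for clarity only (the function name is not part of the patch):

```cpp
// Sketch of the nn_type decision for kTfLiteInt8 tensors in AddTensor above.
int NnTypeForInt8Tensor(bool use_int8_asymm_signed, bool need_int8_conversion) {
  if (use_int8_asymm_signed) {
    // NNAPI 1.3+: pass int8 through with its real zero point.
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
  }
  if (need_int8_conversion) {
    // Older NNAPI: convert the data to uint8 and shift the zero point by 128.
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
  }
  // Otherwise int8 is only representable as symmetric (zero point == 0).
  return ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
}
```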
@@ -1248,7 +1264,6 @@ class NNAPIOpBuilder {
            "setting new operand value", nnapi_errno_);
      }
    }

    indices->push_back(ann_tensor_index);
    return kTfLiteOk;
  }
@@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate(
    bool is_accelerator_specified,
    std::vector<NNAPIValidationFailure>* map_failures) {
  OpValidationContext val_ctx{true, map_failures};

  switch (builtin_code) {
    case kTfLiteBuiltinAdd: {
      ExpectMaxOpVersion(version, 2, &val_ctx);
@@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate(
             "Supported op versions are 1 and 2 only", &val_ctx);

      const auto& input = context->tensors[node->inputs->data[0]];
      Expect(input.type != kTfLiteFloat16,
             NNAPIValidationFailureType::kUnsupportedInputType,
             "kTfLiteFloat16 not supported as input", &val_ctx);
      if (android_sdk_version < kMinSdkVersionForNNAPI12) {
        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
      } else {
        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);

        const auto zero_point = input.params.zero_point;
        Expect(input.type != kTfLiteInt8 ||
               (zero_point == 0 &&
                android_sdk_version >= kMinSdkVersionForNNAPI12),
               NNAPIValidationFailureType::kUnsupportedInputType,
               "NN API supports int8 type since version 1.2 but only for "
               "symmetric quantization.",
               &val_ctx);
        if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
            input.type == kTfLiteInt8) {
          const auto zero_point = input.params.zero_point;
          Expect(zero_point == 0,
                 NNAPIValidationFailureType::kUnsupportedInputType,
                 "NN API supports int8 type since version 1.2 but only for "
                 "symmetric quantization.",
                 &val_ctx);
        }
      }
    } break;
    case kTfLiteBuiltinFloor: {
      ExpectOpVersion(version, 1, &val_ctx);
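Read together, the old and new branches amount to the following acceptance rule for an int8 DEQUANTIZE input (hypothetical helper written only to summarize the logic; in TF Lite, kMinSdkVersionForNNAPI12 is 29 and kMinSdkVersionForNNAPI13 is 30):

```cpp
// Sketch: when the validator above accepts an int8 DEQUANTIZE input.
bool Int8DequantizeSupported(int android_sdk_version, int32_t zero_point) {
  if (android_sdk_version < 29) return false;             // int8 needs NNAPI 1.2+
  if (android_sdk_version == 29) return zero_point == 0;  // symmetric only
  return true;  // NNAPI 1.3 handles asymmetric signed directly
}
```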
@@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate(
                                 &val_ctx);
      const TfLiteType input_type =
          context->tensors[node->inputs->data[0]].type;
      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
                           kTfLiteUInt8);
      const TfLiteType output_type =
          context->tensors[node->outputs->data[0]].type;
      ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
                   NNAPIValidationFailureType::kUnsupportedOutputType,
                   "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
                   "kTfLiteUInt8.",
                   &val_ctx);
      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
                             kTfLiteUInt8, kTfLiteInt8);

        ExpectTypeIn(
            output_type,
            {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
            NNAPIValidationFailureType::kUnsupportedOutputType,
            "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
            "kTfLiteUInt8, kTfLiteInt8.",
            &val_ctx);
      } else {
        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
                             kTfLiteUInt8);

        ExpectTypeIn(
            output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
            NNAPIValidationFailureType::kUnsupportedOutputType,
            "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
            "kTfLiteUInt8.",
            &val_ctx);
      }
    } break;
    case kTfLiteBuiltinPrelu: {
      ExpectOpVersion(version, 1, &val_ctx);
      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                 &val_ctx);
      ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
      const auto input_type = context->tensors[node->inputs->data[0]].type;
      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
                           kTfLiteInt8);
    } break;
    case kTfLiteBuiltinTile: {
      ExpectOpVersion(version, 1, &val_ctx);
@@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate(
                                 &val_ctx);
    } break;
    case kTfLiteBuiltinGather: {
      ExpectOpVersion(version, 1, &val_ctx);
      ExpectOpVersion(version, 2, &val_ctx);
      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                 &val_ctx);
      const auto input_type = context->tensors[node->inputs->data[0]].type;
      const auto& positions = context->tensors[node->inputs->data[1]];

      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
                           kTfLiteInt32, kTfLiteUInt8);
      ExpectTypeIn(positions.type,
                   {kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8},
                   NNAPIValidationFailureType::kUnsupportedInputType,
                   "Positions type should be one of kTfLiteFloat32, "
                   "kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8",
                   &val_ctx);
                           kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);

      Expect(positions.type == kTfLiteInt32,
             NNAPIValidationFailureType::kUnsupportedInputType,
             "Positions type should be one of kTfLiteInt32", &val_ctx);
      Expect(positions.dims->size != 0,
             NNAPIValidationFailureType::kUnsupportedOperandRank,
             "0-dimension args are not supported by NNAPI.", &val_ctx);
@@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate(
                                 &val_ctx);
      // Tensor indices: split_dim: 0, value: 1
      const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
      EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
                           kTfLiteInt32);
      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
                             kTfLiteInt8, kTfLiteInt32);
      } else {
        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
                             kTfLiteInt32);
      }
      const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
      Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
             NNAPIValidationFailureType::kUnsupportedInputType,
@@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate(
             NNAPIValidationFailureType::kUnsupportedInputType,
             "Value should be Float32.", &val_ctx);
      const auto output_type = context->tensors[node->outputs->data[0]].type;
      Expect(output_type == kTfLiteUInt8,
             NNAPIValidationFailureType::kUnsupportedOutputType,
             "Output should be kTfLiteUInt8.", &val_ctx);
      if (android_sdk_version < kMinSdkVersionForNNAPI13) {
        Expect(output_type == kTfLiteUInt8,
               NNAPIValidationFailureType::kUnsupportedOutputType,
               "Output should be kTfLiteUInt8.", &val_ctx);
      } else {
        ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
                     NNAPIValidationFailureType::kUnsupportedOutputType,
                     "Output should be kTfLiteUInt8.", &val_ctx);
      }
      const auto quantization_params =
          context->tensors[node->outputs->data[0]].params;
      Expect(quantization_params.scale > 0.f,
             NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
             "Quantization scale should be > 0.", &val_ctx);
    } break;
    case kTfLiteBuiltinReduceAny:
    case kTfLiteBuiltinReduceMin:
    case kTfLiteBuiltinReduceMax: {
      ExpectOpVersion(version, 1, &val_ctx);
    case kTfLiteBuiltinReduceAny: {
      ExpectOpVersion(version, 2, &val_ctx);
      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                 &val_ctx);
      Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
             NNAPIValidationFailureType::kUnsupportedOutputType,
             "NNAPI does not support generating a scalar as output.", &val_ctx);
      if (builtin_code == kTfLiteBuiltinReduceProd) {
        const auto input_type = context->tensors[node->inputs->data[0]].type;
        Expect(input_type == kTfLiteFloat32,
               NNAPIValidationFailureType::kUnsupportedInputType,
               "NNAPI only supports floating point REDUCE_PROD.", &val_ctx);
      }
    } break;
    case kTfLiteBuiltinReduceMin:
    case kTfLiteBuiltinReduceMax: {
      ExpectMaxOpVersion(version, 2, &val_ctx);
      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                 &val_ctx);
      const auto input_tensor = context->tensors[node->inputs->data[0]];
      const auto input_type = input_tensor.type;
      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
                           kTfLiteInt8);
      Expect(input_tensor.dims->size != 0,
             NNAPIValidationFailureType::kUnsupportedOutputType,
             "NNAPI does not support generating a scalar as output.", &val_ctx);
    } break;
    case kTfLiteBuiltinDepthToSpace: {
      const TfLiteType input_type =
@@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map(
    case kTfLiteBuiltinGather: {
      auto builtin = reinterpret_cast<TfLiteGatherParams*>(
          mapping_args.node->builtin_data);
      mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0],
                                           /* hybrid_op */ false,
                                           /* scalar_as_tensor */ false);

      mapping_args.builder->AddScalarInt32Operand(builtin->axis);

      mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
                                           /* hybrid_op */ false,
                                           /* scalar_as_tensor */ false);

                                           /* tensor_flags */ 0);
      *nn_op_type = ANEURALNETWORKS_GATHER;
    } break;
    case kTfLiteBuiltinBidirectionalSequenceLstm: {
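For context, the Gather mapping is shaped by operand order, which is why part of the wiring moves into AddOpsAndTensors later in this diff. A comment sketch of the assumed layout (not taken verbatim from the patch):

```cpp
// Assumed operand layout implied by the mapping above:
//   NNAPI GATHER inputs:  0 = input tensor, 1 = axis (int32 scalar),
//                         2 = indices tensor
//   TFLite GATHER inputs: 0 = params, 1 = indices; the axis lives in
//                         TfLiteGatherParams::axis rather than in a tensor,
// so the delegate must interleave a scalar between the two tensor inputs.
```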
@@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
  // absolute indices but NN api indices inputs by relative indices.
  int relative_input_index = 0;

  const bool use_int8_asymm_signed =
      target_sdk_version_ >= kMinSdkVersionForNNAPI13;

  size_t input_offset = 0;
  for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
    if (absolute_input_index == kTfLiteOptionalTensor) {
@@ -3472,9 +3518,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
        }
      } else if (tensor->type == kTfLiteInt8 &&
                 ann_type_equivalent == kTfLiteInt32) {
        for (int i = 0; i < num_elements; ++i) {
          reinterpret_cast<int32_t*>(input_ptr)[i] =
              static_cast<const int32_t>(tensor->data.int8[i]) + 128;
        if (use_int8_asymm_signed) {
          for (int i = 0; i < num_elements; ++i) {
            reinterpret_cast<int32_t*>(input_ptr)[i] =
                static_cast<const int32_t>(tensor->data.int8[i]);
          }
        } else {
          for (int i = 0; i < num_elements; ++i) {
            reinterpret_cast<int32_t*>(input_ptr)[i] =
                static_cast<const int32_t>(tensor->data.int8[i]) + 128;
          }
        }
      } else {
        context->ReportError(
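A quick numeric check of the branch above (values chosen purely for illustration): with scale 0.5 and zero point -1, the int8 value 7 decodes to 0.5 * (7 - (-1)) = 4.0 either way.

```cpp
// Illustration only: both paths preserve the represented real value.
int8_t q = 7;
float scale = 0.5f;
int32_t zp_signed = -1;                                    // original zero point
int32_t widened_signed = static_cast<int32_t>(q);          // NNAPI 1.3+: 7
int32_t widened_unsigned = static_cast<int32_t>(q) + 128;  // older: 135, zp 127
// scale * (7 - (-1)) == scale * (135 - 127) == 4.0f
```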
@@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
      &dequantize_mapping, &allocation_memory_mapping_,
      &nnapi_to_tflite_op_mapping_, nn_model_.get(),
      nnapi_errno);

  // If we have target accelerators the target SDK version might be
  // different than the current android version.
  target_sdk_version_ = nnapi_->android_sdk_version;
  if (!nnapi_devices_.empty()) {
    TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
        context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
  }

  // Add Tensors.
  for (auto node_index : nodes_) {
    // Obtain the op and registration.
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
    const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
    const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
    const bool need_int8_conversion =
        target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
        NeedInt8Conversion(context, reg->builtin_code, node);
    const bool use_int8_asymm_signed =
        target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;

    int input_tensor_flags = 0;
    if (scalar_as_tensor) {
      input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
    }
    if (use_int8_asymm_signed) {
      input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
    }

    // On SDK level less than 30, h_swish will be lowered into supported NNAPI
    // operations. Since SDK level 30, h_swish is supported as a single
@@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
          break;
        case kTfLiteInt8:
          if (constant_value.allocation_type == kTfLiteMmapRo) {
            builder.AddScalarInt32Operand(
                static_cast<int32_t>(*constant_value.data.int8) + 128);
            if (need_int8_conversion) {
              builder.AddScalarInt32Operand(
                  static_cast<int32_t>(*constant_value.data.int8) + 128);
            } else {
              builder.AddScalarInt32Operand(*constant_value.data.int8);
            }
          } else {
            builder.AddSingleValueTensorAsScalarOperand(
                constant_value_id, ANEURALNETWORKS_INT32);
@@ -3836,7 +3909,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
        // specifying the output height and width, is not added and
        // instead the height and width will be added individually as
        // scalars by the mapping function returned by Map().
        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
                                                     input_tensor_flags));
      }
    } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
      // The K parameter tensor is not handled here but by the functor
@@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
      // the else clause below
      continue;
    } else if (reg->builtin_code == kTfLiteBuiltinGather) {
      // Everything is added during Map since input tensors
      // Everything else is added during Map since input tensors
      // have different order.
      if (input_pos == 0) {
        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
                                                     input_tensor_flags));
      }
      continue;
    } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
               input_pos == 1) {
@@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
      // the axis, needs to be converted to a scalar since TFLite uses a
      // tensor but NNAPI uses a scalar as the axis.
      if (input_pos == 0) {
        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
        TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
                                                     input_tensor_flags));
      } else {
        const int axis_id = node->inputs->data[1];
        const TfLiteTensor& axis_tensor = context->tensors[axis_id];
@@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
              std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
              operand_tensor.params, &tensor_index));
          break;
        case kTfLiteInt8:
          TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
              ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1},
              std::vector<int8_t>(1, operand_tensor.data.int8[0]),
              operand_tensor.params, &tensor_index));
          break;
        case kTfLiteInt8: {
          auto params = operand_tensor.params;
          if (params.scale == 0.0) {
            params.scale = 1.0;
          }

          if (use_int8_asymm_signed) {
            TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
                ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
                operand_tensor.type, {1},
                std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
                &tensor_index));
          } else {
            TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
                ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
                {1},
                std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
                params, &tensor_index));
          }
        } break;
        case kTfLiteInt32:
          TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
              ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
@@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
      }
    }

    // If we have target accelerators the target SDK version might be
    // different than the current android version.
    int target_sdk_version = nnapi_->android_sdk_version;
    if (!nnapi_devices_.empty()) {
      TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
          context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
    }

    // Get op type and operands
    // Fails if the Validate function failed
    int nn_op_type;
    TF_LITE_ENSURE_STATUS(
        Map(context, reg->builtin_code, reg->version, target_sdk_version,
        Map(context, reg->builtin_code, reg->version, target_sdk_version_,
            {context, &builder, node, &model_state_outputs_,
             &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
            &nn_op_type));
@@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
    if (need_int8_conversion) {
      output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
    }
    if (use_int8_asymm_signed) {
      output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
    }
    for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
      const auto output_index = node->outputs->data[output_pos];

@@ -341,6 +341,9 @@ class NNAPIDelegateKernel {

  std::vector<int> nnapi_to_tflite_op_mapping_;

  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors
  int target_sdk_version_ = 27;  // kMinSdkVersionForNNAPI

  void AddDequantizeOperatorsWhereNeeded(
      const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
      int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);
@@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
    ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
    ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>();
    EventWaitReturns<ANEURALNETWORKS_NO_ERROR>();
    SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>();
    SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>();
    SetNnapiSupportedDevice("test-device", android_sdk_version);
  }

@@ -0,0 +1,920 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>

#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/fully_connected.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {

namespace ops {
namespace builtin {

TfLiteRegistration* Register_CONVOLUTION_REF();
TfLiteRegistration* Register_DEQUANTIZE();

}  // namespace builtin
}  // namespace ops

namespace {

class SingleOpModelWithNNAPI : public SingleOpModel {
 public:
  SingleOpModelWithNNAPI() = default;
  void Init(const NnApi* nnapi) {
    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
    SetDelegate(stateful_delegate_.get());
  }

  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }

  void SetBufferHandle(int index, TfLiteBufferHandle handle) {
    interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
  }
  TfLiteStatus GetCompilationStatus() { return compilation_status_; }

 protected:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
  TfLiteStatus compilation_status_;
};

class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
 public:
  HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
                              const TensorData& input,
                              const TensorData& weights,
                              const TensorData& output = {TensorType_FLOAT32},
                              bool asymmetric_inputs = false)
      : batches_(batches), units_(units) {
    SingleOpModelWithNNAPI::Init(nnapi);
    int total_input_size = 1;
    for (size_t i = 0; i < input.shape.size(); ++i) {
      total_input_size *= input.shape[i];
    }
    input_size_ = total_input_size / batches_;

    input_ = AddInput(input);
    weights_ = AddInput(weights);

    TensorData bias{TensorType_FLOAT32, {units_}};
    bias_ = AddInput(bias);

    output_ = AddOutput(output);

    auto options = CreateFullyConnectedOptions(
                       builder_, ActivationFunctionType_RELU,
                       tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
                       false, asymmetric_inputs)
                       .Union();
    SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
                 BuiltinOptions_FullyConnectedOptions, options);
    resolver_ = absl::make_unique<SingleOpResolver>(
        BuiltinOperator_FULLY_CONNECTED,
        ops::builtin::Register_FULLY_CONNECTED_PIE());
    BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
                     /*num_threads=*/-1,
                     /* allow_fp32_relax_to_fp16 */ false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }
  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
  void SetWeights(const std::vector<float>& data) {
    SymmetricQuantizeAndPopulate(weights_, data);
  }
  void SetSignedWeights(std::initializer_list<float> f) {
    SignedSymmetricQuantizeAndPopulate(weights_, f);
  }

  void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }

  int input_size() { return input_size_; }
  int num_units() { return units_; }
  int num_batches() { return batches_; }

 protected:
  int input_;
  int weights_;
  int bias_;
  int output_;

  int batches_;
  int units_;
  int input_size_;
};

struct NnApiSignedQuantizationTest
    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
  static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
  void SetUp() override {
    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
    nnapi_mock_->StubAddOperandWith(
        [](ANeuralNetworksModel* model,
           const ANeuralNetworksOperandType* type) -> int {
          const auto nn_tensor_type = type->type;
          if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
            tensors_count->insert({nn_tensor_type, 0});
          }
          tensors_count->at(nn_tensor_type)++;
          return ANEURALNETWORKS_NO_ERROR;
        });
  }
  void TearDown() override { tensors_count->clear(); }
  static void TearDownTestSuite() {
    delete tensors_count;
    tensors_count = nullptr;
  }
  static std::map<int, int>* tensors_count;
};
std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;
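One detail worth noting: `StubAddOperandWith` takes a plain function pointer (see the NnApiHandler hunk near the end of this diff), so the lambda installed in `SetUp` must be captureless — presumably why `tensors_count` is a static member rather than captured state. A test body then reads the counters roughly like this (sketch, not from the patch):

```cpp
// Sketch: how a test inspects the counters populated by the stub.
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED should only appear when the
// delegate actually emitted signed operands (i.e. on SDK >= 30).
auto it = tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED);
bool emitted_signed = (it != tensors_count->end()) && it->second > 0;
```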

TEST_F(NnApiSignedQuantizationTest,
       HybridFullyConnectedMapsToSignedSymmOnSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(29);

  HybridFullyConnectedOpModel m(
      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
      /*input=*/{TensorType_FLOAT32, {2, 10}},
      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
  m.SetSignedWeights({
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
  });
  m.SetBias({1, 2, 3});
  m.SetInput({
      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
  });

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            4);  // fc_input, fc_weights, fc_bias, fc_output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // dequantize_weights_input
}

TEST_F(NnApiSignedQuantizationTest,
       HybridFullyConnectedMapsToSignedSymmOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);

  HybridFullyConnectedOpModel m(
      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
      /*input=*/{TensorType_FLOAT32, {2, 10}},
      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
  m.SetSignedWeights({
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
  });
  m.SetBias({1, 2, 3});
  m.SetInput({
      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
  });

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            4);  // fc_input, fc_weights, fc_bias, fc_output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // dequantize_weights_input
}

template <typename FilterType>
class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
 public:
  BaseConvolutionOpModel(
      const NnApi* nnapi, TfLiteRegistration* registration,
      const TensorData& input, const TensorData& filter,
      const TensorData& output, int stride_width = 2, int stride_height = 2,
      enum Padding padding = Padding_VALID,
      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
      int dilation_width_factor = 1, int dilation_height_factor = 1,
      std::initializer_list<FilterType> filter_data = {}) {
    SingleOpModelWithNNAPI::Init(nnapi);

    input_ = AddInput(input);

    if (filter_data.size()) {
      filter_ = AddConstInput(filter, filter_data);
    } else {
      filter_ = AddInput(filter);
    }

    int bias_size = GetShape(filter_)[0];
    if (input.type == TensorType_FLOAT32) {
      bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
    } else {
      // This is a quantized version. The scale of 'bias' depends on the scales
      // of input and filter. Supposedly this is correctly set during quantized
      // training.
      if (filter.per_channel_quantization) {
        // per channel quantization.
        std::vector<float> bias_scale(
            filter.per_channel_quantization_scales.size());
        std::vector<int64_t> bias_zero_points(
            filter.per_channel_quantization_scales.size());
        for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
             ++i) {
          bias_scale[i] =
              input.scale * filter.per_channel_quantization_scales[i];
          bias_zero_points[i] = 0;
        }
        tflite::TensorType bias_type = TensorType_INT32;
        if (input.type == TensorType_INT16) {
          // In case of 16-bit, the bias type is set to be int 64.
          bias_type = TensorType_INT64;
        }
        TensorData bias{bias_type,
                        {bias_size},
                        /*min=*/0,
                        /*max=*/0,
                        /*scale=*/0,
                        /*zero_point=*/0,
                        true,
                        /*per_channel_quantization_scales=*/bias_scale,
                        /*per_channel_quantization_offsets=*/bias_zero_points,
                        /*channel_index=*/0};
        bias_ = AddInput(bias);
      } else {
        // per tensor quantization.
        auto bias_scale = GetScale(input_) * GetScale(filter_);
        TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
        bias_ = AddInput(bias);
      }
    }

    output_ = AddOutput(output);

    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
                 CreateConv2DOptions(
                     builder_, padding, stride_width, stride_height, activation,
                     dilation_width_factor, dilation_height_factor)
                     .Union());

    resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
                                                    registration);
    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
                     /*num_threads=*/-1,
                     /* allow_fp32_relax_to_fp16 */ false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

 protected:
  int input_;
  int filter_;
  int bias_;
  int output_;
};

class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
 public:
  using BaseConvolutionOpModel::BaseConvolutionOpModel;

  void SetInput(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    QuantizeAndPopulate<uint8_t>(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    QuantizeAndPopulate<int32_t>(bias_, data);
  }

  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
                               GetScale(output_), GetZeroPoint(output_));
  }
};

TEST_F(NnApiSignedQuantizationTest,
       Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) {
  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
                                ops::builtin::Register_CONVOLUTION_REF(),
                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
                                {TensorType_UINT8, {}, -127, 128});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
                                ops::builtin::Register_CONVOLUTION_REF(),
                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
                                {TensorType_UINT8, {}, -127, 128});
  m.SetInput({
      // First batch
      1, 1, 1, 1,  // row = 1
      2, 2, 2, 2,  // row = 2
      // Second batch
      1, 2, 3, 4,  // row = 1
      1, 2, 3, 4,  // row = 2
  });
  m.SetFilter({
      1, 2, 3, 4,    // first 2x2 filter
      -1, 1, -1, 1,  // second 2x2 filter
      -1, -1, 1, 1,  // third 2x2 filter
  });
  m.SetBias({1, 2, 3});

  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

class PerChannelQuantizedConvolutionOpModel
    : public BaseConvolutionOpModel<int8_t> {
 public:
  using BaseConvolutionOpModel::BaseConvolutionOpModel;

  void SetInput(std::initializer_list<float> data) {
    QuantizeAndPopulate<int8_t>(input_, data);
  }

  void SetFilter(std::initializer_list<float> data) {
    PerChannelSymmetricQuantizeAndPopulate(filter_, data);
  }

  void SetBias(std::initializer_list<float> data) {
    PerChannelQuantizeBias(bias_, data);
  }

  std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
  std::vector<float> GetDequantizedOutput() {
    return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
                              GetZeroPoint(output_));
  }
};

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
  nnapi_mock_->SetAndroidSdkVersion(29);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1},
       /*per_channel_quantization_offsets=*/{0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerTensorMapsToSignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1},
       /*per_channel_quantization_offsets=*/{0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 3);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            3);  // input, filter, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

class QuantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
                  const TensorData& output) {
    SingleOpModelWithNNAPI::Init(nnapi);
    input_ = AddInput(input);
    output_ = AddOutput(output);
    SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
                 CreateQuantizeOptions(builder_).Union());

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /* allow_fp32_relax_to_fp16 */ false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  void SetInput(std::initializer_list<float> data) {
    PopulateTensor(input_, data);
  }

  template <typename T>
  void SetInputAndQuantize(std::initializer_list<float> data) {
    QuantizeAndPopulate<T>(input_, data);
  }

  template <typename T>
  std::vector<T> GetOutput() {
    return ExtractVector<T>(output_);
  }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

// Quantize with Int8 output is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=-1 for INT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // output
}

class DequantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  DequantizeOpModel(const NnApi* nnapi, TensorType type,
                    std::initializer_list<int> shape, float scale,
                    int32_t zero_point, int version) {
    SingleOpModelWithNNAPI::Init(nnapi);
    const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
    input_ = AddInput(input_tensor_data);
    output_ = AddOutput({TensorType_FLOAT32, shape});
    SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
                 CreateDequantizeOptions(builder_).Union());

    resolver_ = absl::make_unique<SingleOpResolver>(
        BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
        version);

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /* allow_fp32_relax_to_fp16 */ false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  template <typename T>
  void SetInput(std::initializer_list<T> data) {
    PopulateTensor(input_, data);
  }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

// Dequantize with Int8 input is only supported for symmetric quantization on
// SDK level 29
TEST_F(NnApiSignedQuantizationTest,
       DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
  // [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

// Dequantize with Int8 input is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
            1);  // output
}

}  // namespace
}  // namespace tflite

int main(int argc, char** argv) {
  ::tflite::LogToStderr();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
@@ -46,6 +46,7 @@ enum {
  ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
};

/**
@@ -118,6 +118,11 @@ class NnApiHandler {
           const ANeuralNetworksOperandType* type) { return Value; };
  }

  void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model,
                                    const ANeuralNetworksOperandType* type)) {
    nnapi_->ANeuralNetworksModel_addOperand = stub;
  }

  template <int Value>
  void SetOperandValueReturns() {
    nnapi_->ANeuralNetworksModel_setOperandValue =
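Because the parameter is a plain function pointer, callers can pass a captureless lambda (it decays to the required pointer type). A minimal usage sketch, assuming `handler` is a pointer to an NnApiHandler (the variable name is illustrative):

```cpp
// Sketch: installing a stub that records nothing and always succeeds.
// Only captureless lambdas convert implicitly to the function-pointer
// parameter; anything needing state must reach it through statics/globals.
handler->StubAddOperandWith(
    [](ANeuralNetworksModel* /*model*/,
       const ANeuralNetworksOperandType* /*type*/) -> int {
      return ANEURALNETWORKS_NO_ERROR;
    });
```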
@@ -268,6 +273,23 @@ class NnApiHandler {
        };
  }

  template <int Value>
  void SetPriorityReturns() {
    nnapi_->ANeuralNetworksCompilation_setPriority =
        [](ANeuralNetworksCompilation* compilation, int priority) -> int {
      return Value;
    };
  }

  template <int Value>
  void SetOperandSymmPerChannelQuantParamsReturns() {
    nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams =
        [](ANeuralNetworksModel* model, int32_t index,
           const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) {
          return Value;
        };
  }

  /*
   * Sets the SDK Version in the nnapi structure.
   * If set_unsupported_ops_to_null is set to true, all the functions not