Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate

PiperOrigin-RevId: 317846923
Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3

commit e071e66f03 (parent 7198070f4d)
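
NNAPI 1.3 (Android 11, API level 30) adds the ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED operand type, which matches TensorFlow Lite's int8 quantization spec: signed values in [-128, 127] with a per-tensor scale and zero point. Before this change the delegate could lower a TFLite int8 tensor only to QUANT8_SYMM (zero point forced to 0) or re-encode it as unsigned QUANT8_ASYMM with a +128 shift. A minimal sketch of the quantization scheme involved (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// real_value ~= scale * (q - zero_point), with q in [-128, 127].
int8_t QuantizeAsymmSigned(float x, float scale, int32_t zero_point) {
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}
```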
diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
@@ -190,6 +190,32 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "nnapi_delegate_signed_quantization_test",
+    size = "small",
+    srcs = [
+        "nnapi_delegate_signed_quantization_test.cc",
+    ],
+    tags = [
+        "no_mac",
+        "no_windows",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":nnapi_delegate",
+        ":nnapi_delegate_mock_test",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:kernel_api",
+        "//tensorflow/lite:minimal_logging",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/kernels:test_util",
+        "//tensorflow/lite/nnapi:nnapi_implementation",
+        "//tensorflow/lite/nnapi:nnapi_lib",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "quant_lstm_sup_test",
     size = "small",
diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc
@@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30
 FloatActivationsOpTest/HardSwish
 QuantizedActivationsOpTest/HardSwish
 QuantizedActivationsOpTest/HardSwishBias
+QuantizedActivationsOpTest/Relu*
+QuantizedActivationsOpTest/PRelu,29
+QuantizedActivationsOpTest/PReluSameShapes,29
+QuantizedActivationsOpTest/PReluInt8.+,30
 
 # add_test
 FloatAddOpModel/.+
@@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+
 
 # dequantize_test
 DequantizeOpTest/Uint8
+DequantizeOpTest/Int8,30
 
 # depth_to_space_test
 DepthToSpaceOpModel/Float32
@@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin
 QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29
 HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29
 HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29
+HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29
 FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+
 QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+
 QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29
@@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29
 TypesGatherOpTest/Float32Int32,29
 TypesGatherOpTest/Int32Int32,29
 TypesGatherOpTest/Uint8Int32,29
+TypesGatherOpTest/Int8Int32,29
 
 # hashtable_lookup_test
 # All test excepted the string one should be accelerated
@@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
 
 # quantize_test
 QuantizeOpTest/UINT8,29
+QuantizeOpTest/INT8,30
 
+# rank
+
 # reduce_test
 -Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+
 -ConstUint8(Mean|Sum)OpTest/.+
+-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale
+-ConstInt8MeanOpTest.QuantizedDifferentScale
 ConstUint8(Max|Min)OpTest/.+,29
 ConstUint8(Mean)OpTest/.+
-Constint8(Mean|Max|Min)OpTest/.+
+ConstInt8(Mean|Max|Min)OpTest/.+,29
 ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29
 ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29
 ConstFloat(Mean|Any)OpTest/NotKeepDims
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
     case kTfLiteBuiltinConcatenation:
     case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinExpandDims:
+    case kTfLiteBuiltinGather:
     case kTfLiteBuiltinGreater:
     case kTfLiteBuiltinGreaterEqual:
     case kTfLiteBuiltinHardSwish:
@@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) {
 enum {
   NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
   NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
+  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
 };
 
 // Returns the SDK level to target when delegating to the given devices.
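
Note: the new flag joins the existing ones in a plain bitmask; call sites OR flags into an int and the builder tests membership with bitwise AND, as the hunks below do. A short usage sketch (illustration, not patch content):

```cpp
int tensor_flags = 0;
tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;  // request signed path
const bool use_int8_asymm_signed =
    tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;  // true
```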
@@ -1065,6 +1067,8 @@ class NNAPIOpBuilder {
         tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
     const bool need_int8_conversion =
         tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
+    const bool use_int8_asymm_signed =
+        tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
     int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
     if (ann_tensor_index != -1) {
       indices->push_back(ann_tensor_index);
@@ -1095,12 +1099,25 @@ class NNAPIOpBuilder {
         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
         break;
       case kTfLiteUInt8:
+        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
+        scale = tensor->params.scale;
+        zeroPoint = tensor->params.zero_point;
+        if (scale == 0) {
+          // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
+          // NNAPI.
+          scale = 1;
+        }
+        break;
       case kTfLiteInt8:
         // If explicit int8 conversion is needed, we still need
         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
-        nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion)
-                      ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
-                      : ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
+        if (use_int8_asymm_signed) {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
+        } else if (need_int8_conversion) {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
+        } else {
+          nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
+        }
         scale = tensor->params.scale;
         zeroPoint = tensor->params.zero_point;
         if (tensor->quantization.type == kTfLiteAffineQuantization) {
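
Note: after this hunk the operand type chosen for a quantized TFLite tensor can be summarized by the following sketch (a hypothetical helper written only for illustration; the patch keeps this logic inline in the switch above):

```cpp
int32_t ChooseNnType(TfLiteType type, bool use_int8_asymm_signed,
                     bool need_int8_conversion) {
  if (type == kTfLiteUInt8) return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
  // kTfLiteInt8:
  if (use_int8_asymm_signed) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;  // NNAPI 1.3+
  }
  if (need_int8_conversion) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;  // data shifted by +128
  }
  return ANEURALNETWORKS_TENSOR_QUANT8_SYMM;  // zero point must be 0
}
```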
@@ -1130,8 +1147,7 @@ class NNAPIOpBuilder {
           operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
         }
         if (scale == 0) {
-          // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
-          // with zero scale are not valid in NNAPI.
+          // QUANT8 tensors with zero scale are not valid in NNAPI.
           scale = 1;
         }
       }
@@ -1248,7 +1264,6 @@ class NNAPIOpBuilder {
           "setting new operand value", nnapi_errno_);
       }
     }
-
     indices->push_back(ann_tensor_index);
     return kTfLiteOk;
   }
@@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate(
     bool is_accelerator_specified,
     std::vector<NNAPIValidationFailure>* map_failures) {
   OpValidationContext val_ctx{true, map_failures};
-
   switch (builtin_code) {
     case kTfLiteBuiltinAdd: {
       ExpectMaxOpVersion(version, 2, &val_ctx);
@@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate(
              "Supported op versions are 1 and 2 only", &val_ctx);
 
       const auto& input = context->tensors[node->inputs->data[0]];
-      Expect(input.type != kTfLiteFloat16,
-             NNAPIValidationFailureType::kUnsupportedInputType,
-             "kTfLiteFloat16 not supported as input", &val_ctx);
+      if (android_sdk_version < kMinSdkVersionForNNAPI12) {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
 
-      const auto zero_point = input.params.zero_point;
-      Expect(input.type != kTfLiteInt8 ||
-                 (zero_point == 0 &&
-                  android_sdk_version >= kMinSdkVersionForNNAPI12),
-             NNAPIValidationFailureType::kUnsupportedInputType,
-             "NN API supports int8 type since version 1.2 but only for "
-             "symmetric quantization.",
-             &val_ctx);
+        if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
+            input.type == kTfLiteInt8) {
+          const auto zero_point = input.params.zero_point;
+          Expect(zero_point == 0,
+                 NNAPIValidationFailureType::kUnsupportedInputType,
+                 "NN API supports int8 type since version 1.2 but only for "
+                 "symmetric quantization.",
+                 &val_ctx);
+        }
+      }
     } break;
     case kTfLiteBuiltinFloor: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate(
                                  &val_ctx);
       const TfLiteType input_type =
           context->tensors[node->inputs->data[0]].type;
-      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
-                           kTfLiteUInt8);
       const TfLiteType output_type =
           context->tensors[node->outputs->data[0]].type;
-      ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
+      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
+                             kTfLiteUInt8, kTfLiteInt8);
+
+        ExpectTypeIn(
+            output_type,
+            {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
+            NNAPIValidationFailureType::kUnsupportedOutputType,
+            "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
+            "kTfLiteUInt8, kTfLiteInt8.",
+            &val_ctx);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
+                             kTfLiteUInt8);
+
+        ExpectTypeIn(
+            output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
             NNAPIValidationFailureType::kUnsupportedOutputType,
             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
             "kTfLiteUInt8.",
             &val_ctx);
+      }
     } break;
     case kTfLiteBuiltinPrelu: {
       ExpectOpVersion(version, 1, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
-      ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
+      const auto input_type = context->tensors[node->inputs->data[0]].type;
+      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
+                           kTfLiteInt8);
     } break;
     case kTfLiteBuiltinTile: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate(
                                  &val_ctx);
     } break;
     case kTfLiteBuiltinGather: {
-      ExpectOpVersion(version, 1, &val_ctx);
+      ExpectOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       const auto input_type = context->tensors[node->inputs->data[0]].type;
       const auto& positions = context->tensors[node->inputs->data[1]];
+
       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
-                           kTfLiteInt32, kTfLiteUInt8);
-      ExpectTypeIn(positions.type,
-                   {kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8},
-                   NNAPIValidationFailureType::kUnsupportedInputType,
-                   "Positions type should be one of kTfLiteFloat32, "
-                   "kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8",
-                   &val_ctx);
+                           kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
+
+      Expect(positions.type == kTfLiteInt32,
+             NNAPIValidationFailureType::kUnsupportedInputType,
+             "Positions type should be one of kTfLiteInt32", &val_ctx);
       Expect(positions.dims->size != 0,
              NNAPIValidationFailureType::kUnsupportedOperandRank,
             "0-dimension args are not supported by NNAPI.", &val_ctx);
@@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate(
                                  &val_ctx);
       // Tensor indices: split_dim: 0, value: 1
       const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
+      if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
+        EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
+                             kTfLiteInt8, kTfLiteInt32);
+      } else {
       EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
                            kTfLiteInt32);
+      }
       const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
              NNAPIValidationFailureType::kUnsupportedInputType,
@@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate(
              NNAPIValidationFailureType::kUnsupportedInputType,
              "Value should be Float32.", &val_ctx);
       const auto output_type = context->tensors[node->outputs->data[0]].type;
+      if (android_sdk_version < kMinSdkVersionForNNAPI13) {
       Expect(output_type == kTfLiteUInt8,
              NNAPIValidationFailureType::kUnsupportedOutputType,
              "Output should be kTfLiteUInt8.", &val_ctx);
+      } else {
+        ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
+                     NNAPIValidationFailureType::kUnsupportedOutputType,
+                     "Output should be kTfLiteUInt8.", &val_ctx);
+      }
       const auto quantization_params =
           context->tensors[node->outputs->data[0]].params;
       Expect(quantization_params.scale > 0.f,
              NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
              "Quantization scale should be > 0.", &val_ctx);
     } break;
-    case kTfLiteBuiltinReduceAny:
-    case kTfLiteBuiltinReduceMin:
-    case kTfLiteBuiltinReduceMax: {
-      ExpectOpVersion(version, 1, &val_ctx);
+    case kTfLiteBuiltinReduceAny: {
+      ExpectOpVersion(version, 2, &val_ctx);
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
                                  &val_ctx);
       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
              NNAPIValidationFailureType::kUnsupportedOutputType,
             "NNAPI does not support generating a scalar as output.", &val_ctx);
-      if (builtin_code == kTfLiteBuiltinReduceProd) {
-        const auto input_type = context->tensors[node->inputs->data[0]].type;
-        Expect(input_type == kTfLiteFloat32,
-               NNAPIValidationFailureType::kUnsupportedInputType,
-               "NNAPI only supports floating point REDUCE_PROD.", &val_ctx);
-      }
+    } break;
+    case kTfLiteBuiltinReduceMin:
+    case kTfLiteBuiltinReduceMax: {
+      ExpectMaxOpVersion(version, 2, &val_ctx);
+      ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
+                                 &val_ctx);
+      const auto input_tensor = context->tensors[node->inputs->data[0]];
+      const auto input_type = input_tensor.type;
+      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
+                           kTfLiteInt8);
+      Expect(input_tensor.dims->size != 0,
+             NNAPIValidationFailureType::kUnsupportedOutputType,
+             "NNAPI does not support generating a scalar as output.", &val_ctx);
     } break;
     case kTfLiteBuiltinDepthToSpace: {
       const TfLiteType input_type =
@@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       case kTfLiteBuiltinGather: {
         auto builtin = reinterpret_cast<TfLiteGatherParams*>(
             mapping_args.node->builtin_data);
-        mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0],
-                                             /* hybrid_op */ false,
-                                             /* scalar_as_tensor */ false);
-
         mapping_args.builder->AddScalarInt32Operand(builtin->axis);
-
         mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
                                              /* hybrid_op */ false,
-                                             /* scalar_as_tensor */ false);
-
+                                             /* tensor_flags */ 0);
         *nn_op_type = ANEURALNETWORKS_GATHER;
       } break;
       case kTfLiteBuiltinBidirectionalSequenceLstm: {
@@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
   // absolute indices but NN api indices inputs by relative indices.
   int relative_input_index = 0;
 
+  const bool use_int8_asymm_signed =
+      target_sdk_version_ >= kMinSdkVersionForNNAPI13;
+
   size_t input_offset = 0;
   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
     if (absolute_input_index == kTfLiteOptionalTensor) {
@@ -3472,10 +3518,17 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
           }
         } else if (tensor->type == kTfLiteInt8 &&
                    ann_type_equivalent == kTfLiteInt32) {
+          if (use_int8_asymm_signed) {
+            for (int i = 0; i < num_elements; ++i) {
+              reinterpret_cast<int32_t*>(input_ptr)[i] =
+                  static_cast<const int32_t>(tensor->data.int8[i]);
+            }
+          } else {
           for (int i = 0; i < num_elements; ++i) {
             reinterpret_cast<int32_t*>(input_ptr)[i] =
                 static_cast<const int32_t>(tensor->data.int8[i]) + 128;
           }
+          }
         } else {
           context->ReportError(
               context,
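
Note: the pass-through branch added above works because QUANT8_ASYMM_SIGNED accepts the int8 payload unchanged. The legacy +128 shift exists because re-encoding a signed asymmetric tensor as unsigned QUANT8_ASYMM preserves the represented real value only if the data and the zero point move together (a sketch of the arithmetic, not patch content):

```cpp
// real = scale * (q_s - zp_s)
//      = scale * ((q_s + 128) - (zp_s + 128))  // same value, unsigned encoding
inline int32_t ToUnsignedQuant(int8_t q_signed) {
  return static_cast<int32_t>(q_signed) + 128;  // maps [-128, 127] to [0, 255]
}
```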
@@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
       &dequantize_mapping, &allocation_memory_mapping_,
       &nnapi_to_tflite_op_mapping_, nn_model_.get(),
       nnapi_errno);
+
+  // If we have target accelerators the target SDK version might be
+  // different than the current android version.
+  target_sdk_version_ = nnapi_->android_sdk_version;
+  if (!nnapi_devices_.empty()) {
+    TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
+        context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
+  }
+
   // Add Tensors.
   for (auto node_index : nodes_) {
     // Obtain the op and registration.
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
     const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
     const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
     const bool need_int8_conversion =
+        target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
         NeedInt8Conversion(context, reg->builtin_code, node);
+    const bool use_int8_asymm_signed =
+        target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
+
     int input_tensor_flags = 0;
     if (scalar_as_tensor) {
       input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
     }
+    if (use_int8_asymm_signed) {
+      input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
+    }
 
     // On SDK level less than 30, h_swish will be lowered into supported NNAPI
     // operations. Since SDK level 30, h_swish is supported as a single
@@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
             break;
           case kTfLiteInt8:
             if (constant_value.allocation_type == kTfLiteMmapRo) {
+              if (need_int8_conversion) {
               builder.AddScalarInt32Operand(
                   static_cast<int32_t>(*constant_value.data.int8) + 128);
+              } else {
+                builder.AddScalarInt32Operand(*constant_value.data.int8);
+              }
             } else {
               builder.AddSingleValueTensorAsScalarOperand(
                   constant_value_id, ANEURALNETWORKS_INT32);
|
|||||||
// specifying the output height and width, is not added and
|
// specifying the output height and width, is not added and
|
||||||
// instead the height and width will be added individually as
|
// instead the height and width will be added individually as
|
||||||
// scalars by the mapping function returned by Map().
|
// scalars by the mapping function returned by Map().
|
||||||
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
|
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
|
||||||
|
input_tensor_flags));
|
||||||
}
|
}
|
||||||
} else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
|
} else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
|
||||||
// The K parameter tensor is not handled here but by the functor
|
// The K parameter tensor is not handled here but by the functor
|
||||||
@@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
         // the else clause below
         continue;
       } else if (reg->builtin_code == kTfLiteBuiltinGather) {
-        // Everything is added during Map since input tensors
+        // Everything else is added during Map since input tensors
         // have different order.
+        if (input_pos == 0) {
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
+                                                       input_tensor_flags));
+        }
         continue;
       } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
                  input_pos == 1) {
@@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
         // the axis, needs to be converted to a scalar since TFLite uses a
         // tensor but NNAPI uses a scalar as the axis.
         if (input_pos == 0) {
-          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
+          TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
+                                                       input_tensor_flags));
         } else {
           const int axis_id = node->inputs->data[1];
           const TfLiteTensor& axis_tensor = context->tensors[axis_id];
@@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
                   std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
                   operand_tensor.params, &tensor_index));
               break;
-            case kTfLiteInt8:
-              TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
-                  ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1},
-                  std::vector<int8_t>(1, operand_tensor.data.int8[0]),
-                  operand_tensor.params, &tensor_index));
-              break;
+            case kTfLiteInt8: {
+              auto params = operand_tensor.params;
+              if (params.scale == 0.0) {
+                params.scale = 1.0;
+              }
+
+              if (use_int8_asymm_signed) {
+                TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
+                    ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
+                    operand_tensor.type, {1},
+                    std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
+                    &tensor_index));
+              } else {
+                TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
+                    ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
+                    {1},
+                    std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
+                    params, &tensor_index));
+              }
+            } break;
             case kTfLiteInt32:
               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
                   ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
@@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
       }
     }
 
-    // If we have target accelerators the target SDK version might be
-    // different than the current android version.
-    int target_sdk_version = nnapi_->android_sdk_version;
-    if (!nnapi_devices_.empty()) {
-      TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
-          context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
-    }
-
     // Get op type and operands
     // Fails if the Validate function failed
     int nn_op_type;
     TF_LITE_ENSURE_STATUS(
-        Map(context, reg->builtin_code, reg->version, target_sdk_version,
+        Map(context, reg->builtin_code, reg->version, target_sdk_version_,
             {context, &builder, node, &model_state_outputs_,
             &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
            &nn_op_type));
@@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
     if (need_int8_conversion) {
       output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
     }
+    if (use_int8_asymm_signed) {
+      output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
+    }
     for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
       const auto output_index = node->outputs->data[output_pos];
 
|
|||||||
|
|
||||||
std::vector<int> nnapi_to_tflite_op_mapping_;
|
std::vector<int> nnapi_to_tflite_op_mapping_;
|
||||||
|
|
||||||
|
// Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors
|
||||||
|
int target_sdk_version_ = 27; // kMinSdkVersionForNNAPI13
|
||||||
|
|
||||||
void AddDequantizeOperatorsWhereNeeded(
|
void AddDequantizeOperatorsWhereNeeded(
|
||||||
const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
|
const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
|
||||||
int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);
|
int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);
|
||||||
|
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h
@@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
     ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
     ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>();
     EventWaitReturns<ANEURALNETWORKS_NO_ERROR>();
+    SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>();
+    SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>();
     SetNnapiSupportedDevice("test-device", android_sdk_version);
   }
 
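
Note: the new test file below never talks to a real NNAPI driver. It stubs the mock's AddOperand hook so that every operand the delegate creates bumps a per-type counter, then asserts on those counts per SDK level. The core of the technique, as used in the fixture's SetUp:

```cpp
// tensors_count is a static std::map<int, int>* owned by the fixture.
nnapi_mock_->StubAddOperandWith(
    [](ANeuralNetworksModel* /*model*/,
       const ANeuralNetworksOperandType* type) -> int {
      (*tensors_count)[type->type]++;  // count operands by NNAPI type
      return ANEURALNETWORKS_NO_ERROR;
    });
```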
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc
new file
@@ -0,0 +1,920 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/lite/builtin_ops.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/fully_connected.h"
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/minimal_logging.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
+#include "tensorflow/lite/nnapi/nnapi_implementation.h"
+
+namespace tflite {
+
+namespace ops {
+namespace builtin {
+
+TfLiteRegistration* Register_CONVOLUTION_REF();
+TfLiteRegistration* Register_DEQUANTIZE();
+
+}  // namespace builtin
+}  // namespace ops
+
+namespace {
+
+class SingleOpModelWithNNAPI : public SingleOpModel {
+ public:
+  SingleOpModelWithNNAPI() = default;
+  void Init(const NnApi* nnapi) {
+    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
+    SetDelegate(stateful_delegate_.get());
+  }
+
+  StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
+
+  void SetBufferHandle(int index, TfLiteBufferHandle handle) {
+    interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
+  }
+  TfLiteStatus GetCompilationStatus() { return compilation_status_; }
+
+ protected:
+  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
+  TfLiteStatus compilation_status_;
+};
+
+class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
+ public:
+  HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
+                              const TensorData& input,
+                              const TensorData& weights,
+                              const TensorData& output = {TensorType_FLOAT32},
+                              bool asymmetric_inputs = false)
+      : batches_(batches), units_(units) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+    int total_input_size = 1;
+    for (size_t i = 0; i < input.shape.size(); ++i) {
+      total_input_size *= input.shape[i];
+    }
+    input_size_ = total_input_size / batches_;
+
+    input_ = AddInput(input);
+    weights_ = AddInput(weights);
+
+    TensorData bias{TensorType_FLOAT32, {units_}};
+    bias_ = AddInput(bias);
+
+    output_ = AddOutput(output);
+
+    auto options = CreateFullyConnectedOptions(
+                       builder_, ActivationFunctionType_RELU,
+                       tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+                       false, asymmetric_inputs)
+                       .Union();
+    SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
+                 BuiltinOptions_FullyConnectedOptions, options);
+    resolver_ = absl::make_unique<SingleOpResolver>(
+        BuiltinOperator_FULLY_CONNECTED,
+        ops::builtin::Register_FULLY_CONNECTED_PIE());
+    BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
+                     /*num_threads=*/-1,
+                     /* allow_fp32_relax_to_fp16 */ false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+  void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
+  void SetWeights(const std::vector<float>& data) {
+    SymmetricQuantizeAndPopulate(weights_, data);
+  }
+  void SetSignedWeights(std::initializer_list<float> f) {
+    SignedSymmetricQuantizeAndPopulate(weights_, f);
+  }
+
+  void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+  std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+  int input_size() { return input_size_; }
+  int num_units() { return units_; }
+  int num_batches() { return batches_; }
+
+ protected:
+  int input_;
+  int weights_;
+  int bias_;
+  int output_;
+
+  int batches_;
+  int units_;
+  int input_size_;
+};
+
+struct NnApiSignedQuantizationTest
+    : ::tflite::delegate::nnapi::NnApiDelegateMockTest {
+  static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
+  void SetUp() override {
+    ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
+    nnapi_mock_->StubAddOperandWith(
+        [](ANeuralNetworksModel* model,
+           const ANeuralNetworksOperandType* type) -> int {
+          const auto nn_tensor_type = type->type;
+          if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
+            tensors_count->insert({nn_tensor_type, 0});
+          }
+          tensors_count->at(nn_tensor_type)++;
+          return ANEURALNETWORKS_NO_ERROR;
+        });
+  }
+  void TearDown() override { tensors_count->clear(); }
+  static void TearDownTestSuite() {
+    delete tensors_count;
+    tensors_count = nullptr;
+  }
+  static std::map<int, int>* tensors_count;
+};
+std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;
+
+TEST_F(NnApiSignedQuantizationTest,
+       HybridFullyConnectedMapsToSignedSymmOnSdk29) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+
+  HybridFullyConnectedOpModel m(
+      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
+      /*input=*/{TensorType_FLOAT32, {2, 10}},
+      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
+  m.SetSignedWeights({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
+  });
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            4);  // fc_input, fc_weights, fc_bias, fc_output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            1);  // dequantize_weights_input
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       HybridFullyConnectedMapsToSignedSymmOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+
+  HybridFullyConnectedOpModel m(
+      nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
+      /*input=*/{TensorType_FLOAT32, {2, 10}},
+      /*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
+  m.SetSignedWeights({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 2
+  });
+  m.SetBias({1, 2, 3});
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // b = 1
+  });
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
+            4);  // fc_input, fc_weights, fc_bias, fc_output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1);  // activation
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
+            1);  // dequantize_weights_input
+}
+
+template <typename FilterType>
+class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
+ public:
+  BaseConvolutionOpModel(
+      const NnApi* nnapi, TfLiteRegistration* registration,
+      const TensorData& input, const TensorData& filter,
+      const TensorData& output, int stride_width = 2, int stride_height = 2,
+      enum Padding padding = Padding_VALID,
+      enum ActivationFunctionType activation = ActivationFunctionType_NONE,
+      int dilation_width_factor = 1, int dilation_height_factor = 1,
+      std::initializer_list<FilterType> filter_data = {}) {
+    SingleOpModelWithNNAPI::Init(nnapi);
+
+    input_ = AddInput(input);
+
+    if (filter_data.size()) {
+      filter_ = AddConstInput(filter, filter_data);
+    } else {
+      filter_ = AddInput(filter);
+    }
+
+    int bias_size = GetShape(filter_)[0];
+    if (input.type == TensorType_FLOAT32) {
+      bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+    } else {
+      // This is a quantized version. The scale of 'bias' depends on the scales
+      // of input and filter. Supposedly this is correctly set during quantized
+      // training.
+      if (filter.per_channel_quantization) {
+        // per channel quantization.
+        std::vector<float> bias_scale(
+            filter.per_channel_quantization_scales.size());
+        std::vector<int64_t> bias_zero_points(
+            filter.per_channel_quantization_scales.size());
+        for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
+             ++i) {
+          bias_scale[i] =
+              input.scale * filter.per_channel_quantization_scales[i];
+          bias_zero_points[i] = 0;
+        }
+        tflite::TensorType bias_type = TensorType_INT32;
+        if (input.type == TensorType_INT16) {
+          // In case of 16-bit, the bias type is set to be int 64.
+          bias_type = TensorType_INT64;
+        }
+        TensorData bias{bias_type,
+                        {bias_size},
+                        /*min=*/0,
+                        /*max=*/0,
+                        /*scale=*/0,
+                        /*zero_point=*/0,
+                        true,
+                        /*per_channel_quantization_scales=*/bias_scale,
+                        /*per_channel_quantization_offsets=*/bias_zero_points,
+                        /*channel_index=*/0};
+        bias_ = AddInput(bias);
+      } else {
+        // per tensor quantization.
+        auto bias_scale = GetScale(input_) * GetScale(filter_);
+        TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
+        bias_ = AddInput(bias);
+      }
+    }
+
+    output_ = AddOutput(output);
+
+    SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
+                 CreateConv2DOptions(
+                     builder_, padding, stride_width, stride_height, activation,
+                     dilation_width_factor, dilation_height_factor)
+                     .Union());
+
+    resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
+                                                    registration);
+    BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
+                     /*num_threads=*/-1,
+                     /* allow_fp32_relax_to_fp16 */ false,
+                     /*apply_delegate=*/false);
+    compilation_status_ = ApplyDelegate();
+  }
+
+ protected:
+  int input_;
+  int filter_;
+  int bias_;
+  int output_;
+};
+
+class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
+ public:
+  using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    QuantizeAndPopulate<uint8_t>(input_, data);
+  }
+
+  void SetFilter(std::initializer_list<float> data) {
+    QuantizeAndPopulate<uint8_t>(filter_, data);
+  }
+
+  void SetBias(std::initializer_list<float> data) {
+    QuantizeAndPopulate<int32_t>(bias_, data);
+  }
+
+  std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
+                               GetScale(output_), GetZeroPoint(output_));
+  }
+};
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) {
+  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
+                                ops::builtin::Register_CONVOLUTION_REF(),
+                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
+                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
+                                {TensorType_UINT8, {}, -127, 128});
+  m.SetInput({
+      // First batch
+      1, 1, 1, 1,  // row = 1
+      2, 2, 2, 2,  // row = 2
+      // Second batch
+      1, 2, 3, 4,  // row = 1
+      1, 2, 3, 4,  // row = 2
+  });
+  m.SetFilter({
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  // padding, stride_width, stride_height, activation
+}
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
+  nnapi_mock_->SetAndroidSdkVersion(30);
+  QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
+                                ops::builtin::Register_CONVOLUTION_REF(),
+                                {TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
+                                {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
+                                {TensorType_UINT8, {}, -127, 128});
+  m.SetInput({
+      // First batch
+      1, 1, 1, 1,  // row = 1
+      2, 2, 2, 2,  // row = 2
+      // Second batch
+      1, 2, 3, 4,  // row = 1
+      1, 2, 3, 4,  // row = 2
+  });
+  m.SetFilter({
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  // padding, stride_width, stride_height, activation
+}
+
+class PerChannelQuantizedConvolutionOpModel
+    : public BaseConvolutionOpModel<int8_t> {
+ public:
+  using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+  void SetInput(std::initializer_list<float> data) {
+    QuantizeAndPopulate<int8_t>(input_, data);
+  }
+
+  void SetFilter(std::initializer_list<float> data) {
+    PerChannelSymmetricQuantizeAndPopulate(filter_, data);
+  }
+
+  void SetBias(std::initializer_list<float> data) {
+    PerChannelQuantizeBias(bias_, data);
+  }
+
+  std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
+                              GetZeroPoint(output_));
+  }
+};
+
+TEST_F(NnApiSignedQuantizationTest,
+       Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
+  nnapi_mock_->SetAndroidSdkVersion(29);
+  PerChannelQuantizedConvolutionOpModel m(
+      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
+      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
+      {TensorType_INT8,
+       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+       {2, 2, 2, 2},
+       0,
+       0,
+       0,
+       0,
+       /*per_channel_quantization=*/true,
+       /*per_channel_quantization_scales=*/{1},
+       /*per_channel_quantization_offsets=*/{0},
+       /*channel_index=*/0},
+      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
+      /*stride_width=*/1, /*stride_height=*/1);
+  m.SetInput({
+      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
+      3, 2,    // batch = 0, y = 0, x = 0
+      1, -1,   // batch = 0, y = 0, x = 1
+      -2, -3,  // batch = 0, y = 0, x = 2
+      4, 3,    // batch = 0, y = 1, x = 0
+      2, -2,   // batch = 0, y = 1, x = 1
+      -3, -4,  // batch = 0, y = 1, x = 2
+  });
+  m.SetFilter(
+      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
+      {
+          1, 2,  // out channel = 0, y = 0, x = 0
+          3, 4,  // out channel = 0, y = 0, x = 1
+          3, 4,  // out channel = 0, y = 1, x = 0
+          5, 6,  // out channel = 0, y = 1, x = 1
+          7, 8,  // out channel = 1, y = 0, x = 0
+          5, 6,  // out channel = 1, y = 0, x = 1
+          3, 4,  // out channel = 1, y = 1, x = 0
+          1, 2,  // out channel = 1, y = 1, x = 1
+      });
+  m.SetBias({3, -2});
+
+  // Invoke and verify output.
+  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
+  m.Invoke();
+  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
+
+  ASSERT_EQ(tensors_count->size(), 3);
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
+            tensors_count->end());
+  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
+
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
+            3);  // input, filter, output
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
+  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
+            4);  // padding, stride_width, stride_height, activation
+}
+
TEST_F(NnApiSignedQuantizationTest,
|
||||||
|
Conv2dSignedPerTensorMapsToUnsignedOnSdk30) {
|
||||||
|
nnapi_mock_->SetAndroidSdkVersion(30);
|
||||||
|
PerChannelQuantizedConvolutionOpModel m(
|
||||||
|
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
|
||||||
|
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
|
||||||
|
{TensorType_INT8,
|
||||||
|
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
|
||||||
|
{2, 2, 2, 2},
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
/*per_channel_quantization=*/true,
|
||||||
|
/*per_channel_quantization_scales=*/{1},
|
||||||
|
/*per_channel_quantization_offsets=*/{0},
|
||||||
|
/*channel_index=*/0},
|
||||||
|
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
|
||||||
|
/*stride_width=*/1, /*stride_height=*/1);
|
||||||
|
m.SetInput({
|
||||||
|
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
|
||||||
|
3, 2, // batch = 0, y = 0, x = 0
|
||||||
|
1, -1, // batch = 0, y = 0, x = 1
|
||||||
|
-2, -3, // batch = 0, y = 0, x = 2
|
||||||
|
4, 3, // batch = 0, y = 1, x = 0
|
||||||
|
2, -2, // batch = 0, y = 1, x = 1
|
||||||
|
-3, -4, // batch = 0, y = 1, x = 2
|
||||||
|
});
|
||||||
|
m.SetFilter(
|
||||||
|
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
|
||||||
|
{
|
||||||
|
1, 2, // out channel = 0, y = 0, x = 0
|
||||||
|
3, 4, // out channel = 0, y = 0, x = 1
|
||||||
|
3, 4, // out channel = 0, y = 1, x = 0
|
||||||
|
5, 6, // out channel = 0, y = 1, x = 1
|
||||||
|
7, 8, // out channel = 1, y = 0, x = 0
|
||||||
|
5, 6, // out channel = 1, y = 0, x = 1
|
||||||
|
3, 4, // out channel = 1, y = 1, x = 0
|
||||||
|
1, 2, // out channel = 1, y = 1, x = 1
|
||||||
|
});
|
||||||
|
m.SetBias({3, -2});
|
||||||
|
|
||||||
|
// Invoke and verify output.
|
||||||
|
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
|
||||||
|
m.Invoke();
|
||||||
|
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
|
||||||
|
|
||||||
|
ASSERT_EQ(tensors_count->size(), 3);
|
||||||
|
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
|
||||||
|
tensors_count->end());
|
||||||
|
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
|
||||||
|
tensors_count->end());
|
||||||
|
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
|
||||||
|
|
||||||
|
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
|
||||||
|
3); // input, filter, output
|
||||||
|
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
|
||||||
|
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
|
||||||
|
4); // padding, stride_width, stride_height, activation
|
||||||
|
}

TEST_F(NnApiSignedQuantizationTest,
       Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}

TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  PerChannelQuantizedConvolutionOpModel m(
      nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       0,
       0,
       0,
       0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });
  m.SetFilter(
      // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
      {
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          7, 8,  // out channel = 1, y = 0, x = 0
          5, 6,  // out channel = 1, y = 0, x = 1
          3, 4,  // out channel = 1, y = 1, x = 0
          1, 2,  // out channel = 1, y = 1, x = 1
      });
  m.SetBias({3, -2});

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 4);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            2);  // input, output
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYM M_PER_CHANNEL),
            1);  // filter
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1);  // bias
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
            4);  // padding, stride_width, stride_height, activation
}
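
// A minimal sketch (not code from this change) of the mapping rule the four
// Conv2d tests above exercise: before Android API 30 NNAPI has no signed
// asymmetric operand type, so signed per-tensor activations fall back to
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, while API 30+ can keep them signed.
// The hypothetical helper below merely restates that decision:
//
//   int MapInt8ActivationType(int android_sdk_version) {
//     return android_sdk_version < 30
//                ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
//                : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
//   }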

class QuantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
                  const TensorData& output) {
    SingleOpModelWithNNAPI::Init(nnapi);
    input_ = AddInput(input);
    output_ = AddOutput(output);
    SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
                 CreateQuantizeOptions(builder_).Union());

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  void SetInput(std::initializer_list<float> data) {
    PopulateTensor(input_, data);
  }

  template <typename T>
  void SetInputAndQuantize(std::initializer_list<float> data) {
    QuantizeAndPopulate<T>(input_, data);
  }

  template <typename T>
  std::vector<T> GetOutput() {
    return ExtractVector<T>(output_);
  }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // output
}

// Quantize with Int8 output is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=-1 for INT8
  QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
                    {TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});

  m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // output
}
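
// For reference, the affine scheme used above is q = round(x / scale) +
// zero_point, so with scale=0.5 and zero_point=-1 the range endpoints map to
// -63.5 / 0.5 + (-1) = -128 and 64 / 0.5 + (-1) = 127, spanning the full INT8
// domain (for UINT8, zero_point=127 maps the same range onto [0, 255]).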

class DequantizeOpModel : public SingleOpModelWithNNAPI {
 public:
  DequantizeOpModel(const NnApi* nnapi, TensorType type,
                    std::initializer_list<int> shape, float scale,
                    int32_t zero_point, int version) {
    SingleOpModelWithNNAPI::Init(nnapi);
    const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
    input_ = AddInput(input_tensor_data);
    output_ = AddOutput({TensorType_FLOAT32, shape});
    SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
                 CreateDequantizeOptions(builder_).Union());

    resolver_ = absl::make_unique<SingleOpResolver>(
        BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
        version);

    BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
                     /*allow_fp32_relax_to_fp16=*/false,
                     /*apply_delegate=*/false);
    compilation_status_ = ApplyDelegate();
  }

  template <typename T>
  void SetInput(std::initializer_list<T> data) {
    PopulateTensor(input_, data);
  }

  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }

 private:
  int input_;
  int output_;
};

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // output
}

TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
                      127, 1);

  m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // output
}

// Dequantize with Int8 input is only supported for symmetric quantization on
// SDK level 29.
TEST_F(NnApiSignedQuantizationTest,
       DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
  // [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // output
}

// Dequantize with asymmetric Int8 input is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
  nnapi_mock_->SetAndroidSdkVersion(30);
  // [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
  DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
                      2);

  m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);

  ASSERT_EQ(tensors_count->size(), 2);
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            tensors_count->end());
  ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
            tensors_count->end());

  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
            1);  // input
  EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32), 1);  // output
}
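
// The corresponding dequantization is x = (q - zero_point) * scale; e.g. for
// the UINT8 tests above, (0 - 127) * 0.5 = -63.5 and (255 - 127) * 0.5 = 64.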

}  // namespace
}  // namespace tflite

int main(int argc, char** argv) {
  ::tflite::LogToStderr();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

@ -46,6 +46,7 @@ enum {
  ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
  ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
  ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
};
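
// Operand type 14 (TENSOR_QUANT8_ASYMM_SIGNED) ships with NNAPI feature
// level 4 on Android 11 (API level 30), which is why the tests above only
// expect the signed mapping once the mocked SDK version is 30.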

/**

@ -118,6 +118,11 @@ class NnApiHandler {
                           const ANeuralNetworksOperandType* type) { return Value; };
  }

  void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model,
                                    const ANeuralNetworksOperandType* type)) {
    nnapi_->ANeuralNetworksModel_addOperand = stub;
  }

  template <int Value>
  void SetOperandValueReturns() {
    nnapi_->ANeuralNetworksModel_setOperandValue =
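
// A minimal usage sketch for StubAddOperandWith, assuming a file-static
// `std::map<int, int>* tensors_count` as in the signed-quantization test
// above (the surrounding names are illustrative). A captureless lambda
// converts to the required function pointer:
//
//   nnapi_mock_->StubAddOperandWith(
//       [](ANeuralNetworksModel* model,
//          const ANeuralNetworksOperandType* type) -> int {
//         ++(*tensors_count)[type->type];
//         return ANEURALNETWORKS_NO_ERROR;
//       });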

@ -268,6 +273,23 @@ class NnApiHandler {
        };
  }

  template <int Value>
  void SetPriorityReturns() {
    nnapi_->ANeuralNetworksCompilation_setPriority =
        [](ANeuralNetworksCompilation* compilation, int priority) -> int {
      return Value;
    };
  }

  template <int Value>
  void SetOperandSymmPerChannelQuantParamsReturns() {
    nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams =
        [](ANeuralNetworksModel* model, int32_t index,
           const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) {
          return Value;
        };
  }

  /*
   * Sets the SDK Version in the nnapi structure.
   * If set_unsupported_ops_to_null is set to true, all the functions not