Add support for TENSOR_QUANT8_ASYMM_SIGNED in NNAPI delegate

PiperOrigin-RevId: 317846923
Change-Id: I1c61f53e89228cd2482435e9255e390864bd83e3
Authored by Lev Proleev on 2020-06-23 05:29:56 -07:00; committed by TensorFlower Gardener
parent 7198070f4d
commit e071e66f03
8 changed files with 1154 additions and 80 deletions


@@ -190,6 +190,32 @@ cc_test(
],
)
cc_test(
name = "nnapi_delegate_signed_quantization_test",
size = "small",
srcs = [
"nnapi_delegate_signed_quantization_test.cc",
],
tags = [
"no_mac",
"no_windows",
"tflite_not_portable_ios",
],
deps = [
":nnapi_delegate",
":nnapi_delegate_mock_test",
"//tensorflow/lite:framework",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/kernels:test_util",
"//tensorflow/lite/nnapi:nnapi_implementation",
"//tensorflow/lite/nnapi:nnapi_lib",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "quant_lstm_sup_test",
size = "small",


@@ -60,6 +60,10 @@ FloatActivationsOpTest/Elu,30
FloatActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwish
QuantizedActivationsOpTest/HardSwishBias
QuantizedActivationsOpTest/Relu*
QuantizedActivationsOpTest/PRelu,29
QuantizedActivationsOpTest/PReluSameShapes,29
QuantizedActivationsOpTest/PReluInt8.+,30
# add_test
FloatAddOpModel/.+
@@ -145,6 +149,7 @@ ConvolutionOpTest/ConvolutionOpTest/.+/\d+
# dequantize_test
DequantizeOpTest/Uint8
DequantizeOpTest/Int8,30
# depth_to_space_test
DepthToSpaceOpModel/Float32
@@ -190,6 +195,7 @@ QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Uin
QuantizedFullyConnectedOpTest/SimpleTestQuantizedOutputMultiplierGreaterThan1Int8/\d+,29
HybridFullyConnectedOpTest/SimpleTestQuantizedUint8,29
HybridFullyConnectedOpTest/SimpleTestQuantizedInt8,29
HybridAsymmetricInputFullyConnectedOpTest.SimpleTestQuantizedUint8,29
FloatFullyConnectedOpTest/FloatFullyConnectedOpTest/SimpleTest4DInput/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedUint8/\d+
QuantizedFullyConnectedOpTest/QuantizedFullyConnectedOpTest/SimpleTest4dInputQuantizedOutputMultiplierGreaterThan1Uint8/\d+,29
@@ -207,6 +213,7 @@ FloatGatherOpTest/LastAxis,29
TypesGatherOpTest/Float32Int32,29
TypesGatherOpTest/Int32Int32,29
TypesGatherOpTest/Uint8Int32,29
TypesGatherOpTest/Int8Int32,29
# hashtable_lookup_test
# All tests except the string one should be accelerated
@@ -286,13 +293,18 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29
# quantize_test
QuantizeOpTest/UINT8,29
QuantizeOpTest/INT8,30
# rank
# reduce_test
-Dynamic.+(Mean|Sum|Prod|Max|Min)OpTest/.+
-ConstUint8(Mean|Sum)OpTest/.+
-ConstInt8MeanOpTest.NonSpecialAxisNonSameScale
-ConstInt8MeanOpTest.QuantizedDifferentScale
ConstUint8(Max|Min)OpTest/.+,29
ConstUint8(Mean)OpTest/.+
Constint8(Mean|Max|Min)OpTest/.+
ConstInt8(Mean|Max|Min)OpTest/.+,29
ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29
ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29
ConstFloat(Mean|Any)OpTest/NotKeepDims


@@ -201,6 +201,7 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
case kTfLiteBuiltinConcatenation:
case kTfLiteBuiltinEqual:
case kTfLiteBuiltinExpandDims:
case kTfLiteBuiltinGather:
case kTfLiteBuiltinGreater:
case kTfLiteBuiltinGreaterEqual:
case kTfLiteBuiltinHardSwish:
@@ -377,6 +378,7 @@ bool HasZeroes(TfLiteIntArrayView array) {
enum {
NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
};
// Returns the SDK level to target when delegating to the given devices.
@@ -1065,6 +1067,8 @@ class NNAPIOpBuilder {
tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
const bool need_int8_conversion =
tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
const bool use_int8_asymm_signed =
tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
if (ann_tensor_index != -1) {
indices->push_back(ann_tensor_index);
@@ -1095,12 +1099,25 @@ class NNAPIOpBuilder {
nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
break;
case kTfLiteUInt8:
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
scale = tensor->params.scale;
zeroPoint = tensor->params.zero_point;
if (scale == 0) {
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
// NNAPI.
scale = 1;
}
break;
case kTfLiteInt8:
// If explicit int8 conversion is needed, we still need
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
nn_type = (tensor_type == kTfLiteUInt8 || need_int8_conversion)
? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
: ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
if (use_int8_asymm_signed) {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
} else if (need_int8_conversion) {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
} else {
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
}
scale = tensor->params.scale;
zeroPoint = tensor->params.zero_point;
if (tensor->quantization.type == kTfLiteAffineQuantization) {
@@ -1130,8 +1147,7 @@ class NNAPIOpBuilder {
operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
}
if (scale == 0) {
// TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
// with zero scale are not valid in NNAPI.
// QUANT8 tensors with zero scale are not valid in NNAPI.
scale = 1;
}
}
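The two hunks above boil down to a simple type-selection rule for int8 tensors. A minimal standalone sketch (the helper is illustrative, not part of the real NNAPIOpBuilder API, but the constants are the ones defined in this change):

// Sketch: which NNAPI operand type a kTfLiteInt8 tensor now maps to.
int32_t NnTypeForInt8Tensor(uint32_t tensor_flags) {
  if (tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;  // NNAPI 1.3 (SDK 30+)
  }
  if (tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION) {
    return ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;  // data/zero point shifted +128
  }
  return ANEURALNETWORKS_TENSOR_QUANT8_SYMM;  // requires zero_point == 0
}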
@@ -1248,7 +1264,6 @@ class NNAPIOpBuilder {
"setting new operand value", nnapi_errno_);
}
}
indices->push_back(ann_tensor_index);
return kTfLiteOk;
}
@@ -1437,7 +1452,6 @@ bool NNAPIDelegateKernel::Validate(
bool is_accelerator_specified,
std::vector<NNAPIValidationFailure>* map_failures) {
OpValidationContext val_ctx{true, map_failures};
switch (builtin_code) {
case kTfLiteBuiltinAdd: {
ExpectMaxOpVersion(version, 2, &val_ctx);
@@ -1789,18 +1803,21 @@ bool NNAPIDelegateKernel::Validate(
"Supported op versions are 1 and 2 only", &val_ctx);
const auto& input = context->tensors[node->inputs->data[0]];
Expect(input.type != kTfLiteFloat16,
NNAPIValidationFailureType::kUnsupportedInputType,
"kTfLiteFloat16 not supported as input", &val_ctx);
if (android_sdk_version < kMinSdkVersionForNNAPI12) {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
} else {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
const auto zero_point = input.params.zero_point;
Expect(input.type != kTfLiteInt8 ||
(zero_point == 0 &&
android_sdk_version >= kMinSdkVersionForNNAPI12),
NNAPIValidationFailureType::kUnsupportedInputType,
"NN API supports int8 type since version 1.2 but only for "
"symmetric quantization.",
&val_ctx);
if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
input.type == kTfLiteInt8) {
const auto zero_point = input.params.zero_point;
Expect(zero_point == 0,
NNAPIValidationFailureType::kUnsupportedInputType,
"NN API supports int8 type since version 1.2 but only for "
"symmetric quantization.",
&val_ctx);
}
}
} break;
case kTfLiteBuiltinFloor: {
ExpectOpVersion(version, 1, &val_ctx);
@@ -2150,21 +2167,38 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
const TfLiteType input_type =
context->tensors[node->inputs->data[0]].type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8);
const TfLiteType output_type =
context->tensors[node->outputs->data[0]].type;
ExpectTypeIn(output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8.",
&val_ctx);
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8, kTfLiteInt8);
ExpectTypeIn(
output_type,
{kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8, kTfLiteInt8.",
&val_ctx);
} else {
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
kTfLiteUInt8);
ExpectTypeIn(
output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
"kTfLiteUInt8.",
&val_ctx);
}
} break;
case kTfLiteBuiltinPrelu: {
ExpectOpVersion(version, 1, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
const auto input_type = context->tensors[node->inputs->data[0]].type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8);
} break;
case kTfLiteBuiltinTile: {
ExpectOpVersion(version, 1, &val_ctx);
@@ -2240,19 +2274,18 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
} break;
case kTfLiteBuiltinGather: {
ExpectOpVersion(version, 1, &val_ctx);
ExpectOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
const auto input_type = context->tensors[node->inputs->data[0]].type;
const auto& positions = context->tensors[node->inputs->data[1]];
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
kTfLiteInt32, kTfLiteUInt8);
ExpectTypeIn(positions.type,
{kTfLiteFloat32, kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8},
NNAPIValidationFailureType::kUnsupportedInputType,
"Positions type should be one of kTfLiteFloat32, "
"kTfLiteFloat16, kTfLiteInt32, kTfLiteUInt8",
&val_ctx);
kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
Expect(positions.type == kTfLiteInt32,
NNAPIValidationFailureType::kUnsupportedInputType,
"Positions type should be one of kTfLiteInt32", &val_ctx);
Expect(positions.dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOperandRank,
"0-dimension args are not supported by NNAPI.", &val_ctx);
@@ -2283,8 +2316,13 @@ bool NNAPIDelegateKernel::Validate(
&val_ctx);
// Tensor indices: split_dim: 0, value: 1
const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt32);
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8, kTfLiteInt32);
} else {
EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt32);
}
const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
NNAPIValidationFailureType::kUnsupportedInputType,
@@ -2308,30 +2346,41 @@ bool NNAPIDelegateKernel::Validate(
NNAPIValidationFailureType::kUnsupportedInputType,
"Value should be Float32.", &val_ctx);
const auto output_type = context->tensors[node->outputs->data[0]].type;
Expect(output_type == kTfLiteUInt8,
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
if (android_sdk_version < kMinSdkVersionForNNAPI13) {
Expect(output_type == kTfLiteUInt8,
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
} else {
ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
NNAPIValidationFailureType::kUnsupportedOutputType,
"Output should be kTfLiteUInt8.", &val_ctx);
}
const auto quantization_params =
context->tensors[node->outputs->data[0]].params;
Expect(quantization_params.scale > 0.f,
NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
"Quantization scale should be > 0.", &val_ctx);
} break;
case kTfLiteBuiltinReduceAny:
case kTfLiteBuiltinReduceMin:
case kTfLiteBuiltinReduceMax: {
ExpectOpVersion(version, 1, &val_ctx);
case kTfLiteBuiltinReduceAny: {
ExpectOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOutputType,
"NNAPI does not support generating a scalar as output.", &val_ctx);
if (builtin_code == kTfLiteBuiltinReduceProd) {
const auto input_type = context->tensors[node->inputs->data[0]].type;
Expect(input_type == kTfLiteFloat32,
NNAPIValidationFailureType::kUnsupportedInputType,
"NNAPI only supports floating point REDUCE_PROD.", &val_ctx);
}
} break;
case kTfLiteBuiltinReduceMin:
case kTfLiteBuiltinReduceMax: {
ExpectMaxOpVersion(version, 2, &val_ctx);
ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
&val_ctx);
const auto input_tensor = context->tensors[node->inputs->data[0]];
const auto input_type = input_tensor.type;
EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
kTfLiteInt8);
Expect(input_tensor.dims->size != 0,
NNAPIValidationFailureType::kUnsupportedOutputType,
"NNAPI does not support generating a scalar as output.", &val_ctx);
} break;
case kTfLiteBuiltinDepthToSpace: {
const TfLiteType input_type =
@@ -3093,16 +3142,10 @@ TfLiteStatus NNAPIDelegateKernel::Map(
case kTfLiteBuiltinGather: {
auto builtin = reinterpret_cast<TfLiteGatherParams*>(
mapping_args.node->builtin_data);
mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[0],
/* hybrid_op */ false,
/* scalar_as_tensor */ false);
mapping_args.builder->AddScalarInt32Operand(builtin->axis);
mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
/* hybrid_op */ false,
/* scalar_as_tensor */ false);
/* tensor_flags */ 0);
*nn_op_type = ANEURALNETWORKS_GATHER;
} break;
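// Note (sketch, inferred from this diff): ANEURALNETWORKS_GATHER takes its
// operands in the order (input, axis, indices), while the TFLite node
// carries (params, indices) and keeps axis in builtin_data. Input 0 is
// therefore added from AddOpsAndTensors (see the kTfLiteBuiltinGather
// branch further below), and Map() appends the remaining two operands:
//   builder->AddScalarInt32Operand(builtin->axis);                // axis
//   builder->AddTensorInput(inputs->data[1], false, /*flags=*/0); // indices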
case kTfLiteBuiltinBidirectionalSequenceLstm: {
@@ -3430,6 +3473,9 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
// absolute indices but NN api indices inputs by relative indices.
int relative_input_index = 0;
const bool use_int8_asymm_signed =
target_sdk_version_ >= kMinSdkVersionForNNAPI13;
size_t input_offset = 0;
for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
if (absolute_input_index == kTfLiteOptionalTensor) {
@@ -3472,9 +3518,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
}
} else if (tensor->type == kTfLiteInt8 &&
ann_type_equivalent == kTfLiteInt32) {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]) + 128;
if (use_int8_asymm_signed) {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]);
}
} else {
for (int i = 0; i < num_elements; ++i) {
reinterpret_cast<int32_t*>(input_ptr)[i] =
static_cast<const int32_t>(tensor->data.int8[i]) + 128;
}
}
} else {
context->ReportError(
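For context, the +128 shift in the pre-NNAPI-1.3 path works because real_value = scale * (q - zero_point): shifting both the data and the zero point by 128 turns a signed asymmetric tensor into the equivalent unsigned one. A small sketch (illustrative, not delegate code):

// int8  q = -3,  zero_point = -1,  scale = 0.5  ->  0.5 * (-3 - (-1)) = -1.0
// uint8 q = 125, zero_point = 127, scale = 0.5  ->  0.5 * (125 - 127) = -1.0
int32_t WidenInt8(int8_t v, bool use_int8_asymm_signed) {
  // On NNAPI 1.3+ the tensor is QUANT8_ASYMM_SIGNED, so no shift is needed.
  return use_int8_asymm_signed ? static_cast<int32_t>(v)
                               : static_cast<int32_t>(v) + 128;
}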
@@ -3685,6 +3738,15 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
&dequantize_mapping, &allocation_memory_mapping_,
&nnapi_to_tflite_op_mapping_, nn_model_.get(),
nnapi_errno);
// If we have target accelerators the target SDK version might be
// different than the current android version.
target_sdk_version_ = nnapi_->android_sdk_version;
if (!nnapi_devices_.empty()) {
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno));
}
// Add Tensors.
for (auto node_index : nodes_) {
// Obtain the op and registration.
@@ -3696,11 +3758,18 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
const bool need_int8_conversion =
target_sdk_version_ < kMinSdkVersionForNNAPI13 &&
NeedInt8Conversion(context, reg->builtin_code, node);
const bool use_int8_asymm_signed =
target_sdk_version_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
int input_tensor_flags = 0;
if (scalar_as_tensor) {
input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
}
if (use_int8_asymm_signed) {
input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
}
// On SDK level less than 30, h_swish will be lowered into supported NNAPI
// operations. Since SDK level 30, h_swish is supported as a single
@@ -3807,8 +3876,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
break;
case kTfLiteInt8:
if (constant_value.allocation_type == kTfLiteMmapRo) {
builder.AddScalarInt32Operand(
static_cast<int32_t>(*constant_value.data.int8) + 128);
if (need_int8_conversion) {
builder.AddScalarInt32Operand(
static_cast<int32_t>(*constant_value.data.int8) + 128);
} else {
builder.AddScalarInt32Operand(*constant_value.data.int8);
}
} else {
builder.AddSingleValueTensorAsScalarOperand(
constant_value_id, ANEURALNETWORKS_INT32);
@@ -3836,7 +3909,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// specifying the output height and width, is not added and
// instead the height and width will be added individually as
// scalars by the mapping function returned by Map().
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
}
} else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
// The K parameter tensor is not handled here but by the functor
@@ -3844,8 +3918,12 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// the else clause below
continue;
} else if (reg->builtin_code == kTfLiteBuiltinGather) {
// Everything is added during Map since input tensors
// Everything else is added during Map since input tensors
// have different order.
if (input_pos == 0) {
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
}
continue;
} else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
input_pos == 1) {
@@ -3862,7 +3940,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
// the axis, needs to be converted to a scalar since TFLite uses a
// tensor but NNAPI uses a scalar as the axis.
if (input_pos == 0) {
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op));
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
input_tensor_flags));
} else {
const int axis_id = node->inputs->data[1];
const TfLiteTensor& axis_tensor = context->tensors[axis_id];
@@ -3908,12 +3987,26 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
operand_tensor.params, &tensor_index));
break;
case kTfLiteInt8:
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_SYMM, operand_tensor.type, {1},
std::vector<int8_t>(1, operand_tensor.data.int8[0]),
operand_tensor.params, &tensor_index));
break;
case kTfLiteInt8: {
auto params = operand_tensor.params;
if (params.scale == 0.0) {
params.scale = 1.0;
}
if (use_int8_asymm_signed) {
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
operand_tensor.type, {1},
std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
&tensor_index));
} else {
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
{1},
std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
params, &tensor_index));
}
} break;
case kTfLiteInt32:
TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
@@ -3995,19 +4088,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
}
}
// If we have target accelerators the target SDK version might be
// different than the current android version.
int target_sdk_version = nnapi_->android_sdk_version;
if (!nnapi_devices_.empty()) {
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
}
// Get op type and operands
// Fails if the Validate function failed
int nn_op_type;
TF_LITE_ENSURE_STATUS(
Map(context, reg->builtin_code, reg->version, target_sdk_version,
Map(context, reg->builtin_code, reg->version, target_sdk_version_,
{context, &builder, node, &model_state_outputs_,
&model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
&nn_op_type));
@@ -4017,6 +4102,9 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
if (need_int8_conversion) {
output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
}
if (use_int8_asymm_signed) {
output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
}
for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
const auto output_index = node->outputs->data[output_pos];


@@ -341,6 +341,9 @@ class NNAPIDelegateKernel {
std::vector<int> nnapi_to_tflite_op_mapping_;
// Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors
int target_sdk_version_ = 27; // kMinSdkVersionForNNAPI
void AddDequantizeOperatorsWhereNeeded(
const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);


@@ -71,6 +71,8 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionStartComputeReturns<ANEURALNETWORKS_NO_ERROR>();
EventWaitReturns<ANEURALNETWORKS_NO_ERROR>();
SetPriorityReturns<ANEURALNETWORKS_NO_ERROR>();
SetOperandSymmPerChannelQuantParamsReturns<ANEURALNETWORKS_NO_ERROR>();
SetNnapiSupportedDevice("test-device", android_sdk_version);
}


@@ -0,0 +1,920 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <gtest/gtest.h>
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/fully_connected.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
namespace tflite {
namespace ops {
namespace builtin {
TfLiteRegistration* Register_CONVOLUTION_REF();
TfLiteRegistration* Register_DEQUANTIZE();
} // namespace builtin
} // namespace ops
namespace {
class SingleOpModelWithNNAPI : public SingleOpModel {
public:
SingleOpModelWithNNAPI() = default;
void Init(const NnApi* nnapi) {
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
SetDelegate(stateful_delegate_.get());
}
StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
void SetBufferHandle(int index, TfLiteBufferHandle handle) {
interpreter_->SetBufferHandle(index, handle, stateful_delegate_.get());
}
TfLiteStatus GetCompilationStatus() { return compilation_status_; }
protected:
std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
TfLiteStatus compilation_status_;
};
class HybridFullyConnectedOpModel : public SingleOpModelWithNNAPI {
public:
HybridFullyConnectedOpModel(const NnApi* nnapi, int units, int batches,
const TensorData& input,
const TensorData& weights,
const TensorData& output = {TensorType_FLOAT32},
bool asymmetric_inputs = false)
: batches_(batches), units_(units) {
SingleOpModelWithNNAPI::Init(nnapi);
int total_input_size = 1;
for (size_t i = 0; i < input.shape.size(); ++i) {
total_input_size *= input.shape[i];
}
input_size_ = total_input_size / batches_;
input_ = AddInput(input);
weights_ = AddInput(weights);
TensorData bias{TensorType_FLOAT32, {units_}};
bias_ = AddInput(bias);
output_ = AddOutput(output);
auto options = CreateFullyConnectedOptions(
builder_, ActivationFunctionType_RELU,
tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
false, asymmetric_inputs)
.Union();
SetBuiltinOp(BuiltinOperator_FULLY_CONNECTED,
BuiltinOptions_FullyConnectedOptions, options);
resolver_ = absl::make_unique<SingleOpResolver>(
BuiltinOperator_FULLY_CONNECTED,
ops::builtin::Register_FULLY_CONNECTED_PIE());
BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)},
/*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
void SetBias(const std::vector<float>& f) { PopulateTensor(bias_, f); }
void SetWeights(const std::vector<float>& data) {
SymmetricQuantizeAndPopulate(weights_, data);
}
void SetSignedWeights(std::initializer_list<float> f) {
SignedSymmetricQuantizeAndPopulate(weights_, f);
}
void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
int input_size() { return input_size_; }
int num_units() { return units_; }
int num_batches() { return batches_; }
protected:
int input_;
int weights_;
int bias_;
int output_;
int batches_;
int units_;
int input_size_;
};
struct NnApiSignedQuantizationTest
: ::tflite::delegate::nnapi::NnApiDelegateMockTest {
static void SetUpTestSuite() { tensors_count = new std::map<int, int>(); }
void SetUp() override {
::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
nnapi_mock_->StubAddOperandWith(
[](ANeuralNetworksModel* model,
const ANeuralNetworksOperandType* type) -> int {
const auto nn_tensor_type = type->type;
if (tensors_count->find(nn_tensor_type) == tensors_count->end()) {
tensors_count->insert({nn_tensor_type, 0});
}
tensors_count->at(nn_tensor_type)++;
return ANEURALNETWORKS_NO_ERROR;
});
}
void TearDown() override { tensors_count->clear(); }
static void TearDownTestSuite() {
delete tensors_count;
tensors_count = nullptr;
}
static std::map<int, int>* tensors_count;
};
std::map<int, int>* NnApiSignedQuantizationTest::tensors_count = nullptr;
TEST_F(NnApiSignedQuantizationTest,
HybridFullyConnectedMapsToSignedSymmOnSdk29) {
nnapi_mock_->SetAndroidSdkVersion(29);
HybridFullyConnectedOpModel m(
nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
/*input=*/{TensorType_FLOAT32, {2, 10}},
/*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
m.SetSignedWeights({
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2
});
m.SetBias({1, 2, 3});
m.SetInput({
1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0
1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1
});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
4); // fc_input, fc_weights, fc_bias, fc_output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // dequantize_weights_input
}
TEST_F(NnApiSignedQuantizationTest,
HybridFullyConnectedMapsToSignedSymmOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
HybridFullyConnectedOpModel m(
nnapi_mock_->GetNnApi(), /*units=*/3, /*batches=*/2,
/*input=*/{TensorType_FLOAT32, {2, 10}},
/*weights=*/{TensorType_INT8, {3, 10}, 0, 0, 10.0 / 127.0, 0});
m.SetSignedWeights({
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2
});
m.SetBias({1, 2, 3});
m.SetInput({
1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0
1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1
});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
4); // fc_input, fc_weights, fc_bias, fc_output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32), 1); // activation
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // dequantize_weights_input
}
template <typename FilterType>
class BaseConvolutionOpModel : public SingleOpModelWithNNAPI {
public:
BaseConvolutionOpModel(
const NnApi* nnapi, TfLiteRegistration* registration,
const TensorData& input, const TensorData& filter,
const TensorData& output, int stride_width = 2, int stride_height = 2,
enum Padding padding = Padding_VALID,
enum ActivationFunctionType activation = ActivationFunctionType_NONE,
int dilation_width_factor = 1, int dilation_height_factor = 1,
std::initializer_list<FilterType> filter_data = {}) {
SingleOpModelWithNNAPI::Init(nnapi);
input_ = AddInput(input);
if (filter_data.size()) {
filter_ = AddConstInput(filter, filter_data);
} else {
filter_ = AddInput(filter);
}
int bias_size = GetShape(filter_)[0];
if (input.type == TensorType_FLOAT32) {
bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
} else {
// This is a quantized version. The scale of 'bias' depends on the scales
// of input and filter. Supposedly this is correctly set during quantized
// training.
if (filter.per_channel_quantization) {
// per channel quantization.
std::vector<float> bias_scale(
filter.per_channel_quantization_scales.size());
std::vector<int64_t> bias_zero_points(
filter.per_channel_quantization_scales.size());
for (size_t i = 0; i < filter.per_channel_quantization_scales.size();
++i) {
bias_scale[i] =
input.scale * filter.per_channel_quantization_scales[i];
bias_zero_points[i] = 0;
}
tflite::TensorType bias_type = TensorType_INT32;
if (input.type == TensorType_INT16) {
// In case of 16-bit, the bias type is set to be int 64.
bias_type = TensorType_INT64;
}
TensorData bias{bias_type,
{bias_size},
/*min=*/0,
/*max=*/0,
/*scale=*/0,
/*zero_point=*/0,
true,
/*per_channel_quantization_scales=*/bias_scale,
/*per_channel_quantization_offsets=*/bias_zero_points,
/*channel_index=*/0};
bias_ = AddInput(bias);
} else {
// per tensor quantization.
auto bias_scale = GetScale(input_) * GetScale(filter_);
TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
bias_ = AddInput(bias);
}
}
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
CreateConv2DOptions(
builder_, padding, stride_width, stride_height, activation,
dilation_width_factor, dilation_height_factor)
.Union());
resolver_ = absl::make_unique<SingleOpResolver>(BuiltinOperator_CONV_2D,
registration);
BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)},
/*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
protected:
int input_;
int filter_;
int bias_;
int output_;
};
class QuantizedConvolutionOpModel : public BaseConvolutionOpModel<uint8_t> {
public:
using BaseConvolutionOpModel::BaseConvolutionOpModel;
void SetInput(std::initializer_list<float> data) {
QuantizeAndPopulate<uint8_t>(input_, data);
}
void SetFilter(std::initializer_list<float> data) {
QuantizeAndPopulate<uint8_t>(filter_, data);
}
void SetBias(std::initializer_list<float> data) {
QuantizeAndPopulate<int32_t>(bias_, data);
}
std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
std::vector<float> GetDequantizedOutput() {
return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
GetScale(output_), GetZeroPoint(output_));
}
};
TEST_F(NnApiSignedQuantizationTest,
Conv2DUnsignedPerTensorMapsToUnsignedOnSdk29) {
QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
{TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
{TensorType_UINT8, {}, -127, 128});
m.SetInput({
// First batch
1, 1, 1, 1, // row = 1
2, 2, 2, 2, // row = 2
// Second batch
1, 2, 3, 4, // row = 1
1, 2, 3, 4, // row = 2
});
m.SetFilter({
1, 2, 3, 4, // first 2x2 filter
-1, 1, -1, 1, // second 2x2 filter
-1, -1, 1, 1, // third 2x2 filter
});
m.SetBias({1, 2, 3});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dUnsignedPerTensorMapsToUnsignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
QuantizedConvolutionOpModel m(nnapi_mock_->GetNnApi(),
ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
{TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
{TensorType_UINT8, {}, -127, 128});
m.SetInput({
// First batch
1, 1, 1, 1, // row = 1
2, 2, 2, 2, // row = 2
// Second batch
1, 2, 3, 4, // row = 1
1, 2, 3, 4, // row = 2
});
m.SetFilter({
1, 2, 3, 4, // first 2x2 filter
-1, 1, -1, 1, // second 2x2 filter
-1, -1, 1, 1, // third 2x2 filter
});
m.SetBias({1, 2, 3});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
class PerChannelQuantizedConvolutionOpModel
: public BaseConvolutionOpModel<int8_t> {
public:
using BaseConvolutionOpModel::BaseConvolutionOpModel;
void SetInput(std::initializer_list<float> data) {
QuantizeAndPopulate<int8_t>(input_, data);
}
void SetFilter(std::initializer_list<float> data) {
PerChannelSymmetricQuantizeAndPopulate(filter_, data);
}
void SetBias(std::initializer_list<float> data) {
PerChannelQuantizeBias(bias_, data);
}
std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
std::vector<float> GetDequantizedOutput() {
return Dequantize<int8_t>(ExtractVector<int8_t>(output_), GetScale(output_),
GetZeroPoint(output_));
}
};
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerTensorMapsToUnsignedOnSdk29) {
nnapi_mock_->SetAndroidSdkVersion(29);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1},
/*per_channel_quantization_offsets=*/{0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerTensorMapsToUnsignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1},
/*per_channel_quantization_offsets=*/{0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 3);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
3); // input, filter, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest,
Conv2dSignedPerChannelMapsToUnsignedOnSdk29) {
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1, 2},
/*per_channel_quantization_offsets=*/{0, 0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 4);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
2); // input, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
1); // filter
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
TEST_F(NnApiSignedQuantizationTest, Conv2dSignedPerChannelMapsToSignedOnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
PerChannelQuantizedConvolutionOpModel m(
nnapi_mock_->GetNnApi(), ops::builtin::Register_CONVOLUTION_REF(),
{TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
{TensorType_INT8,
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{2, 2, 2, 2},
0,
0,
0,
0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/{1, 2},
/*per_channel_quantization_offsets=*/{0, 0},
/*channel_index=*/0},
{TensorType_INT8, {}, -63.5, 64, 0.5, -1},
/*stride_width=*/1, /*stride_height=*/1);
m.SetInput({
// [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
3, 2, // batch = 0, y = 0, x = 0
1, -1, // batch = 0, y = 0, x = 1
-2, -3, // batch = 0, y = 0, x = 2
4, 3, // batch = 0, y = 1, x = 0
2, -2, // batch = 0, y = 1, x = 1
-3, -4, // batch = 0, y = 1, x = 2
});
m.SetFilter(
// [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
{
1, 2, // out channel = 0, y = 0, x = 0
3, 4, // out channel = 0, y = 0, x = 1
3, 4, // out channel = 0, y = 1, x = 0
5, 6, // out channel = 0, y = 1, x = 1
7, 8, // out channel = 1, y = 0, x = 0
5, 6, // out channel = 1, y = 0, x = 1
3, 4, // out channel = 1, y = 1, x = 0
1, 2, // out channel = 1, y = 1, x = 1
});
m.SetBias({3, -2});
// Invoke and verify output.
// output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 4);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_INT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_INT32), tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
2); // input, output
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL),
1); // filter
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_INT32), 1); // bias
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_INT32),
4); // padding, stride_width, stride_height, activation
}
class QuantizeOpModel : public SingleOpModelWithNNAPI {
public:
QuantizeOpModel(const NnApi* nnapi, const TensorData& input,
const TensorData& output) {
SingleOpModelWithNNAPI::Init(nnapi);
input_ = AddInput(input);
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions,
CreateQuantizeOptions(builder_).Union());
BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
void SetInput(std::initializer_list<float> data) {
PopulateTensor(input_, data);
}
template <typename T>
void SetInputAndQuantize(std::initializer_list<float> data) {
QuantizeAndPopulate<T>(input_, data);
}
template <typename T>
std::vector<T> GetOutput() {
return ExtractVector<T>(output_);
}
private:
int input_;
int output_;
};
TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk29) {
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // output
}
TEST_F(NnApiSignedQuantizationTest, QuantizeUint8MapsToUint8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_UINT8, {2, 5}, 0, 0, 0.5, 127});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // output
}
// Quantize with Int8 output is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, QuantizeInt8MapsToInt8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=-1 for INT8
QuantizeOpModel m(nnapi_mock_->GetNnApi(), {TensorType_FLOAT32, {2, 5}},
{TensorType_INT8, {2, 5}, 0, 0, 0.5, -1});
m.SetInput({-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
1); // output
}
class DequantizeOpModel : public SingleOpModelWithNNAPI {
public:
DequantizeOpModel(const NnApi* nnapi, TensorType type,
std::initializer_list<int> shape, float scale,
int32_t zero_point, int version) {
SingleOpModelWithNNAPI::Init(nnapi);
const TensorData input_tensor_data = {type, shape, 0, 0, scale, zero_point};
input_ = AddInput(input_tensor_data);
output_ = AddOutput({TensorType_FLOAT32, shape});
SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
CreateDequantizeOptions(builder_).Union());
resolver_ = absl::make_unique<SingleOpResolver>(
BuiltinOperator_DEQUANTIZE, ops::builtin::Register_DEQUANTIZE(),
version);
BuildInterpreter({GetShape(input_)}, /*num_threads=*/-1,
/* allow_fp32_relax_to_fp16 */ false,
/*apply_delegate=*/false);
compilation_status_ = ApplyDelegate();
}
template <typename T>
void SetInput(std::initializer_list<T> data) {
PopulateTensor(input_, data);
}
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
private:
int input_;
int output_;
};
TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk29) {
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
127, 1);
m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
TEST_F(NnApiSignedQuantizationTest, DequantizeUint8MapsToUint8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_UINT8, {2, 5}, 0.5,
127, 1);
m.SetInput<uint8_t>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
// Dequantize with Int8 input is only supported for symmetric quantization on
// SDK level 29
TEST_F(NnApiSignedQuantizationTest,
DequantizeTestInt8SymmMapsToInt8SymmOnSdk29) {
// [-63.5, 64] -> scale=0.5, zero_point=0 for INT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, 0,
2);
m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_SYMM),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
// Dequantize with Int8 input is only supported since SDK level 30.
TEST_F(NnApiSignedQuantizationTest, DequantizeTestInt8MapsToInt8OnSdk30) {
nnapi_mock_->SetAndroidSdkVersion(30);
// [-63.5, 64] -> scale=0.5, zero_point=-1 for INT8
DequantizeOpModel m(nnapi_mock_->GetNnApi(), TensorType_INT8, {2, 5}, 0.5, -1,
2);
m.SetInput<int8_t>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
ASSERT_EQ(tensors_count->size(), 2);
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
tensors_count->end());
ASSERT_NE(tensors_count->find(ANEURALNETWORKS_TENSOR_FLOAT32),
tensors_count->end());
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED),
1); // input
EXPECT_EQ(tensors_count->at(ANEURALNETWORKS_TENSOR_FLOAT32),
1); // output
}
} // namespace
} // namespace tflite
int main(int argc, char** argv) {
::tflite::LogToStderr();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}


@@ -46,6 +46,7 @@ enum {
ANEURALNETWORKS_TENSOR_QUANT16_SYMM = 7,
ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL = 11,
ANEURALNETWORKS_TENSOR_QUANT8_SYMM = 13,
ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED = 14,
};
/**

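For reference, a sketch of describing an operand with the new type through the NNAPI C API; the ANeuralNetworksOperandType fields come from the NNAPI headers, and the concrete values are illustrative:

uint32_t dims[2] = {2, 5};
ANeuralNetworksOperandType operand = {
    /*type=*/ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,  // = 14
    /*dimensionCount=*/2,
    /*dimensions=*/dims,
    /*scale=*/0.5f,
    /*zeroPoint=*/-1};  // signed asymmetric zero point may lie in [-128, 127]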

@@ -118,6 +118,11 @@ class NnApiHandler {
const ANeuralNetworksOperandType* type) { return Value; };
}
void StubAddOperandWith(int(stub)(ANeuralNetworksModel* model,
const ANeuralNetworksOperandType* type)) {
nnapi_->ANeuralNetworksModel_addOperand = stub;
}
template <int Value>
void SetOperandValueReturns() {
nnapi_->ANeuralNetworksModel_setOperandValue =
@@ -268,6 +273,23 @@ class NnApiHandler {
};
}
template <int Value>
void SetPriorityReturns() {
nnapi_->ANeuralNetworksCompilation_setPriority =
[](ANeuralNetworksCompilation* compilation, int priority) -> int {
return Value;
};
}
template <int Value>
void SetOperandSymmPerChannelQuantParamsReturns() {
nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams =
[](ANeuralNetworksModel* model, int32_t index,
const ANeuralNetworksSymmPerChannelQuantParams* channelQuant) {
return Value;
};
}
/*
* Sets the SDK Version in the nnapi structure.
* If set_unsupported_ops_to_null is set to true, all the functions not
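A usage sketch of the new StubAddOperandWith hook, mirroring the signed-quantization test above (it assumes a static std::map<int, int>* tensors_count, as in that test fixture):

nnapi_mock_->StubAddOperandWith(
    [](ANeuralNetworksModel* model,
       const ANeuralNetworksOperandType* type) -> int {
      (*tensors_count)[type->type]++;  // tally operands per NNAPI type
      return ANEURALNETWORKS_NO_ERROR;
    });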