NNAPI delegate: fix handling of int8 inputs in TransposeConv

PiperOrigin-RevId: 351350783
Change-Id: I7cb5a55dd76b46dc23e856487ee048e50663ac0c
Lev Proleev 2021-01-12 05:27:11 -08:00 committed by TensorFlower Gardener
parent 6fd0691083
commit 915c0b2bbc
3 changed files with 55 additions and 40 deletions

View File

@@ -421,9 +421,8 @@ TopKV2OpTest/TopKV2OpTest/.+/0,29
TransposeTest/.+
# transpose_conv_test
-TransposeConvOpTest/TransposeConvOpTest.SimpleTestQuantizedPerChannelSingleChannel/0
-TransposeConvOpTest/TransposeConvOpTest.SimpleTestQuantizedPerChannel16x8/0
-TransposeConvOpTest/TransposeConvOpTest.TestQuantizedPerChannelMultiChannel/0
-TransposeConvOpTest/TransposeConvOpTest.SimpleTestQuantizedPerChannel16x8/.+
-TransposeConvOpTest/TransposeConvOpTest..*Bias.*
# Const tensor only
TransposeConvOpTest/TransposeConvOpTest/.+/0,29
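Note on the list format: a leading '-' here is part of the file itself and denylists the matching tests from NNAPI acceleration testing, while the number after the comma is the minimum Android API level; the hunk is adjusting which TransposeConv test variants are excluded. A gloss of the entry syntax (my annotation, not part of the file):

  # <test id regex>,<min Android API>   -> accelerate matching tests on that API level and up
  # -<test id regex>[,<min Android API>] -> exclude matching tests from NNAPI testing
  # e.g. "TransposeConvOpTest/TransposeConvOpTest/.+/0,29" runs parameterized
  # variant 0 (the const-tensor variant) against NNAPI on API 29+ devices.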

View File

@@ -250,6 +250,16 @@ bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
}
return false;
}
case kTfLiteBuiltinTransposeConv: {
// Transpose convolution has a different order of inputs:
// 0: output_shape, 1: filter, 2: input, 3: bias.
const int input_id = 2;
const TfLiteType input_type = context->tensors[node->inputs->data[input_id]].type;
if (input_type == kTfLiteInt8) {
return true;
}
return false;
}
case kTfLiteBuiltinSelect: {
const auto value_type = context->tensors[node->inputs->data[1]].type;
return value_type == kTfLiteInt8;
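Since the data input sits at position 2 rather than 0, the lookup has to go through node->inputs->data to map the input position to a tensor index. A minimal sketch of that indexing (hypothetical helper, not part of the commit):

  const TfLiteTensor& TransposeConvDataInput(const TfLiteContext* context,
                                             const TfLiteNode* node) {
    // Input positions: 0 = output_shape, 1 = filter, 2 = data, 3 = bias.
    const int tensor_index = node->inputs->data[/*kDataInputTensor*/ 2];
    return context->tensors[tensor_index];
  }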
@@ -521,6 +531,7 @@ enum {
NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
};
// Returns the SDK level to target when delegating to the given devices.
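Each flag occupies its own bit, so the new NN_TENSOR_FLAG_FORCE_PER_CHANNEL composes with the existing flags via bitwise OR, which is how the Map() change below passes it. A quick sketch (values as defined above):

  int tensor_flags = NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED |
                     NN_TENSOR_FLAG_FORCE_PER_CHANNEL;  // (1U << 2) | (1U << 3)
  const bool force_per_channel =
      (tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL) != 0;  // true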
@@ -1248,6 +1259,8 @@ class NNAPIOpBuilder {
tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
const bool use_int8_asymm_signed =
tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
const bool force_per_channel =
tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
if (ann_tensor_index != -1) {
indices->push_back(ann_tensor_index);
@@ -1303,7 +1316,7 @@ class NNAPIOpBuilder {
TfLiteAffineQuantization* quantization_params =
static_cast<TfLiteAffineQuantization*>(
tensor->quantization.params);
-if (quantization_params->scale->size > 1) {
if (quantization_params->scale->size > 1 || force_per_channel) {
// Set up per-channel quantization.
ann_perchannel_params = {
.channelDim = static_cast<uint32_t>(
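For reference, the initializer truncated above fills NNAPI's ANeuralNetworksSymmPerChannelQuantParams; a complete setup looks roughly like this (a sketch reconstructed from the surrounding fields, not the verbatim diff):

  ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params = {
      // The dimension along which scales vary, e.g. the output-channel axis.
      .channelDim = static_cast<uint32_t>(
          quantization_params->quantized_dimension),
      // One float scale per channel; zero points are implicitly 0.
      .scaleCount = static_cast<uint32_t>(quantization_params->scale->size),
      .scales = quantization_params->scale->data,
  };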
@@ -3043,26 +3056,32 @@ TfLiteStatus NNAPIDelegateKernel::Map(
*nn_op_type = ANEURALNETWORKS_SIN;
} break;
case kTfLiteBuiltinTransposeConv: {
-const bool hybrid_op = IsHybridOperator(
-mapping_args.context, kTfLiteBuiltinTransposeConv, mapping_args.node);
int input_tensor_flags = 0;
const int input_tensor_id =
mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
const int weight_tensor_id =
mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
-if (context->tensors[input_tensor_id].type == kTfLiteInt8) {
-const auto& weights_tensor = context->tensors[weight_tensor_id];
-if ((weights_tensor.type == kTfLiteInt8 ||
-weights_tensor.type == kTfLiteUInt8) &&
-weights_tensor.quantization.type == kTfLiteAffineQuantization) {
-input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
-}
-}
-mapping_args.builder->AddTensorInput(input_tensor_id, hybrid_op,
-input_tensor_flags);
-mapping_args.builder->AddTensorInput(weight_tensor_id, hybrid_op,
-input_tensor_flags);
// Transpose convolution doesn't have a hybrid variant.
const bool hybrid_op = false;
if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
mapping_args.builder->AddTensorInput(
input_tensor_id, hybrid_op,
input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
} else {
mapping_args.builder->AddTensorInput(
input_tensor_id, hybrid_op,
input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
}
// Transpose convolution uses per-channel quantization with int8 inputs
// even if the number of channels in quantization parameters is equal to 1
// (as opposed to conv2d, which uses per-tensor quantization in this
// case).
mapping_args.builder->AddTensorInput(
weight_tensor_id, hybrid_op,
input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
// NNAPI requires a bias tensor, so we allocate a new tensor to fill
// it with zeroes. It is deleted with other tensors in the context
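The branch on android_sdk_version above is the heart of the fix: NNAPI 1.3 (API 30) accepts int8 activations directly as TENSOR_QUANT8_ASYMM_SIGNED, while earlier levels only support TENSOR_QUANT8_ASYMM, so NN_TENSOR_FLAG_INT8_CONVERSION makes the delegate re-encode the data as uint8. A sketch of that legacy re-encoding (my illustration of the flag's effect, not code from this commit):

  // Shifting both the values and the zero point by +128 keeps the
  // dequantized value scale * (q - zero_point) unchanged.
  uint8_t ToUint8(int8_t q) { return static_cast<uint8_t>(q + 128); }
  int32_t ShiftZeroPoint(int32_t zero_point) { return zero_point + 128; }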
@@ -4285,6 +4304,11 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
}
// Map inputs to NN API tensor indices.
for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
// Everything is added during Map() since the input tensors
// have a different order.
continue;
}
const auto input_index = node->inputs->data[input_pos];
if (need_int8_conversion &&
(input_pos == 0 ||
@@ -4341,11 +4365,6 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
// by the Map() mapping.
continue;
}
-if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
-// Everything is added during Map since input tensors
-// have different order.
-continue;
-}
// Pad and Padv2 have an optional parameter for a pad value which has
// to be converted to a scalar type in NN API.
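Taken together, these last two hunks move the TransposeConv early-exit to the top of the input loop, so the generic int8-conversion path can no longer register the op's inputs in their standard positions before Map() adds them in TransposeConv's own order. The resulting control flow, sketched:

  for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
    if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
      continue;  // All inputs were already added in Map(), in the op's own order.
    }
    // ...generic per-input handling, including int8 -> uint8 conversion...
  }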

View File

@@ -397,14 +397,19 @@ class PerChannelQuantizedTransposeConvOpModel
TEST_P(TransposeConvOpTest, SimpleTestQuantizedPerChannelSingleChannel) {
const std::initializer_list<float> filter_data = {1, 2, 3, 4, 5, 6, 7, 8, 9};
const std::initializer_list<int8_t> const_filter_data = {14, 28, 42, 56, 71,
85, 99, 113, 127};
PerChannelQuantizedTransposeConvOpModel model(
GetRegistration(), {1, 4, 4, 1},
{TensorType_INT8, {1, 3, 3, 1}, 0, 0, 0, 0, true, {9.0 / 127}, {0}, 0},
-{}, {TensorType_INT8, {1, 4, 4, 1}, 0, 0, 16.0 / 255, -128},
const_filter_data,
{TensorType_INT8, {1, 4, 4, 1}, 0, 0, 16.0 / 255, -128},
{TensorType_INT8, {}, 0, 0, 2, -128}, Padding_SAME, 1, 1, GetTestType(),
/* version */ 2);
model.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
-model.SetFilter(filter_data);
if (GetTestType() == TestType::kDynamic) {
model.SetFilter(filter_data);
}
model.Invoke();
EXPECT_THAT(
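The new const_filter_data is simply filter_data quantized with the filter tensor's declared scale of 9.0/127 and zero point 0, i.e. q = round(f / scale); for example 5 -> round(5 * 127 / 9) = 71 and 9 -> 127, which reproduces {14, 28, 42, 56, 71, 85, 99, 113, 127}. A sketch of that computation (my illustration, not test code):

  #include <cmath>
  #include <cstdint>

  // Symmetric quantization of the constant filter: scale = 9.0/127, zero point 0.
  int8_t QuantizeFilterValue(float f) {
    return static_cast<int8_t>(std::lround(f * 127.0 / 9.0));
  }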
@@ -421,6 +426,9 @@ TEST_P(TransposeConvOpTest, SimpleTestQuantizedPerChannelSingleChannel) {
TEST_P(TransposeConvOpTest, TestQuantizedPerChannelMultiChannel) {
const std::initializer_list<float> filter_data = {
1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
const std::initializer_list<int8_t> const_filter_data = {
7, 22, 37, 52, 67, 82, 97, 112, 127,
14, 28, 42, 56, 71, 85, 99, 113, 127};
PerChannelQuantizedTransposeConvOpModel model(
GetRegistration(), {1, 5, 5, 2},
{TensorType_INT8,
@@ -433,11 +441,13 @@ TEST_P(TransposeConvOpTest, TestQuantizedPerChannelMultiChannel) {
{17.0 / 127, 18.0 / 127},
{0, 0},
0},
-{}, {TensorType_INT8, {1, 2, 2, 1}, 0, 0, 4.0 / 255, -128},
const_filter_data, {TensorType_INT8, {1, 2, 2, 1}, 0, 0, 4.0 / 255, -128},
{TensorType_INT8, {}, 0, 0, 1, -128}, Padding_VALID, 2, 2, GetTestType(),
/* version */ 2);
model.SetInput({1, 2, 3, 4});
-model.SetFilter(filter_data);
if (GetTestType() == TestType::kDynamic) {
model.SetFilter(filter_data);
}
model.Invoke();
EXPECT_THAT(
@@ -645,10 +655,6 @@ class TransposeConvOpBiasModel : public BaseTransposeConvBiasOpModel<float> {
// model.layers[1].set_weights([filter_data, bias_data])
// output = model.predict(input_data)
TEST_P(TransposeConvOpTest, MultiChannelBiasTest) {
-// TODO(b/138722124): Enable these tests on NNAPI.
-if (SingleOpModel::GetForceUseNnapi()) {
-return;
-}
TransposeConvOpBiasModel model(
GetRegistration(), /*output_shape=*/{1, 5, 5, 2},
/*filter=*/{TensorType_FLOAT32, {2, 3, 3, 1}},
@@ -682,10 +688,6 @@ class QuantizedTransposeConvBiasOpModel
};
TEST_P(TransposeConvOpTest, SimpleBiasTestQuantized) {
-// TODO(b/138722124): Enable these tests on NNAPI.
-if (SingleOpModel::GetForceUseNnapi()) {
-return;
-}
// Float would be {1, 2, 3, 4, 5, 6, 7, 8, 9}
std::initializer_list<uint8_t> filter_data = {129, 131, 133, 135, 137,
139, 141, 143, 145};
@@ -729,11 +731,6 @@ class PerChannelQuantizedTransposeConvBiasOpModel
};
TEST_P(TransposeConvOpTest, SimpleBiasTestQuantizedPerChannelSingleChannel) {
-// TODO(b/138722124): Enable these tests on NNAPI.
-if (SingleOpModel::GetForceUseNnapi()) {
-return;
-}
const std::initializer_list<float> filter_data = {1, 2, 3, 4, 5, 6, 7, 8, 9};
PerChannelQuantizedTransposeConvBiasOpModel model(
GetRegistration(), {1, 4, 4, 1},