Avoid converting tensors with per-channel quantization to UINT8 in NNAPI delegate.

PiperOrigin-RevId: 315349649
Change-Id: I47627d190351e43c09d567936d14ef96992a4a3b
This commit is contained in:
A. Unique TensorFlower 2020-06-08 14:10:26 -07:00 committed by TensorFlower Gardener
parent da37e9874b
commit 04a5c1d9b4
2 changed files with 111 additions and 1 deletion

View File

@ -1180,7 +1180,8 @@ class NNAPIOpBuilder {
"setting new operand per channel quantization params", nnapi_errno_);
}
if (tensor->allocation_type == kTfLiteMmapRo) {
if (IsQuantized(tensor_type) && need_int8_conversion) {
if (IsQuantized(tensor_type) && need_int8_conversion &&
nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
// We need to add a tensor and convert the weights into uint8.
// Currently this is only needed for fully_connected. The new_tensor is
// needed for lifetime management for the converted weights.

View File

@ -16,6 +16,8 @@ limitations under the License.
#include <sys/mman.h>
#include <initializer_list>
#include <gtest/gtest.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/interpreter.h"
@ -845,6 +847,113 @@ TEST(ConvolutionOpTest, SimpleTestQuantizedWithDilation) {
ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5}));
}
class PerChannelQuantizedConvolutionWithConstantFilterOpModel
: public SingleOpModelWithNNAPI {
public:
PerChannelQuantizedConvolutionWithConstantFilterOpModel(
const TensorData& input, const TensorData& filter,
std::initializer_list<int8_t> filter_data,
std::initializer_list<int32_t> bias_data, const TensorData& output,
int stride_width = 2, int stride_height = 2,
enum Padding padding = Padding_VALID,
enum ActivationFunctionType activation = ActivationFunctionType_NONE,
int dilation_width_factor = 1, int dilation_height_factor = 1)
: input_type_(input.type), filter_type_(filter.type) {
CHECK(filter.per_channel_quantization);
input_ = AddInput(input);
filter_ = AddConstInput(filter, filter_data);
const int bias_size = GetShape(filter_)[0];
const int num_channels = filter.per_channel_quantization_scales.size();
const std::vector<int64_t> bias_offsets(num_channels, 0);
std::vector<float> bias_scales(num_channels);
for (int i = 0; i < num_channels; i++) {
bias_scales[i] = input.scale * filter.per_channel_quantization_scales[i];
}
const TensorData bias{TensorType_INT32,
{bias_size},
/*min=*/0,
/*max=*/0,
/*scale=*/0,
/*zero_point=*/0,
/*per_channel_quantization=*/true,
/*per_channel_quantization_scales=*/bias_scales,
/*per_channel_quantization_offsets=*/bias_offsets,
/*channel_index==*/0};
bias_ = AddConstInput(bias, bias_data);
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
CreateConv2DOptions(
builder_, padding, stride_width, stride_height, activation,
dilation_width_factor, dilation_height_factor)
.Union());
BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
}
void SetInput(std::initializer_list<float> data) {
QuantizeAndPopulate<int8_t>(input_, data);
}
std::vector<int8_t> GetOutput() { return ExtractVector<int8_t>(output_); }
protected:
int input_;
int filter_;
int bias_;
int output_;
const TensorType input_type_;
const TensorType filter_type_;
};
// Verifies that a CONV_2D with a per-channel quantized constant int8 filter
// produces the expected quantized output through the NNAPI delegate (i.e. the
// per-channel weights are not converted to uint8 along the way).
TEST(ConvolutionOpTest, SimplePerChannelTest) {
  PerChannelQuantizedConvolutionWithConstantFilterOpModel m(
      {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1},
      {TensorType_INT8,
       // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
       {2, 2, 2, 2},
       /*min=*/0,
       /*max=*/0,
       /*scale=*/0,
       /*zero_point=*/0,
       /*per_channel_quantization=*/true,
       /*per_channel_quantization_scales=*/{1, 2},
       /*per_channel_quantization_offsets=*/{0, 0},
       /*channel_index=*/0},
      /*filter_data=*/
      {
          // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel]
          1, 2,  // out channel = 0, y = 0, x = 0
          3, 4,  // out channel = 0, y = 0, x = 1
          3, 4,  // out channel = 0, y = 1, x = 0
          5, 6,  // out channel = 0, y = 1, x = 1
          4, 4,  // out channel = 1, y = 0, x = 0
          3, 3,  // out channel = 1, y = 0, x = 1
          2, 2,  // out channel = 1, y = 1, x = 0
          1, 1,  // out channel = 1, y = 1, x = 1
      },
      /*bias_data=*/{6, -2}, {TensorType_INT8, {}, -63.5, 64, 0.5, -1},
      /*stride_width=*/1, /*stride_height=*/1);
  m.SetInput({
      // [1 * 2 * 3 * 2] as [batch, y, x, input_channel]
      3, 2,    // batch = 0, y = 0, x = 0
      1, -1,   // batch = 0, y = 0, x = 1
      -2, -3,  // batch = 0, y = 0, x = 2
      4, 3,    // batch = 0, y = 1, x = 0
      2, -2,   // batch = 0, y = 1, x = 1
      -3, -4,  // batch = 0, y = 1, x = 2
  });

  // Invoke and verify output.
  // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel]
  m.Invoke();
  EXPECT_THAT(m.GetOutput(),
              testing::Pointwise(QuantizedNear(), {61, 127, -115, -93}));
}
class DepthwiseConvolutionOpModel : public SingleOpModelWithNNAPI {
public:
DepthwiseConvolutionOpModel(const TensorData& input, const TensorData& filter,