Add quantizer for SVDF.

- State is quantized to 16 bits.
- Time weight is also quantized to 16 bits, but its target range is [-512, 512] instead of [-32767, 32767], because a "reduce" (summation) operation immediately follows the element-wise multiplication between state and time weight. The reduction length can be as high as 30, so we reserve 6 bits (a factor of 64) of headroom for the summation, leaving only 10 bits for the time weight. See the worked check of this arithmetic below.

PiperOrigin-RevId: 286920154
Change-Id: I45104b69452a1033db230d1db6bd280fa289b733
Author: Jian Li (committed by TensorFlower Gardener)
Date: 2019-12-23 11:20:32 -08:00
Parent: 741da13f8b
Commit: c2670013c8

8 changed files with 112 additions and 2 deletions
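As a sanity check on the bit allocation described in the commit message, the following standalone C++ sketch verifies that an int16 state multiplied by a 10-bit time weight, summed with 6 bits of headroom, still fits in a 32-bit accumulator. It is illustrative only; the worst-case values are assumptions, not taken from this change.

#include <cstdint>
#include <iostream>

int main() {
  const int64_t kMaxState = 32767;      // int16 state magnitude
  const int64_t kMaxTimeWeight = 512;   // 10-bit target range [-512, 512]
  const int64_t kReduceHeadroom = 64;   // 6 bits reserved; actual length <= ~30
  // Worst-case value after the element-wise mul and the reduce.
  const int64_t max_sum = kMaxState * kMaxTimeWeight * kReduceHeadroom;
  // 32767 * 512 * 64 = 1073709056 < 2147483647, so int32 does not overflow.
  std::cout << max_sum << " <= " << INT32_MAX << ": "
            << (max_sum <= INT32_MAX ? "ok" : "overflow") << std::endl;
  return 0;
}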

tensorflow/lite/tools/optimize/BUILD

@@ -247,6 +247,8 @@ tf_cc_test(
"//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin",
"//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin",
"//tensorflow/lite/tools/optimize:testdata/split.bin",
"//tensorflow/lite/tools/optimize:testdata/svdf_calibrated.bin",
"//tensorflow/lite/tools/optimize:testdata/svdf_quantized.bin",
"//tensorflow/lite/tools/optimize:testdata/unpack.bin",
],
tags = [

tensorflow/lite/tools/optimize/operator_property.cc

@@ -871,6 +871,29 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.version = 2;
break;
}
case BuiltinOperator_SVDF: {
TensorProperty tensor_property_time;
// Only 10 bits are needed because 6 bits are reserved for the reduce
// operation after element-wise multiplication between state and time
// weights.
tensor_property_time.number_of_bits = 10;
TensorProperty tensor_property_bias;
tensor_property_bias.use_derived_scale = true;
tensor_property_bias.number_of_bits = 32;
tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
TensorProperty tensor_property_state;
tensor_property_state.number_of_bits = 16;
tensor_property_state.state_tensor = true;
property.inputs = {{0, {}},
{1, {}},
{2, tensor_property_time},
{4, tensor_property_state},
{3, tensor_property_bias}};
property.outputs = {{0, {}}};
property.version = 2;
break;
}
case BuiltinOperator_TRANSPOSE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
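A note on the bias property above: derived_scale = {{2, 4}, {}, {}} means the 32-bit bias does not get its own calibrated scale; the quantizer derives it as the product of the scales of input tensors 2 (time weight) and 4 (state). A minimal sketch of that rule with made-up numbers follows (the real logic lives in TF Lite's quantization utilities, not in this snippet):

#include <iostream>

int main() {
  // Hypothetical calibrated scales for the SVDF inputs.
  const float time_weight_scale = 1.5f / 512.0f;  // input 2, 10-bit range
  const float state_scale = 6.0f / 32768.0f;      // input 4, 16-bit state
  // Derived bias scale: product of the input-tensor scales listed in
  // derived_scale, so the int32 bias matches the mul-accumulate result.
  const float bias_scale = time_weight_scale * state_scale;
  std::cout << "bias scale = " << bias_scale << std::endl;
  return 0;
}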

tensorflow/lite/tools/optimize/quantize_model.cc

@@ -479,8 +479,26 @@ TfLiteStatus QuantizeOpInput(
return utils::SymmetricPerLayerBiasQuantize(model, tensor, scale,
error_reporter);
} else if (tensor_property.number_of_bits == 10) {
// When the number of bits is 10 (instead of 16), quantize the tensor to
// [-512, 512], instead of [-32767, 32767].
TensorT* tensor = subgraph->tensors[tensor_idx].get();
int total_size = 1;
for (int i = 0; i < tensor->shape.size(); ++i) {
total_size *= tensor->shape[i];
}
BufferT* buffer = model->buffers[tensor->buffer].get();
float* buffer_data = reinterpret_cast<float*>(buffer->data.data());
auto minmax =
std::minmax_element(buffer_data, buffer_data + total_size);
const float range =
std::max(std::abs(*minmax.first), std::abs(*minmax.second));
const float quantized_range = 512.0;
const float scale = range / quantized_range;
return utils::SymmetricQuantizeFloatsToInt16(model, tensor, scale,
error_reporter);
} else {
- // Only 8, 16, 32 are supported.
+ // Only 8, 16, 32, 10 are supported.
// TODO(jianlijianli): extend this to support arbitrary bits.
error_reporter->Report(
"Unable to quantize buffer or min/max value for input %d "
@@ -499,14 +517,15 @@ TfLiteStatus QuantizeOpInput(
utils::QuantizeActivation(tensor);
} else if (tensor_property.number_of_bits == 16) {
TensorT* tensor = subgraph->tensors[tensor_idx].get();
- float quantized_range = 32767.0;
float range = std::max(std::abs(tensor->quantization->min[0]),
std::abs(tensor->quantization->max[0]));
if (tensor_property.extend_to_power_of_two) {
const int power_of_two_scale = utils::GetPowerOfTwoScale(
tensor->quantization->min[0], tensor->quantization->max[0]);
range = std::pow(2, power_of_two_scale);
- quantized_range = 32768.0;
}
+ const float quantized_range = 32768.0;
const float scale = range / quantized_range;
utils::QuantizeActivationToInt16(tensor, scale);
}
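To make the two scale computations above concrete, here is a standalone sketch with assumed min/max ranges. The helper is illustrative only and is not the actual SymmetricQuantizeFloatsToInt16 or QuantizeActivationToInt16 implementation:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative symmetric quantization: round(value / scale), clamped to int16.
int16_t QuantizeSymmetric(float value, float scale) {
  const float q = std::round(value / scale);
  return static_cast<int16_t>(std::min(std::max(q, -32768.0f), 32767.0f));
}

int main() {
  // 10-bit time weight: assume the float weights span [-2.0, 2.0].
  const float weight_scale = 2.0f / 512.0f;
  std::cout << QuantizeSymmetric(2.0f, weight_scale) << std::endl;   // 512
  std::cout << QuantizeSymmetric(-1.0f, weight_scale) << std::endl;  // -256

  // 16-bit activation: assume a calibrated range of [-4.0, 4.0]. The
  // denominator is now uniformly 32768, a power of two, in both branches.
  const float activation_scale = 4.0f / 32768.0f;
  std::cout << QuantizeSymmetric(3.0f, activation_scale) << std::endl;  // 24576
  return 0;
}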

tensorflow/lite/tools/optimize/quantize_model_test.cc

@@ -1115,6 +1115,65 @@ TEST_F(QuantizeLSTM2Test, VerifyLSTM) {
}
}

class QuantizeSVDFTest : public QuantizeModelTest {
protected:
QuantizeSVDFTest() {
input_model_ = ReadModel(internal::kSvdfCalibrated);
readonly_model_ = input_model_->GetModel();
readonly_model_->UnPackTo(&model_);
}
};

TEST_F(QuantizeSVDFTest, VerifySVDF) {
// Quantize model.
auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
TensorType_INT8, &error_reporter_);
ASSERT_EQ(kTfLiteOk, status);
// Read expected model.
auto expected_fb_model = ReadModel(internal::kSvdfQuantized);
auto expected_read_only_model = expected_fb_model->GetModel();
ModelT expected_model;
expected_read_only_model->UnPackTo(&expected_model);
// Comparison.
ASSERT_EQ(model_.subgraphs.size(), expected_model.subgraphs.size());
for (size_t subgraph_idx = 0; subgraph_idx < model_.subgraphs.size();
subgraph_idx++) {
const auto graph = model_.subgraphs[subgraph_idx].get();
const auto expected_graph = expected_model.subgraphs[subgraph_idx].get();
ASSERT_EQ(graph->tensors.size(), expected_graph->tensors.size());
for (size_t i = 0; i < graph->tensors.size(); i++) {
const auto tensor = graph->tensors[i].get();
const auto expected_tensor = expected_graph->tensors[i].get();
EXPECT_EQ(tensor->buffer, expected_tensor->buffer);
EXPECT_EQ(tensor->is_variable, expected_tensor->is_variable);
EXPECT_EQ(tensor->shape, expected_tensor->shape);
EXPECT_EQ(tensor->name, expected_tensor->name);
EXPECT_EQ(tensor->type, expected_tensor->type);
const auto quantization_params = tensor->quantization.get();
const auto expected_quantization_params =
expected_tensor->quantization.get();
if (quantization_params != nullptr ||
expected_quantization_params != nullptr) {
EXPECT_NE(quantization_params, nullptr);
EXPECT_NE(expected_quantization_params, nullptr);
EXPECT_EQ(quantization_params->scale,
expected_quantization_params->scale);
EXPECT_EQ(quantization_params->zero_point,
expected_quantization_params->zero_point);
}
}
}
ASSERT_EQ(model_.buffers.size(), expected_model.buffers.size());
for (size_t buffer_idx = 0; buffer_idx < model_.buffers.size();
++buffer_idx) {
const auto buffer = model_.buffers[buffer_idx].get()->data;
const auto expected_buffer = expected_model.buffers[buffer_idx].get()->data;
EXPECT_EQ(buffer, expected_buffer);
}
}

class QuantizeFCTest : public QuantizeModelTest {
protected:
QuantizeFCTest() {

tensorflow/lite/tools/optimize/test_util.cc

@@ -59,6 +59,9 @@ const char* kModelWithMaximumOp = "maximum.bin";
const char* kLstmCalibrated2 = "lstm_calibrated2.bin";
const char* kLstmQuantized2 = "lstm_quantized2.bin";
const char* kSvdfCalibrated = "svdf_calibrated.bin";
const char* kSvdfQuantized = "svdf_quantized.bin";
const char* kModelWithUnpack = "unpack.bin";

int FailOnErrorReporter::Report(const char* format, va_list args) {

tensorflow/lite/tools/optimize/test_util.h

@@ -95,6 +95,10 @@ extern const char* kModelWithMaximumOp;
extern const char* kLstmCalibrated2;
extern const char* kLstmQuantized2;
// Test model with SVDF op.
extern const char* kSvdfCalibrated;
extern const char* kSvdfQuantized;
// Test model with an unpack op.
extern const char* kModelWithUnpack;

tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin (binary file not shown)

tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin (binary file not shown)