Add quantizer for SVDF.
- State is quantized to 16 bits. - Time weight is also quantized to 16 bits, but the target range is [-512, 512] instead of [-32767, 32767] because there is a "reduce" operation right after the element-wise multiplication between state and time weight. The size of the "reduce" can be as high as 30, so we reserve 6 bits (64) for the summation, leaving only 10 bits for the time weight. PiperOrigin-RevId: 286920154 Change-Id: I45104b69452a1033db230d1db6bd280fa289b733
This commit is contained in:
parent
741da13f8b
commit
c2670013c8
@ -247,6 +247,8 @@ tf_cc_test(
|
||||
"//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin",
|
||||
"//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin",
|
||||
"//tensorflow/lite/tools/optimize:testdata/split.bin",
|
||||
"//tensorflow/lite/tools/optimize:testdata/svdf_calibrated.bin",
|
||||
"//tensorflow/lite/tools/optimize:testdata/svdf_quantized.bin",
|
||||
"//tensorflow/lite/tools/optimize:testdata/unpack.bin",
|
||||
],
|
||||
tags = [
|
||||
|
@ -871,6 +871,29 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
|
||||
property.version = 2;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_SVDF: {
|
||||
TensorProperty tensor_property_time;
|
||||
// Only 10bits are needed because 6bits are reserved for the reduce
|
||||
// operation after element-wise multiplication between state and time
|
||||
// weights.
|
||||
tensor_property_time.number_of_bits = 10;
|
||||
TensorProperty tensor_property_bias;
|
||||
tensor_property_bias.use_derived_scale = true;
|
||||
tensor_property_bias.number_of_bits = 32;
|
||||
tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
|
||||
TensorProperty tensor_property_state;
|
||||
tensor_property_state.number_of_bits = 16;
|
||||
tensor_property_state.state_tensor = true;
|
||||
|
||||
property.inputs = {{0, {}},
|
||||
{1, {}},
|
||||
{2, tensor_property_time},
|
||||
{4, tensor_property_state},
|
||||
{3, tensor_property_bias}};
|
||||
property.outputs = {{0, {}}};
|
||||
property.version = 2;
|
||||
break;
|
||||
}
|
||||
case BuiltinOperator_TRANSPOSE:
|
||||
property.inputs = {{0, {}}};
|
||||
property.outputs = {{0, {}}};
|
||||
|
@ -479,8 +479,26 @@ TfLiteStatus QuantizeOpInput(
|
||||
return utils::SymmetricPerLayerBiasQuantize(model, tensor, scale,
|
||||
error_reporter);
|
||||
|
||||
} else if (tensor_property.number_of_bits == 10) {
|
||||
// When the number of bits is 10 (instead of 16), quantize the tensor to
|
||||
// [-512, 512], instead of [-32767, 32767].
|
||||
TensorT* tensor = subgraph->tensors[tensor_idx].get();
|
||||
int total_size = 1;
|
||||
for (int i = 0; i < tensor->shape.size(); ++i) {
|
||||
total_size *= tensor->shape[i];
|
||||
}
|
||||
BufferT* buffer = model->buffers[tensor->buffer].get();
|
||||
float* buffer_data = reinterpret_cast<float*>(buffer->data.data());
|
||||
auto minmax =
|
||||
std::minmax_element(buffer_data, buffer_data + total_size);
|
||||
const float range =
|
||||
std::max(std::abs(*minmax.first), std::abs(*minmax.second));
|
||||
const float quantized_range = 512.0;
|
||||
const float scale = range / quantized_range;
|
||||
return utils::SymmetricQuantizeFloatsToInt16(model, tensor, scale,
|
||||
error_reporter);
|
||||
} else {
|
||||
// Only 8, 16, 32 are supported.
|
||||
// Only 8, 16, 32, 10 are supported.
|
||||
// TODO(jianlijianli): extend this to support arbitrary bits.
|
||||
error_reporter->Report(
|
||||
"Unable to quantize buffer or min/max value for input %d "
|
||||
@ -499,14 +517,15 @@ TfLiteStatus QuantizeOpInput(
|
||||
utils::QuantizeActivation(tensor);
|
||||
} else if (tensor_property.number_of_bits == 16) {
|
||||
TensorT* tensor = subgraph->tensors[tensor_idx].get();
|
||||
float quantized_range = 32767.0;
|
||||
float range = std::max(std::abs(tensor->quantization->min[0]),
|
||||
std::abs(tensor->quantization->max[0]));
|
||||
if (tensor_property.extend_to_power_of_two) {
|
||||
const int power_of_two_scale = utils::GetPowerOfTwoScale(
|
||||
tensor->quantization->min[0], tensor->quantization->max[0]);
|
||||
range = std::pow(2, power_of_two_scale);
|
||||
quantized_range = 32768.0;
|
||||
}
|
||||
const float quantized_range = 32768.0;
|
||||
const float scale = range / quantized_range;
|
||||
utils::QuantizeActivationToInt16(tensor, scale);
|
||||
}
|
||||
|
@ -1115,6 +1115,65 @@ TEST_F(QuantizeLSTM2Test, VerifyLSTM) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test fixture for SVDF quantization. The constructor reads the calibrated
// SVDF test model (internal::kSvdfCalibrated), keeps the read-only model
// returned by GetModel() in readonly_model_, and unpacks it into model_ so
// that tests can quantize model_.
class QuantizeSVDFTest : public QuantizeModelTest {
|
||||
protected:
|
||||
QuantizeSVDFTest() {
|
||||
input_model_ = ReadModel(internal::kSvdfCalibrated);
|
||||
readonly_model_ = input_model_->GetModel();
|
||||
readonly_model_->UnPackTo(&model_);
|
||||
}
|
||||
};
|
||||
|
||||
// Quantizes the SVDF model loaded by the fixture and verifies that the result
// matches the expected quantized model (internal::kSvdfQuantized): every
// tensor's metadata and quantization parameters, and every buffer's contents.
TEST_F(QuantizeSVDFTest, VerifySVDF) {
  // Quantize model.
  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
                              TensorType_INT8, &error_reporter_);
  ASSERT_EQ(kTfLiteOk, status);

  // Read expected model.
  auto expected_fb_model = ReadModel(internal::kSvdfQuantized);
  auto expected_read_only_model = expected_fb_model->GetModel();
  ModelT expected_model;
  expected_read_only_model->UnPackTo(&expected_model);

  // Comparison.
  ASSERT_EQ(model_.subgraphs.size(), expected_model.subgraphs.size());
  for (size_t subgraph_idx = 0; subgraph_idx < model_.subgraphs.size();
       subgraph_idx++) {
    const auto graph = model_.subgraphs[subgraph_idx].get();
    const auto expected_graph = expected_model.subgraphs[subgraph_idx].get();
    ASSERT_EQ(graph->tensors.size(), expected_graph->tensors.size());
    for (size_t i = 0; i < graph->tensors.size(); i++) {
      const auto tensor = graph->tensors[i].get();
      const auto expected_tensor = expected_graph->tensors[i].get();
      EXPECT_EQ(tensor->buffer, expected_tensor->buffer);
      EXPECT_EQ(tensor->is_variable, expected_tensor->is_variable);
      EXPECT_EQ(tensor->shape, expected_tensor->shape);
      EXPECT_EQ(tensor->name, expected_tensor->name);
      EXPECT_EQ(tensor->type, expected_tensor->type);
      const auto quantization_params = tensor->quantization.get();
      const auto expected_quantization_params =
          expected_tensor->quantization.get();
      if (quantization_params != nullptr ||
          expected_quantization_params != nullptr) {
        // Fatal assertions: if only one side carries quantization parameters,
        // stop here instead of dereferencing a null pointer below.
        ASSERT_NE(quantization_params, nullptr);
        ASSERT_NE(expected_quantization_params, nullptr);
        EXPECT_EQ(quantization_params->scale,
                  expected_quantization_params->scale);
        EXPECT_EQ(quantization_params->zero_point,
                  expected_quantization_params->zero_point);
      }
    }
  }
  ASSERT_EQ(model_.buffers.size(), expected_model.buffers.size());
  for (size_t buffer_idx = 0; buffer_idx < model_.buffers.size();
       ++buffer_idx) {
    // Bind by reference; copying every buffer just to compare it is wasteful.
    const auto& buffer = model_.buffers[buffer_idx].get()->data;
    const auto& expected_buffer =
        expected_model.buffers[buffer_idx].get()->data;
    EXPECT_EQ(buffer, expected_buffer);
  }
}
|
||||
|
||||
class QuantizeFCTest : public QuantizeModelTest {
|
||||
protected:
|
||||
QuantizeFCTest() {
|
||||
|
@ -59,6 +59,9 @@ const char* kModelWithMaximumOp = "maximum.bin";
|
||||
const char* kLstmCalibrated2 = "lstm_calibrated2.bin";
|
||||
const char* kLstmQuantized2 = "lstm_quantized2.bin";
|
||||
|
||||
const char* kSvdfCalibrated = "svdf_calibrated.bin";
|
||||
const char* kSvdfQuantized = "svdf_quantized.bin";
|
||||
|
||||
const char* kModelWithUnpack = "unpack.bin";
|
||||
|
||||
int FailOnErrorReporter::Report(const char* format, va_list args) {
|
||||
|
@ -95,6 +95,10 @@ extern const char* kModelWithMaximumOp;
|
||||
extern const char* kLstmCalibrated2;
|
||||
extern const char* kLstmQuantized2;
|
||||
|
||||
// Test model with SVDF op.
|
||||
extern const char* kSvdfCalibrated;
|
||||
extern const char* kSvdfQuantized;
|
||||
|
||||
// Test model with an unpack op.
|
||||
extern const char* kModelWithUnpack;
|
||||
|
||||
|
BIN
tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin
vendored
Normal file
BIN
tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin
vendored
Normal file
Binary file not shown.
BIN
tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin
vendored
Normal file
BIN
tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin
vendored
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user