diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD index bdc1baf892e..c7c50437d8f 100644 --- a/tensorflow/lite/tools/optimize/BUILD +++ b/tensorflow/lite/tools/optimize/BUILD @@ -247,6 +247,8 @@ tf_cc_test( "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin", "//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin", "//tensorflow/lite/tools/optimize:testdata/split.bin", + "//tensorflow/lite/tools/optimize:testdata/svdf_calibrated.bin", + "//tensorflow/lite/tools/optimize:testdata/svdf_quantized.bin", "//tensorflow/lite/tools/optimize:testdata/unpack.bin", ], tags = [ diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index 9b94bd3d44f..b2044c27f12 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -871,6 +871,29 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.version = 2; break; } + case BuiltinOperator_SVDF: { + TensorProperty tensor_property_time; + // Only 10 bits are needed because 6 bits are reserved for the reduce + // operation after element-wise multiplication between state and time + // weights. 
+ tensor_property_time.number_of_bits = 10; + TensorProperty tensor_property_bias; + tensor_property_bias.use_derived_scale = true; + tensor_property_bias.number_of_bits = 32; + tensor_property_bias.derived_scale = {{2, 4}, {}, {}}; + TensorProperty tensor_property_state; + tensor_property_state.number_of_bits = 16; + tensor_property_state.state_tensor = true; + + property.inputs = {{0, {}}, + {1, {}}, + {2, tensor_property_time}, + {4, tensor_property_state}, + {3, tensor_property_bias}}; + property.outputs = {{0, {}}}; + property.version = 2; + break; + } case BuiltinOperator_TRANSPOSE: property.inputs = {{0, {}}}; property.outputs = {{0, {}}}; diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc index 26d595947cd..6fc19ff2a56 100644 --- a/tensorflow/lite/tools/optimize/quantize_model.cc +++ b/tensorflow/lite/tools/optimize/quantize_model.cc @@ -479,8 +479,26 @@ TfLiteStatus QuantizeOpInput( return utils::SymmetricPerLayerBiasQuantize(model, tensor, scale, error_reporter); + } else if (tensor_property.number_of_bits == 10) { + // When the number of bits is 10 (instead of 16), quantize the tensor to + // [-512, 512], instead of [-32767, 32767]. + TensorT* tensor = subgraph->tensors[tensor_idx].get(); + int total_size = 1; + for (int i = 0; i < tensor->shape.size(); ++i) { + total_size *= tensor->shape[i]; + } + BufferT* buffer = model->buffers[tensor->buffer].get(); + float* buffer_data = reinterpret_cast(buffer->data.data()); + auto minmax = + std::minmax_element(buffer_data, buffer_data + total_size); + const float range = + std::max(std::abs(*minmax.first), std::abs(*minmax.second)); + const float quantized_range = 512.0; + const float scale = range / quantized_range; + return utils::SymmetricQuantizeFloatsToInt16(model, tensor, scale, + error_reporter); } else { - // Only 8, 16, 32 are supported. + // Only 8, 16, 32, 10 are supported. // TODO(jianlijianli): extend this to support arbitrary bits. 
error_reporter->Report( "Unable to quantize buffer or min/max value for input %d " @@ -499,14 +517,15 @@ TfLiteStatus QuantizeOpInput( utils::QuantizeActivation(tensor); } else if (tensor_property.number_of_bits == 16) { TensorT* tensor = subgraph->tensors[tensor_idx].get(); + float quantized_range = 32767.0; float range = std::max(std::abs(tensor->quantization->min[0]), std::abs(tensor->quantization->max[0])); if (tensor_property.extend_to_power_of_two) { const int power_of_two_scale = utils::GetPowerOfTwoScale( tensor->quantization->min[0], tensor->quantization->max[0]); range = std::pow(2, power_of_two_scale); + quantized_range = 32768.0; } - const float quantized_range = 32768.0; const float scale = range / quantized_range; utils::QuantizeActivationToInt16(tensor, scale); } diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc index 89038ad764f..344a6054bd6 100644 --- a/tensorflow/lite/tools/optimize/quantize_model_test.cc +++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc @@ -1115,6 +1115,65 @@ TEST_F(QuantizeLSTM2Test, VerifyLSTM) { } } +class QuantizeSVDFTest : public QuantizeModelTest { + protected: + QuantizeSVDFTest() { + input_model_ = ReadModel(internal::kSvdfCalibrated); + readonly_model_ = input_model_->GetModel(); + readonly_model_->UnPackTo(&model_); + } +}; + +TEST_F(QuantizeSVDFTest, VerifySVDF) { + // Quantize model. + auto status = QuantizeModel(&builder_, &model_, TensorType_INT8, + TensorType_INT8, &error_reporter_); + ASSERT_EQ(kTfLiteOk, status); + + // Read expected model. + auto expected_fb_model = ReadModel(internal::kSvdfQuantized); + auto expected_read_only_model = expected_fb_model->GetModel(); + ModelT expected_model; + expected_read_only_model->UnPackTo(&expected_model); + + // Comparison. 
+ ASSERT_EQ(model_.subgraphs.size(), expected_model.subgraphs.size()); + for (size_t subgraph_idx = 0; subgraph_idx < model_.subgraphs.size(); + subgraph_idx++) { + const auto graph = model_.subgraphs[subgraph_idx].get(); + const auto expected_graph = expected_model.subgraphs[subgraph_idx].get(); + ASSERT_EQ(graph->tensors.size(), expected_graph->tensors.size()); + for (size_t i = 0; i < graph->tensors.size(); i++) { + const auto tensor = graph->tensors[i].get(); + const auto expected_tensor = expected_graph->tensors[i].get(); + EXPECT_EQ(tensor->buffer, expected_tensor->buffer); + EXPECT_EQ(tensor->is_variable, expected_tensor->is_variable); + EXPECT_EQ(tensor->shape, expected_tensor->shape); + EXPECT_EQ(tensor->name, expected_tensor->name); + EXPECT_EQ(tensor->type, expected_tensor->type); + const auto quantization_params = tensor->quantization.get(); + const auto expected_quantization_params = + expected_tensor->quantization.get(); + if (quantization_params != nullptr || + expected_quantization_params != nullptr) { + EXPECT_NE(quantization_params, nullptr); + EXPECT_NE(expected_quantization_params, nullptr); + EXPECT_EQ(quantization_params->scale, + expected_quantization_params->scale); + EXPECT_EQ(quantization_params->zero_point, + expected_quantization_params->zero_point); + } + } + } + ASSERT_EQ(model_.buffers.size(), expected_model.buffers.size()); + for (size_t buffer_idx = 0; buffer_idx < model_.buffers.size(); + ++buffer_idx) { + const auto buffer = model_.buffers[buffer_idx].get()->data; + const auto expected_buffer = expected_model.buffers[buffer_idx].get()->data; + EXPECT_EQ(buffer, expected_buffer); + } +} + class QuantizeFCTest : public QuantizeModelTest { protected: QuantizeFCTest() { diff --git a/tensorflow/lite/tools/optimize/test_util.cc b/tensorflow/lite/tools/optimize/test_util.cc index be99f9e9ddb..0d7cfd6622a 100644 --- a/tensorflow/lite/tools/optimize/test_util.cc +++ b/tensorflow/lite/tools/optimize/test_util.cc @@ -59,6 +59,9 @@ const 
char* kModelWithMaximumOp = "maximum.bin"; const char* kLstmCalibrated2 = "lstm_calibrated2.bin"; const char* kLstmQuantized2 = "lstm_quantized2.bin"; +const char* kSvdfCalibrated = "svdf_calibrated.bin"; +const char* kSvdfQuantized = "svdf_quantized.bin"; + const char* kModelWithUnpack = "unpack.bin"; int FailOnErrorReporter::Report(const char* format, va_list args) { diff --git a/tensorflow/lite/tools/optimize/test_util.h b/tensorflow/lite/tools/optimize/test_util.h index 0d394b0badc..525fbd0c573 100644 --- a/tensorflow/lite/tools/optimize/test_util.h +++ b/tensorflow/lite/tools/optimize/test_util.h @@ -95,6 +95,10 @@ extern const char* kModelWithMaximumOp; extern const char* kLstmCalibrated2; extern const char* kLstmQuantized2; +// Test model with SVDF op. +extern const char* kSvdfCalibrated; +extern const char* kSvdfQuantized; + // Test model with an unpack op. extern const char* kModelWithUnpack; diff --git a/tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin b/tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin new file mode 100644 index 00000000000..e363b4a6d83 Binary files /dev/null and b/tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin differ diff --git a/tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin b/tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin new file mode 100644 index 00000000000..fd30ba72cd2 Binary files /dev/null and b/tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin differ