Add quantizer for SVDF.

- State is quantized to 16 bits.
- Time weight is also quantized to 16 bits, but its target range is [-512, 512] instead of [-32767, 32767], because a "reduce" (summation) operation immediately follows the element-wise multiplication between state and time weight. The reduction length can be as high as 30, so we reserve 6 bits (a factor of 64) of headroom for the summation, leaving only 10 bits for the time weight. See the worked check of this arithmetic below.

PiperOrigin-RevId: 286920154
Change-Id: I45104b69452a1033db230d1db6bd280fa289b733
Author: Jian Li (committed by TensorFlower Gardener)
Date: 2019-12-23 11:20:32 -08:00
Parent: 741da13f8b
Commit: c2670013c8

8 changed files with 112 additions and 2 deletions
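As a sanity check on the bit allocation described in the commit message, the following standalone C++ sketch verifies that an int16 state multiplied by a 10-bit time weight, summed with 6 bits of headroom, still fits in a 32-bit accumulator. It is illustrative only; the worst-case values are assumptions, not taken from this change.

#include <cstdint>
#include <iostream>

int main() {
  const int64_t kMaxState = 32767;      // int16 state magnitude
  const int64_t kMaxTimeWeight = 512;   // 10-bit target range [-512, 512]
  const int64_t kReduceHeadroom = 64;   // 6 bits reserved; actual length <= ~30
  // Worst-case value after the element-wise mul and the reduce.
  const int64_t max_sum = kMaxState * kMaxTimeWeight * kReduceHeadroom;
  // 32767 * 512 * 64 = 1073709056 < 2147483647, so int32 does not overflow.
  std::cout << max_sum << " <= " << INT32_MAX << ": "
            << (max_sum <= INT32_MAX ? "ok" : "overflow") << std::endl;
  return 0;
}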

tensorflow/lite/tools/optimize/BUILD

@@ -247,6 +247,8 @@ tf_cc_test(
"//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_minus_127_max_plus_127.bin",
"//tensorflow/lite/tools/optimize:testdata/single_softmax_min_minus_5_max_plus_5.bin",
"//tensorflow/lite/tools/optimize:testdata/split.bin",
"//tensorflow/lite/tools/optimize:testdata/svdf_calibrated.bin",
"//tensorflow/lite/tools/optimize:testdata/svdf_quantized.bin",
"//tensorflow/lite/tools/optimize:testdata/unpack.bin",
],
tags = [

tensorflow/lite/tools/optimize/operator_property.cc

@@ -871,6 +871,29 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
property.version = 2;
break;
}
case BuiltinOperator_SVDF: {
TensorProperty tensor_property_time;
// Only 10 bits are needed because 6 bits are reserved for the reduce
// operation after element-wise multiplication between state and time
// weights.
tensor_property_time.number_of_bits = 10;
TensorProperty tensor_property_bias;
tensor_property_bias.use_derived_scale = true;
tensor_property_bias.number_of_bits = 32;
tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
TensorProperty tensor_property_state;
tensor_property_state.number_of_bits = 16;
tensor_property_state.state_tensor = true;
property.inputs = {{0, {}},
{1, {}},
{2, tensor_property_time},
{4, tensor_property_state},
{3, tensor_property_bias}};
property.outputs = {{0, {}}};
property.version = 2;
break;
}
case BuiltinOperator_TRANSPOSE:
property.inputs = {{0, {}}};
property.outputs = {{0, {}}};
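A note on the bias property above: derived_scale = {{2, 4}, {}, {}} means the 32-bit bias does not get its own calibrated scale; the quantizer derives it as the product of the scales of input tensors 2 (time weight) and 4 (state). A minimal sketch of that rule with made-up numbers follows (the real logic lives in TF Lite's quantization utilities, not in this snippet):

#include <iostream>

int main() {
  // Hypothetical calibrated scales for the SVDF inputs.
  const float time_weight_scale = 1.5f / 512.0f;  // input 2, 10-bit range
  const float state_scale = 6.0f / 32768.0f;      // input 4, 16-bit state
  // Derived bias scale: product of the input-tensor scales listed in
  // derived_scale, so the int32 bias matches the mul-accumulate result.
  const float bias_scale = time_weight_scale * state_scale;
  std::cout << "bias scale = " << bias_scale << std::endl;
  return 0;
}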

tensorflow/lite/tools/optimize/quantize_model.cc

@@ -479,8 +479,26 @@ TfLiteStatus QuantizeOpInput(
return utils::SymmetricPerLayerBiasQuantize(model, tensor, scale,
error_reporter);
} else if (tensor_property.number_of_bits == 10) {
// When the number of bits is 10 (instead of 16), quantize the tensor to
// [-512, 512], instead of [-32767, 32767].
TensorT* tensor = subgraph->tensors[tensor_idx].get();
int total_size = 1;
for (int i = 0; i < tensor->shape.size(); ++i) {
total_size *= tensor->shape[i];
}
BufferT* buffer = model->buffers[tensor->buffer].get();
float* buffer_data = reinterpret_cast<float*>(buffer->data.data());
auto minmax =
std::minmax_element(buffer_data, buffer_data + total_size);
const float range =
std::max(std::abs(*minmax.first), std::abs(*minmax.second));
const float quantized_range = 512.0;
const float scale = range / quantized_range;
return utils::SymmetricQuantizeFloatsToInt16(model, tensor, scale,
error_reporter);
} else {
- // Only 8, 16, 32 are supported.
+ // Only 8, 16, 32, 10 are supported.
// TODO(jianlijianli): extend this to support arbitrary bits.
error_reporter->Report(
"Unable to quantize buffer or min/max value for input %d "
@@ -499,14 +517,15 @@ TfLiteStatus QuantizeOpInput(
utils::QuantizeActivation(tensor);
} else if (tensor_property.number_of_bits == 16) {
TensorT* tensor = subgraph->tensors[tensor_idx].get();
- float quantized_range = 32767.0;
float range = std::max(std::abs(tensor->quantization->min[0]),
std::abs(tensor->quantization->max[0]));
if (tensor_property.extend_to_power_of_two) {
const int power_of_two_scale = utils::GetPowerOfTwoScale(
tensor->quantization->min[0], tensor->quantization->max[0]);
range = std::pow(2, power_of_two_scale);
- quantized_range = 32768.0;
}
+ const float quantized_range = 32768.0;
const float scale = range / quantized_range;
utils::QuantizeActivationToInt16(tensor, scale);
}
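To make the two scale computations above concrete, here is a standalone sketch with assumed min/max ranges. The helper is illustrative only and is not the actual SymmetricQuantizeFloatsToInt16 or QuantizeActivationToInt16 implementation:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative symmetric quantization: round(value / scale), clamped to int16.
int16_t QuantizeSymmetric(float value, float scale) {
  const float q = std::round(value / scale);
  return static_cast<int16_t>(std::min(std::max(q, -32768.0f), 32767.0f));
}

int main() {
  // 10-bit time weight: assume the float weights span [-2.0, 2.0].
  const float weight_scale = 2.0f / 512.0f;
  std::cout << QuantizeSymmetric(2.0f, weight_scale) << std::endl;   // 512
  std::cout << QuantizeSymmetric(-1.0f, weight_scale) << std::endl;  // -256

  // 16-bit activation: assume a calibrated range of [-4.0, 4.0]. The
  // denominator is now uniformly 32768, a power of two, in both branches.
  const float activation_scale = 4.0f / 32768.0f;
  std::cout << QuantizeSymmetric(3.0f, activation_scale) << std::endl;  // 24576
  return 0;
}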

tensorflow/lite/tools/optimize/quantize_model_test.cc

@@ -1115,6 +1115,65 @@ TEST_F(QuantizeLSTM2Test, VerifyLSTM) {
}
}

class QuantizeSVDFTest : public QuantizeModelTest {
protected:
QuantizeSVDFTest() {
input_model_ = ReadModel(internal::kSvdfCalibrated);
readonly_model_ = input_model_->GetModel();
readonly_model_->UnPackTo(&model_);
}
};

TEST_F(QuantizeSVDFTest, VerifySVDF) {
// Quantize model.
auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
TensorType_INT8, &error_reporter_);
ASSERT_EQ(kTfLiteOk, status);
// Read expected model.
auto expected_fb_model = ReadModel(internal::kSvdfQuantized);
auto expected_read_only_model = expected_fb_model->GetModel();
ModelT expected_model;
expected_read_only_model->UnPackTo(&expected_model);
// Comparison.
ASSERT_EQ(model_.subgraphs.size(), expected_model.subgraphs.size());
for (size_t subgraph_idx = 0; subgraph_idx < model_.subgraphs.size();
subgraph_idx++) {
const auto graph = model_.subgraphs[subgraph_idx].get();
const auto expected_graph = expected_model.subgraphs[subgraph_idx].get();
ASSERT_EQ(graph->tensors.size(), expected_graph->tensors.size());
for (size_t i = 0; i < graph->tensors.size(); i++) {
const auto tensor = graph->tensors[i].get();
const auto expected_tensor = expected_graph->tensors[i].get();
EXPECT_EQ(tensor->buffer, expected_tensor->buffer);
EXPECT_EQ(tensor->is_variable, expected_tensor->is_variable);
EXPECT_EQ(tensor->shape, expected_tensor->shape);
EXPECT_EQ(tensor->name, expected_tensor->name);
EXPECT_EQ(tensor->type, expected_tensor->type);
const auto quantization_params = tensor->quantization.get();
const auto expected_quantization_params =
expected_tensor->quantization.get();
if (quantization_params != nullptr ||
expected_quantization_params != nullptr) {
EXPECT_NE(quantization_params, nullptr);
EXPECT_NE(expected_quantization_params, nullptr);
EXPECT_EQ(quantization_params->scale,
expected_quantization_params->scale);
EXPECT_EQ(quantization_params->zero_point,
expected_quantization_params->zero_point);
}
}
}
ASSERT_EQ(model_.buffers.size(), expected_model.buffers.size());
for (size_t buffer_idx = 0; buffer_idx < model_.buffers.size();
++buffer_idx) {
const auto buffer = model_.buffers[buffer_idx].get()->data;
const auto expected_buffer = expected_model.buffers[buffer_idx].get()->data;
EXPECT_EQ(buffer, expected_buffer);
}
}

class QuantizeFCTest : public QuantizeModelTest {
protected:
QuantizeFCTest() {

tensorflow/lite/tools/optimize/test_util.cc

@@ -59,6 +59,9 @@ const char* kModelWithMaximumOp = "maximum.bin";
const char* kLstmCalibrated2 = "lstm_calibrated2.bin";
const char* kLstmQuantized2 = "lstm_quantized2.bin";
const char* kSvdfCalibrated = "svdf_calibrated.bin";
const char* kSvdfQuantized = "svdf_quantized.bin";
const char* kModelWithUnpack = "unpack.bin";

int FailOnErrorReporter::Report(const char* format, va_list args) {

tensorflow/lite/tools/optimize/test_util.h

@@ -95,6 +95,10 @@ extern const char* kModelWithMaximumOp;
extern const char* kLstmCalibrated2;
extern const char* kLstmQuantized2;
// Test model with SVDF op.
extern const char* kSvdfCalibrated;
extern const char* kSvdfQuantized;
// Test model with an unpack op.
extern const char* kModelWithUnpack;

tensorflow/lite/tools/optimize/testdata/svdf_calibrated.bin (binary file not shown)

tensorflow/lite/tools/optimize/testdata/svdf_quantized.bin (binary file not shown)