Add utility methods to quantization utils for model quantization.
PiperOrigin-RevId: 243201849
commit 7e4c43bbe8
parent 6d43308884
tensorflow/lite/tools/optimize/BUILD
@@ -78,6 +78,7 @@ tf_cc_test(
        "//tensorflow/core:lib",
        "//tensorflow/lite:framework",
        "//tensorflow/lite/schema:schema_fbs",
        "@com_google_absl//absl/memory",
        "@com_google_googletest//:gtest",
        "@flatbuffers",
    ],

tensorflow/lite/tools/optimize/quantization_utils.cc
@@ -171,7 +171,7 @@ TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  }
  float* float_data = reinterpret_cast<float*>(buffer->data.data());
  uint64_t num_elements;
- TF_LITE_ENSURE_STATUS(utils::NumElements(*tensor, &num_elements));
+ TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);
@@ -197,6 +197,116 @@ TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  return kTfLiteOk;
}

TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor) {
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);

  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}
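
AddQuantizationParams is the common sink for the quantization paths below. A minimal call sketch, mirroring the AddQuantizationParams unit test added further down in this diff; `model` and the tensor wiring are assumed to be set up as in that test:

    // Sketch only: attach three per-channel scales/zero points along
    // dimension 3 and replace the tensor's payload with four raw bytes.
    // `model` is a std::unique_ptr<ModelT> wired as in the unit test below.
    const std::vector<float> scales = {0.5f, 1.0f, 1.5f};
    const std::vector<int64_t> zero_points = {5, 10, 15};
    const std::vector<uint8_t> payload = {1, 2, 3, 4};
    TF_LITE_ENSURE_STATUS(AddQuantizationParams(
        scales, zero_points, /*quantized_dimension=*/3, payload.data(),
        payload.size(), TensorType_INT8, model.get(),
        model->subgraphs[0]->tensors[0].get()));
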
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index) {
  if (tensor->shape.size() != 4) {
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  BufferT* buffer = model->buffers[tensor->buffer].get();
  float* float_input_data = reinterpret_cast<float*>(buffer->data.data());

  // Create containers for the output scales and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  const std::vector<int> tensor_dims = {tensor->shape[0], tensor->shape[1],
                                        tensor->shape[2], tensor->shape[3]};
  SymmetricPerChannelQuantization(float_input_data, tensor_dims,
                                  channel_dim_index, &scales, &final_buffer);

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor);
}
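
SymmetricQuantizeTensorPerChannel delegates the actual math to SymmetricPerChannelQuantization, which is not part of this diff. A self-contained sketch of the usual symmetric per-channel rule, under the assumptions that scale[c] = max|x over channel c| / 127, values are clamped to [-127, 127], and channels are laid out contiguously; the helper name is hypothetical, not TFLite API:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Hypothetical standalone helper. Assumes contiguous channels for
    // simplicity; the real code quantizes along an arbitrary dimension.
    void PerChannelSymmetricQuantize(const std::vector<float>& input,
                                     int num_channels,
                                     std::vector<float>* scales,
                                     std::vector<int8_t>* quantized) {
      const int per_channel = static_cast<int>(input.size()) / num_channels;
      scales->assign(num_channels, 0.0f);
      quantized->resize(input.size());
      for (int c = 0; c < num_channels; ++c) {
        // Scale is chosen so the largest magnitude in the channel maps to 127.
        float max_abs = 0.0f;
        for (int i = 0; i < per_channel; ++i) {
          max_abs = std::max(max_abs, std::abs(input[c * per_channel + i]));
        }
        const float scale = max_abs / 127.0f;
        (*scales)[c] = scale;
        for (int i = 0; i < per_channel; ++i) {
          const float q =
              scale == 0.0f ? 0.0f
                            : std::round(input[c * per_channel + i] / scale);
          (*quantized)[c * per_channel + i] = static_cast<int8_t>(
              std::min(127.0f, std::max(-127.0f, q)));
        }
      }
    }
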
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             int dimension_index) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (size_t i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  float* float_data = reinterpret_cast<float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int32_t> final_buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int32_t>::max();

  for (int32_t channel_idx = 0; channel_idx < number_of_dimension;
       channel_idx++) {
    float scaling_factor = scales[channel_idx];
    float scaling_factor_inv = (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
    const int32_t quantized_value = static_cast<int32_t>(
        TfLiteRound(float_data[channel_idx] * scaling_factor_inv));
    final_buffer[channel_idx] =
        std::min(kScale, std::max(-kScale, quantized_value));
  }

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int32_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, dimension_index,
                               uint8_buffer, buffer_size, TensorType_INT32,
                               model, tensor);
}
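
The bias rule above is easy to check by hand: the effective per-channel scale is input_scale * weight_scales[c], and each float bias becomes round(bias[c] / scale) as an int32. A standalone check using the values from the SymmetricPerChannelBiasQuantize unit test at the bottom of this diff:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Values from the SymmetricPerChannelBiasQuantize unit test below.
      const float input_scale = 0.5f;
      const float weight_scales[] = {0.5f, 1.0f};  // one per output channel
      const float bias[] = {4.0f, 1.0f};
      for (int c = 0; c < 2; ++c) {
        const float scale = input_scale * weight_scales[c];
        const int32_t q = static_cast<int32_t>(std::round(bias[c] / scale));
        // Prints 16 and 2 -- the int32 values behind the expected
        // little-endian bytes {16, 0, 0, 0, 2, 0, 0, 0} in the test.
        std::printf("channel %d: scale=%.2f quantized=%d\n", c, scale, q);
      }
      return 0;
    }
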
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index) {
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index);
  } else {
    return SymmetricQuantizeTensor(model, tensor);
  }
}

void QuantizeActivation(TensorT* tensor) {
  GetAsymmetricQuantizationParams(
      tensor->quantization->min[0], tensor->quantization->max[0],
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
      tensor->quantization.get());
  tensor->type = TensorType_INT8;
}

}  // namespace utils
}  // namespace optimize
}  // namespace tflite
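
QuantizeActivation defers the parameter derivation to GetAsymmetricQuantizationParams, whose body is not part of this diff. A rough, hedged sketch of the standard asymmetric affine rule such a helper follows for an int8 target range; the real helper may differ in detail, e.g. in how it nudges the range so that zero is exactly representable:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    struct AffineParams {
      float scale;
      int32_t zero_point;
    };

    // Standard asymmetric affine quantization for int8 [-128, 127],
    // sketched independently of the TFLite helper.
    AffineParams AsymmetricParams(float rmin, float rmax) {
      rmin = std::min(rmin, 0.0f);  // representable range must contain zero
      rmax = std::max(rmax, 0.0f);
      const float qmin = -128.0f;
      const float qmax = 127.0f;
      const float scale = (rmax - rmin) / (qmax - qmin);
      if (scale == 0.0f) return {1.0f, 0};  // degenerate all-zero range
      return {scale, static_cast<int32_t>(std::round(qmin - rmin / scale))};
    }
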
tensorflow/lite/tools/optimize/quantization_utils.h
@@ -64,6 +64,33 @@ void SymmetricPerChannelQuantizeValues(const float* const input,
// of the tensor.
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor);

// Adds quantization parameters to the tensor and replaces its buffer contents.
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor);

// Symmetrically quantizes the tensor per channel.
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index);

// Symmetrically quantizes the bias for ops like Conv and DepthwiseConv.
// The scale of the bias is weight_per_channel_scale[channel] * input_scale.
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             int dimension_index);

// Quantizes the weight tensor, either per channel or per tensor.
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index);

// Asymmetrically quantizes the activation tensor.
void QuantizeActivation(TensorT* tensor);

}  // namespace utils
}  // namespace optimize
}  // namespace tflite
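
Putting the new header API together: a caller quantizing convolution weights would typically take the per-channel path along the output-channel axis (dimension 0 in TFLite's [output, height, width, input] conv weight layout). Illustrative fragment only; `weight_tensor` is an assumed pointer to a float weight tensor in `model`:

    // Per-channel along the output-channel axis for conv weights; pass
    // per_channel=false to fall back to per-tensor symmetric quantization.
    TF_LITE_ENSURE_STATUS(utils::QuantizeWeight(model.get(), weight_tensor,
                                                /*per_channel=*/true,
                                                /*per_axis_index=*/0));
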
tensorflow/lite/tools/optimize/quantization_utils_test.cc
@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/memory/memory.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/util/command_line_flags.h"
@@ -256,6 +258,70 @@ TEST(QuantizationUtilsTest, SymmetricQuantizeTensor) {
  EXPECT_EQ(quant_buffer_size * 4, float_buffer_size);
}

TEST(QuantizationUtilsTest, AddQuantizationParams) {
  // Create data.
  auto model = absl::make_unique<ModelT>();
  auto subgraph = absl::make_unique<tflite::SubGraphT>();
  auto tensor = absl::make_unique<TensorT>();
  auto buffer = absl::make_unique<tflite::BufferT>();
  const std::vector<float> scales = {0.5, 1.0, 1.5};
  const std::vector<int64_t> zero_points = {5, 10, 15};
  const int32_t quantized_dimension = 3;
  const std::vector<uint8_t> buffer_data = {1, 2, 3, 4};
  const int32_t buffer_size = 4;
  tensor->buffer = 0;

  // Wire the model.
  model->subgraphs.push_back(std::move(subgraph));
  model->subgraphs[0]->tensors.push_back(std::move(tensor));
  model->buffers.push_back(std::move(buffer));

  // Call and verify.
  EXPECT_EQ(
      AddQuantizationParams(scales, zero_points, quantized_dimension,
                            buffer_data.data(), buffer_size, TensorType_INT8,
                            model.get(), model->subgraphs[0]->tensors[0].get()),
      kTfLiteOk);
  EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->scale,
              ElementsAreArray(scales));
  EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->zero_point,
              ElementsAreArray(zero_points));
  EXPECT_THAT(model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data,
              ElementsAreArray(buffer_data));
  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT8);
}

TEST(QuantizationUtilsTest, SymmetricPerChannelBiasQuantize) {
  // Create data.
  auto model = absl::make_unique<ModelT>();
  auto subgraph = absl::make_unique<tflite::SubGraphT>();
  auto tensor = absl::make_unique<TensorT>();
  auto buffer = absl::make_unique<tflite::BufferT>();
  const std::vector<float> weight_scales = {0.5, 1.0};
  const float input_scale = 0.5;
  std::vector<float> bias_data = {4.0, 1.0};
  auto bias_reinterpreted_data =
      reinterpret_cast<const unsigned char*>(bias_data.data());
  // Copy exactly the two float bias values (2 * sizeof(float) = 8 bytes).
  buffer->data.assign(bias_reinterpreted_data,
                      bias_reinterpreted_data + 2 * sizeof(float));
  tensor->buffer = 0;
  tensor->shape = {2, 1, 1, 1};
  tensor->quantization = absl::make_unique<QuantizationParametersT>();

  // Wire the model.
  model->subgraphs.push_back(std::move(subgraph));
  model->subgraphs[0]->tensors.push_back(std::move(tensor));
  model->buffers.push_back(std::move(buffer));

  // Call and verify.
  EXPECT_EQ(SymmetricPerChannelBiasQuantize(
                model.get(), model->subgraphs[0]->tensors[0].get(), input_scale,
                weight_scales.data(), 2, 0),
            kTfLiteOk);
  EXPECT_THAT(model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data,
              ElementsAreArray({16, 0, 0, 0, 2, 0, 0, 0}));
  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT32);
}

}  // namespace
}  // namespace utils
}  // namespace optimize