Move DequantizeInputs and QuantizeOutputs into common utility

PiperOrigin-RevId: 314093553
Change-Id: Ib15015f738c3e0bbaa1363ba7df9808940075d2c
Author: Taehee Jeong (2020-06-01 01:14:56 -07:00), committed by TensorFlower Gardener
Parent: 3ffb4ad2d4
Commit: 626bb2c4d0
6 changed files with 345 additions and 66 deletions

tensorflow/lite/delegates/gpu/BUILD

@@ -244,6 +244,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common:model",
         "//tensorflow/lite/delegates/gpu/common:model_builder",
         "//tensorflow/lite/delegates/gpu/common:model_transformer",
+        "//tensorflow/lite/delegates/gpu/common:quantization_util",
         "//tensorflow/lite/delegates/gpu/common:status",
         "//tensorflow/lite/delegates/gpu/gl:api2",
         "//tensorflow/lite/kernels/internal:optimized_base",

tensorflow/lite/delegates/gpu/common/BUILD

@@ -203,6 +203,30 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "quantization_util",
+    srcs = ["quantization_util.cc"],
+    hdrs = ["quantization_util.h"],
+    deps = [
+        ":status",
+        "//tensorflow/lite:kernel_api",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels/internal:optimized_base",
+        "//tensorflow/lite/kernels/internal:types",
+    ],
+)
+
+cc_test(
+    name = "quantization_util_test",
+    srcs = ["quantization_util_test.cc"],
+    deps = [
+        ":quantization_util",
+        "//tensorflow/lite:util",
+        "//tensorflow/lite/micro/testing:micro_test",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 # TODO(impjdi): Add unit test for operations.
 cc_library(

tensorflow/lite/delegates/gpu/common/quantization_util.cc

@@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
namespace gpu {
namespace {
void DequantizeInput(TfLiteContext* context, int input_index,
const std::unordered_map<int, int>& quant_conversion_map) {
if (quant_conversion_map.find(input_index) == quant_conversion_map.end()) {
return;
}
int original_tensor_idx = quant_conversion_map.at(input_index);
const TfLiteTensor& dequantized_tflite_tensor = context->tensors[input_index];
const TfLiteTensor& original_tflite_tensor =
context->tensors[original_tensor_idx];
DequantizationParams op_params;
op_params.zero_point = original_tflite_tensor.params.zero_point;
op_params.scale = original_tflite_tensor.params.scale;
if (original_tflite_tensor.type == kTfLiteInt8) {
optimized_ops::Dequantize(op_params,
GetTensorShape(&original_tflite_tensor),
original_tflite_tensor.data.int8,
GetTensorShape(&original_tflite_tensor),
dequantized_tflite_tensor.data.f);
} else if (original_tflite_tensor.type == kTfLiteUInt8) {
optimized_ops::Dequantize(op_params,
GetTensorShape(&original_tflite_tensor),
original_tflite_tensor.data.uint8,
GetTensorShape(&original_tflite_tensor),
dequantized_tflite_tensor.data.f);
}
}
void QuantizeOutput(TfLiteContext* context, int output_index,
const std::unordered_map<int, int>& quant_conversion_map) {
if (quant_conversion_map.find(output_index) == quant_conversion_map.end()) {
return;
}
int original_tensor_idx = quant_conversion_map.at(output_index);
const TfLiteTensor& dequantized_tflite_tensor =
context->tensors[output_index];
const TfLiteTensor& original_tflite_tensor =
context->tensors[original_tensor_idx];
tflite::QuantizationParams op_params;
op_params.zero_point = original_tflite_tensor.params.zero_point;
op_params.scale = original_tflite_tensor.params.scale;
if (original_tflite_tensor.type == kTfLiteInt8) {
optimized_ops::AffineQuantize(op_params,
GetTensorShape(&original_tflite_tensor),
dequantized_tflite_tensor.data.f,
GetTensorShape(&original_tflite_tensor),
original_tflite_tensor.data.int8);
} else if (original_tflite_tensor.type == kTfLiteUInt8) {
optimized_ops::AffineQuantize(op_params,
GetTensorShape(&original_tflite_tensor),
dequantized_tflite_tensor.data.f,
GetTensorShape(&original_tflite_tensor),
original_tflite_tensor.data.uint8);
}
}
} // namespace
absl::Status DequantizeInputs(
TfLiteContext* context, const std::vector<uint32_t>& input_indices,
const std::unordered_map<int, int>& quant_conversion_map) {
for (auto index : input_indices) {
DequantizeInput(context, static_cast<int>(index), quant_conversion_map);
}
return absl::OkStatus();
}
absl::Status DequantizeInputs(
TfLiteContext* context, const std::vector<int64_t>& input_indices,
const std::unordered_map<int, int>& quant_conversion_map) {
for (auto index : input_indices) {
DequantizeInput(context, static_cast<int>(index), quant_conversion_map);
}
return absl::OkStatus();
}
absl::Status QuantizeOutputs(
TfLiteContext* context, const std::vector<uint32_t>& output_indices,
const std::unordered_map<int, int>& quant_conversion_map) {
for (auto index : output_indices) {
QuantizeOutput(context, static_cast<int>(index), quant_conversion_map);
}
return absl::OkStatus();
}
absl::Status QuantizeOutputs(
TfLiteContext* context, const std::vector<int64_t>& output_indices,
const std::unordered_map<int, int>& quant_conversion_map) {
for (auto index : output_indices) {
QuantizeOutput(context, static_cast<int>(index), quant_conversion_map);
}
return absl::OkStatus();
}
} // namespace gpu
} // namespace tflite
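
Note (illustrative sketch, not part of this change): the two helpers above hand the per-element math to the optimized kernels, which apply TFLite's affine quantization scheme real_value = scale * (quantized_value - zero_point). A minimal scalar sketch of that mapping and its inverse, using hypothetical helper names:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch only: scalar equivalents of what optimized_ops::Dequantize and
// optimized_ops::AffineQuantize compute element-wise for an int8 tensor.
inline float DequantizeScalar(int8_t q, float scale, int zero_point) {
  return scale * (static_cast<int>(q) - zero_point);
}

inline int8_t QuantizeScalar(float v, float scale, int zero_point) {
  int q = static_cast<int>(std::round(v / scale)) + zero_point;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}

// Example: with scale = 0.1f and zero_point = 0, the int8 value -3
// dequantizes to -0.3f, and quantizing -0.3f recovers -3.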

tensorflow/lite/delegates/gpu/common/quantization_util.h

@@ -0,0 +1,56 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_QUANTIZATION_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_QUANTIZATION_UTIL_H_

#include <unordered_map>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {

// Dequantizes input tensors pre-inference, leaving float tensors intact.
// input_indices contains the indices of the dequantized (fp32) tensors that
// are used as inputs to the GPU delegate.
// quant_conversion_map contains the bidirectional mapping between each
// dequantized tensor and its original quantized one.
absl::Status DequantizeInputs(
    TfLiteContext* context, const std::vector<uint32_t>& input_indices,
    const std::unordered_map<int, int>& quant_conversion_map);

absl::Status DequantizeInputs(
    TfLiteContext* context, const std::vector<int64_t>& input_indices,
    const std::unordered_map<int, int>& quant_conversion_map);

// Quantizes output tensors post-inference, leaving float tensors intact.
// output_indices contains the indices of the (fp32) tensors to be quantized,
// which are outputs of the GPU delegate.
// quant_conversion_map contains the bidirectional mapping between each
// dequantized tensor and its original quantized one.
absl::Status QuantizeOutputs(
    TfLiteContext* context, const std::vector<uint32_t>& output_indices,
    const std::unordered_map<int, int>& quant_conversion_map);

absl::Status QuantizeOutputs(
    TfLiteContext* context, const std::vector<int64_t>& output_indices,
    const std::unordered_map<int, int>& quant_conversion_map);

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_QUANTIZATION_UTIL_H_
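
For context (an illustrative sketch, not part of this change): a delegate that keeps fp32 copies of quantized I/O tensors is expected to call these helpers around inference roughly as follows; this mirrors the DelegateKernel::Invoke change further down. The tensor indices, the InvokeWithQuantIo name, and the RunGpuInference placeholder are hypothetical. Because quant_conversion_map holds the fp32-copy <-> quantized-original mapping in both directions, passing only the fp32 indices is enough to locate the quantized counterparts.

// Sketch only. Assumes tensor 0 is a quantized model input whose fp32 copy is
// tensor 2, and tensor 1 is a quantized model output whose fp32 copy is
// tensor 3; RunGpuInference() stands in for the delegate's actual runner.
absl::Status InvokeWithQuantIo(TfLiteContext* context) {
  const std::unordered_map<int, int> quant_conversion_map = {
      {2, 0}, {0, 2},   // input: fp32 copy <-> quantized original
      {3, 1}, {1, 3}};  // output: fp32 copy <-> quantized original
  const std::vector<uint32_t> input_indices = {2};   // fp32 tensors the GPU reads
  const std::vector<uint32_t> output_indices = {3};  // fp32 tensors the GPU writes

  RETURN_IF_ERROR(
      DequantizeInputs(context, input_indices, quant_conversion_map));
  RETURN_IF_ERROR(RunGpuInference());  // hypothetical placeholder
  RETURN_IF_ERROR(
      QuantizeOutputs(context, output_indices, quant_conversion_map));
  return absl::OkStatus();
}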

tensorflow/lite/delegates/gpu/common/quantization_util_test.cc

@@ -0,0 +1,139 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "tensorflow/lite/micro/testing/test_utils.h"
#include "tensorflow/lite/util.h"
using ::testing::Eq;
using ::testing::FloatNear;
using ::testing::Pointwise;
namespace tflite {
namespace gpu {
namespace {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
const std::vector<int>& data) {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
TfLiteIntArrayCreate(data.size()));
std::copy(data.begin(), data.end(), result->data);
return result;
}
TEST(DequantizeInputs, Int8) {
TfLiteContext context;
auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1});
std::vector<int8_t> data = {-3, -2, -1, 1, 2, 3};
std::vector<float> dequantized_data(data.size());
TfLiteTensor input = tflite::testing::CreateQuantizedTensor(
data.data(), input_dims.get(), "input",
/*min=*/-12.8f, /*max=*/12.7f, /*is_variable=*/false);
TfLiteTensor dequantized_input = tflite::testing::CreateFloatTensor(
dequantized_data.data(), input_dims.get(), "input_dequant",
/*is_variable=*/true);
std::vector<TfLiteTensor> tensors{input, dequantized_input};
tflite::testing::PopulateContext(tensors.data(), tensors.size(),
/*error_reporter=*/nullptr, &context);
std::vector<uint32_t> input_indices = {1};
std::unordered_map<int, int> quant_conversion_map = {{1, 0}};
auto status = DequantizeInputs(&context, input_indices, quant_conversion_map);
EXPECT_TRUE(status.ok());
EXPECT_THAT(dequantized_data,
Pointwise(FloatNear(1e-6), {-0.3, -0.2, -0.1, 0.1, 0.2, 0.3}));
}
TEST(DequantizeInputs, UInt8) {
TfLiteContext context;
auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1});
std::vector<uint8_t> data = {0, 1, 2, 3, 4, 5};
std::vector<float> dequantized_data(data.size());
TfLiteTensor input = tflite::testing::CreateQuantizedTensor(
data.data(), input_dims.get(), "input",
/*min=*/0.0f, /*max=*/25.5f, /*is_variable=*/false);
TfLiteTensor dequantized_input = tflite::testing::CreateFloatTensor(
dequantized_data.data(), input_dims.get(), "input_dequant",
/*is_variable=*/true);
std::vector<TfLiteTensor> tensors{input, dequantized_input};
tflite::testing::PopulateContext(tensors.data(), tensors.size(),
/*error_reporter=*/nullptr, &context);
std::vector<int64_t> input_indices = {1};
std::unordered_map<int, int> quant_conversion_map = {{1, 0}};
auto status = DequantizeInputs(&context, input_indices, quant_conversion_map);
EXPECT_TRUE(status.ok());
EXPECT_THAT(dequantized_data,
Pointwise(FloatNear(1e-6), {0.0, 0.1, 0.2, 0.3, 0.4, 0.5}));
}
TEST(QuantizeOutputs, Int8) {
TfLiteContext context;
auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1});
std::vector<float> data = {-0.3, -0.2, -0.1, 0.1, 0.2, 0.3};
std::vector<int8_t> quantized_data(data.size());
TfLiteTensor output = tflite::testing::CreateFloatTensor(
data.data(), input_dims.get(), "output", /*is_variable=*/false);
TfLiteTensor quantized_output = tflite::testing::CreateQuantizedTensor(
quantized_data.data(), input_dims.get(), "output_quant",
/*min=*/-12.8f, /*max=*/12.7f, /*is_variable=*/true);
std::vector<TfLiteTensor> tensors{output, quantized_output};
tflite::testing::PopulateContext(tensors.data(), tensors.size(),
/*error_reporter=*/nullptr, &context);
std::vector<uint32_t> output_indices = {0};
std::unordered_map<int, int> quant_conversion_map = {{0, 1}};
auto status = QuantizeOutputs(&context, output_indices, quant_conversion_map);
EXPECT_TRUE(status.ok());
EXPECT_THAT(quantized_data, Pointwise(Eq(), {-3, -2, -1, 1, 2, 3}));
}
TEST(QuantizeOutputs, UInt8) {
TfLiteContext context;
auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1});
std::vector<float> data = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5};
std::vector<uint8_t> quantized_data(data.size());
TfLiteTensor output = tflite::testing::CreateFloatTensor(
data.data(), input_dims.get(), "output", /*is_variable=*/false);
TfLiteTensor quantized_output = tflite::testing::CreateQuantizedTensor(
quantized_data.data(), input_dims.get(), "output_quant",
/*min=*/0.0f, /*max=*/25.5f, /*is_variable=*/true);
std::vector<TfLiteTensor> tensors{output, quantized_output};
tflite::testing::PopulateContext(tensors.data(), tensors.size(),
/*error_reporter=*/nullptr, &context);
std::vector<int64_t> output_indices = {0};
std::unordered_map<int, int> quant_conversion_map = {{0, 1}};
auto status = QuantizeOutputs(&context, output_indices, quant_conversion_map);
EXPECT_TRUE(status.ok());
EXPECT_THAT(quantized_data, Pointwise(Eq(), {0, 1, 2, 3, 4, 5}));
}
} // namespace
} // namespace gpu
} // namespace tflite
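
Note (illustrative, not part of this change): the expected values in these tests follow from how the micro test helpers derive quantization parameters from a (min, max) range. Assuming the usual derivation scale = (max - min) / (qmax - qmin), with zero_point chosen so that min maps to qmin, both ranges used above yield scale = 0.1 and zero_point = 0:

// Sketch only: int8 range [-12.8, 12.7] and uint8 range [0.0, 25.5] both
// span 25.5, so scale = 25.5 / 255 = 0.1 and zero_point works out to 0.
float scale = (12.7f - (-12.8f)) / 255.0f;                              // 0.1
int zero_point = -128 - static_cast<int>(std::round(-12.8f / scale));   // 0
// Hence int8 -3 <-> -0.3f and uint8 5 <-> 0.5f, matching the EXPECT_THATs.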

tensorflow/lite/delegates/gpu/delegate.cc

@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/model_builder.h"
 #include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+#include "tensorflow/lite/delegates/gpu/common/quantization_util.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/gl/api2.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
@@ -210,12 +211,14 @@ class DelegateKernel {
     const bool is_dequant_required = !quant_conversion_map_.empty();
     if (is_dequant_required) {
-      RETURN_IF_ERROR(DequantizeInputs(context));
+      RETURN_IF_ERROR(
+          DequantizeInputs(context, input_indices_, quant_conversion_map_));
     }
     RETURN_IF_ERROR(SetInputsAndOutputs(context));
     RETURN_IF_ERROR(runner_->Run());
     if (is_dequant_required) {
-      RETURN_IF_ERROR(QuantizeOutputs(context));
+      RETURN_IF_ERROR(
+          QuantizeOutputs(context, output_indices_, quant_conversion_map_));
     }
     return absl::OkStatus();
   }
@@ -277,70 +280,6 @@ class DelegateKernel {
     return absl::OkStatus();
   }
 
-  // TODO(b/150798231): Refactor these two into common utils when generalizing
-  // to other backends.
-  // Dequantizes input tensors pre-inference, leaving float tensors intact.
-  absl::Status DequantizeInputs(TfLiteContext* context) {
-    for (auto index : input_indices_) {
-      if (quant_conversion_map_.find(index) == quant_conversion_map_.end()) {
-        continue;
-      }
-      int original_tensor_idx = quant_conversion_map_[index];
-      const TfLiteTensor& dequantized_tflite_tensor = context->tensors[index];
-      const TfLiteTensor& original_tflite_tensor =
-          context->tensors[original_tensor_idx];
-      DequantizationParams op_params;
-      op_params.zero_point = original_tflite_tensor.params.zero_point;
-      op_params.scale = original_tflite_tensor.params.scale;
-      if (original_tflite_tensor.type == kTfLiteInt8) {
-        optimized_ops::Dequantize(op_params,
-                                  GetTensorShape(&original_tflite_tensor),
-                                  original_tflite_tensor.data.int8,
-                                  GetTensorShape(&original_tflite_tensor),
-                                  dequantized_tflite_tensor.data.f);
-      } else if (original_tflite_tensor.type == kTfLiteUInt8) {
-        optimized_ops::Dequantize(op_params,
-                                  GetTensorShape(&original_tflite_tensor),
-                                  original_tflite_tensor.data.uint8,
-                                  GetTensorShape(&original_tflite_tensor),
-                                  dequantized_tflite_tensor.data.f);
-      }
-    }
-    return absl::OkStatus();
-  }
-
-  // Quantizes output tensors post-inference, leaving float tensors intact.
-  absl::Status QuantizeOutputs(TfLiteContext* context) {
-    for (auto index : output_indices_) {
-      if (quant_conversion_map_.find(index) == quant_conversion_map_.end()) {
-        continue;
-      }
-      int original_tensor_idx = quant_conversion_map_[index];
-      const TfLiteTensor& dequantized_tflite_tensor = context->tensors[index];
-      const TfLiteTensor& original_tflite_tensor =
-          context->tensors[original_tensor_idx];
-      tflite::QuantizationParams op_params;
-      op_params.zero_point = original_tflite_tensor.params.zero_point;
-      op_params.scale = original_tflite_tensor.params.scale;
-      if (original_tflite_tensor.type == kTfLiteInt8) {
-        optimized_ops::AffineQuantize(op_params,
-                                      GetTensorShape(&original_tflite_tensor),
-                                      dequantized_tflite_tensor.data.f,
-                                      GetTensorShape(&original_tflite_tensor),
-                                      original_tflite_tensor.data.int8);
-      } else if (original_tflite_tensor.type == kTfLiteUInt8) {
-        optimized_ops::AffineQuantize(op_params,
-                                      GetTensorShape(&original_tflite_tensor),
-                                      dequantized_tflite_tensor.data.f,
-                                      GetTensorShape(&original_tflite_tensor),
-                                      original_tflite_tensor.data.uint8);
-      }
-    }
-    return absl::OkStatus();
-  }
-
   absl::Status InitializeOpenClApi(GraphFloat32* graph,
                                    std::unique_ptr<InferenceBuilder>* builder,
                                    bool* graph_is_destroyed) {