Add a utility method to decompose a double into two integers

PiperOrigin-RevId: 306472779 Change-Id: Iaf079951f492235c27ecdb5146849f3345a8276a
2020-04-14 10:57:06 -07:00 · 2020-04-14 10:57:06 -07:00 · 7c115c16e0
commit 7c115c16e0
parent 9f693e35a2
4 changed files with 167 additions and 1 deletions
--- a/tensorflow/compiler/mlir/lite/quantization/BUILD
+++ b/tensorflow/compiler/mlir/lite/quantization/BUILD
@ -1,4 +1,4 @@
-load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_native_cc_binary")
 load(
    "//tensorflow/core/platform:build_config.bzl",
    "tf_proto_library",
@ -115,6 +115,12 @@ tf_native_cc_binary(
    ],
 )

+# Library target for the quantization numerical helpers implemented in
+# numerical_utils.cc and declared in numerical_utils.h.
+cc_library(
+    name = "numerical_utils",
+    srcs = ["numerical_utils.cc"],
+    hdrs = ["numerical_utils.h"],
+)
+
 cc_library(
    name = "device_target",
    srcs = ["device_target.cc"],
@ -142,3 +148,12 @@ cc_library(
        "@llvm-project//mlir:Support",
    ],
 )
+
+# Unit test for :numerical_utils (sources in numerical_utils_test.cc), linked
+# against the googletest gtest_main target.
+tf_cc_test(
+    name = "numerical_utils_test",
+    srcs = ["numerical_utils_test.cc"],
+    deps = [
+        ":numerical_utils",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
@ -0,0 +1,59 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
+
+#include <assert.h>
+
+#include <cmath>
+#include <limits>
+
+namespace mlir {
+namespace quant {
+
+// Decomposes `double_multiplier` into a fixed-point multiplier and a
+// power-of-two exponent such that
+//   double_multiplier ~= result.first * 2^(result.second - 31).
+// For a non-zero result, result.first lies in [2^30, 2^31).
+//
+// Multipliers below 1e-6 (this covers zero and negative values) and NaN are
+// flushed to {0, 0}. The input must otherwise be finite: +infinity has no
+// representation and is rejected by an assert in debug builds.
+//
+// This method is adopted from TFLite:
+// ["tensorflow/lite/kernels/internal/quantization_util.cc"]
+QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
+  // Written as a negated `>=` so that NaN, for which every ordered comparison
+  // is false, also takes this early exit instead of reaching the
+  // floating-point to integer conversion below (undefined behavior for NaN).
+  if (!(double_multiplier >= 1e-6)) {
+    return {0, 0};
+  }
+  assert(std::isfinite(double_multiplier));
+
+  // std::frexp writes the exponent through an `int*`; use a plain `int`
+  // rather than relying on int32_t being an alias of int.
+  int shift = 0;
+  // q is in [0.5, 1) and double_multiplier == q * 2^shift.
+  const double q = std::frexp(double_multiplier, &shift);
+  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+  assert(q_fixed <= (1ll << 31));
+  // Rounding can push the mantissa up to exactly 2^31, which does not fit in
+  // int32_t; renormalize to 2^30 and bump the exponent to compensate.
+  if (q_fixed == (1ll << 31)) {
+    q_fixed /= 2;
+    ++shift;
+  }
+  assert(q_fixed <= std::numeric_limits<int32_t>::max());
+  // A shift amount smaller than -31 would cause all bits to be shifted out
+  // and thus all results would be zero. That case is represented with
+  // q_fixed == 0 so that consumers never see right-shift amounts greater
+  // than 31. With the 1e-6 cut-off above, frexp never produces an exponent
+  // this small, so this branch is only a safeguard mirroring the TFLite
+  // implementation (and protects against a future change of the cut-off).
+  if (shift < -31) {
+    shift = 0;
+    q_fixed = 0;
+  }
+  return {static_cast<int32_t>(q_fixed), static_cast<int32_t>(shift)};
+}
+
+}  // namespace quant
+}  // namespace mlir
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
+#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
+
+#include <cstdint>
+#include <utility>
+
+namespace mlir {
+namespace quant {
+
+// Fixed-point representation of a multiplier: `first` is the integer
+// multiplier and `second` is the power-of-two exponent.
+using QuantizedMultiplier = std::pair<int32_t, int32_t>;
+
+// Decomposes a double precision multiplier into an integer multiplier and an
+// exponent such that
+//    double_multiplier ~= int_multiplier * 2 ^ (-31 + exponent)
+// For a non-zero result, int_multiplier is in the range [2^30, 2^31).
+// Multipliers smaller than 1e-6 are decomposed to {0, 0}.
+QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
+
+}  // namespace quant
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
@ -0,0 +1,57 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
+
+#include <cmath>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace mlir {
+namespace quant {
+
+namespace {
+
+// Rebuilds the real-valued scale encoded by a QuantizedMultiplier, i.e.
+// multiplier * 2^(exponent - 31).
+double ComposeScale(const QuantizedMultiplier& input) {
+  const auto power = -31 + input.second;
+  const double unit_scale = exp2(power);
+  return input.first * unit_scale;
+}
+
+// Round-trip check: decomposing a scale with QuantizeMultiplier and
+// recomposing it with ComposeScale must give back the original scale (compared
+// with ASSERT_FLOAT_EQ, i.e. at float precision), while scales below 1e-6 must
+// collapse to zero.
+TEST(DecomposeScale, QuantizeMultiplier) {
+  // Decompose multiplier larger than 1.
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(10.)), 10.);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(5.)), 5.);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(2.)), 2.);
+
+  // Zero is below the 1e-6 cut-off, so it decomposes to {0, 0} and recomposes
+  // to 0. The remaining cases cover multipliers from 1.0 down to 1e-6, both
+  // inclusive.
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(0.0)), 0.0);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0)), 1.0);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-1)), 1.0e-1);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-2)), 1.0e-2);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-3)), 1.0e-3);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-4)), 1.0e-4);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-5)), 1.0e-5);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-6)), 1.0e-6);
+
+  // When scale is smaller than 1.0e-6, it is decomposed to {0, 0}.
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-7)), 0.0);
+  ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
+}
+
+}  // namespace
+}  // namespace quant
+}  // namespace mlir