diff --git a/tensorflow/compiler/mlir/lite/quantization/BUILD b/tensorflow/compiler/mlir/lite/quantization/BUILD
index 22b4d36e6ad..7d2b5c66236 100644
--- a/tensorflow/compiler/mlir/lite/quantization/BUILD
+++ b/tensorflow/compiler/mlir/lite/quantization/BUILD
@@ -119,6 +119,9 @@ cc_library(
     name = "numerical_utils",
     srcs = ["numerical_utils.cc"],
     hdrs = ["numerical_utils.h"],
+    deps = [
+        "@com_google_absl//absl/types:optional",
+    ],
 )
 
 cc_library(
@@ -154,6 +157,7 @@ tf_cc_test(
     srcs = ["numerical_utils_test.cc"],
     deps = [
         ":numerical_utils",
+        "@com_google_absl//absl/types:optional",
         "@com_google_googletest//:gtest_main",
     ],
 )
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
index c9e6c4c19ac..417013f5f84 100644
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.cc
@@ -16,9 +16,12 @@ limitations under the License.
 
 #include <assert.h>
 
+#include <algorithm>
 #include <cmath>
 #include <limits>
 
+#include "absl/types/optional.h"
+
 namespace mlir {
 namespace quant {
 
@@ -55,5 +58,28 @@ QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
   return {static_cast<int32_t>(q_fixed), shift};
 }
 
+QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
+                                       absl::optional<double> rmin,
+                                       absl::optional<double> rmax,
+                                       int32_t qmin, int32_t qmax) {
+  // Maps a real value to q = zero_point + round(r / scale). The arithmetic
+  // stays in double and is saturated to the int32_t range before the cast:
+  // narrowing `r` to float would discard precision, and converting an
+  // out-of-range floating-point value to an integer is undefined behavior.
+  auto quantize = [scale, zero_point](double r) -> int32_t {
+    constexpr double kInt32Min = std::numeric_limits<int32_t>::min();
+    constexpr double kInt32Max = std::numeric_limits<int32_t>::max();
+    const double q = zero_point + std::round(r / scale);
+    return static_cast<int32_t>(std::min(kInt32Max, std::max(kInt32Min, q)));
+  };
+
+  // Start from the full storage range and tighten only the sides for which a
+  // real-valued bound was supplied.
+  QuantizedRange range(qmin, qmax);
+  if (rmin.has_value()) range.first = std::max(qmin, quantize(*rmin));
+  if (rmax.has_value()) range.second = std::min(qmax, quantize(*rmax));
+  return range;
+}
+
 }  // namespace quant
 }  // namespace mlir
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
index 3f12f2c3fd6..9a818dbbe0e 100644
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
@@ -19,16 +19,29 @@ limitations under the License.
 #include <cstdint>
 #include <utility>
 
+#include "absl/types/optional.h"
+
 namespace mlir {
 namespace quant {
 
 using QuantizedMultiplier = std::pair<int32_t, int32_t>;
+using QuantizedRange = std::pair<int32_t, int32_t>;
 
 // Decompose double precision multiplier to integer multiplier and exponent.
 // double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
 // int_multiplier will be range of (2^31, 2^30].
 QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
 
+// Calculates the effective quantized value range for the given scale and zero
+// point. A real value r is quantized as zero_point + round(r / scale); the
+// result is the storage range [qmin, qmax] with its lower bound raised to the
+// quantized rmin and its upper bound lowered to the quantized rmax, whenever
+// those optional bounds are provided.
+QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
+                                       absl::optional<double> rmin,
+                                       absl::optional<double> rmax,
+                                       int32_t qmin, int32_t qmax);
+
 }  // namespace quant
 }  // namespace mlir
 
diff --git a/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc b/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
index 0c22ec905fe..05b38a8ae0c 100644
--- a/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
+++ b/tensorflow/compiler/mlir/lite/quantization/numerical_utils_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include "absl/types/optional.h"
 
 namespace mlir {
 namespace quant {
@@ -29,7 +30,7 @@ double ComposeScale(const QuantizedMultiplier& input) {
   return input.first * exp2(-31 + input.second);
 }
 
-TEST(DecomposeScale, QuantizeMultiplier) {
+TEST(NumericalUtils, QuantizeMultiplier) {
   // Decompose multiplier larger than 1.
   ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
   ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
@@ -52,6 +53,67 @@ TEST(DecomposeScale, QuantizeMultiplier) {
   ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
 }
 
+TEST(NumericalUtils, ActivationRange) {
+  // zero point = 0
+  auto a =
+      CalculateQuantizedRange(1e-6, 0, absl::nullopt, absl::nullopt, -128, 127);
+  ASSERT_EQ(a.first, -128);
+  ASSERT_EQ(a.second, 127);
+
+  auto b = CalculateQuantizedRange(1e-6, 0, 0.0, absl::nullopt, -128, 127);
+  ASSERT_EQ(b.first, 0);
+  ASSERT_EQ(b.second, 127);
+
+  auto c = CalculateQuantizedRange(1e-6, 0, -1.0, 1.0, -128, 127);
+  ASSERT_EQ(c.first, -128);
+  ASSERT_EQ(c.second, 127);
+
+  auto d = CalculateQuantizedRange(1e-6, 0, 0.0, 6.0, -128, 127);
+  ASSERT_EQ(d.first, 0);
+  ASSERT_EQ(d.second, 127);
+
+  // zero point = 100
+  auto e = CalculateQuantizedRange(1e-6, 100, absl::nullopt, absl::nullopt,
+                                   -128, 127);
+  ASSERT_EQ(e.first, -128);
+  ASSERT_EQ(e.second, 127);
+
+  auto f = CalculateQuantizedRange(1e-6, 100, 0.0, absl::nullopt, -128, 127);
+  ASSERT_EQ(f.first, 100);
+  ASSERT_EQ(f.second, 127);
+
+  auto g = CalculateQuantizedRange(1e-6, 100, -1.0, 1.0, -128, 127);
+  ASSERT_EQ(g.first, -128);
+  ASSERT_EQ(g.second, 127);
+
+  auto h = CalculateQuantizedRange(1e-6, 100, 0.0, 6.0, -128, 127);
+  ASSERT_EQ(h.first, 100);
+  ASSERT_EQ(h.second, 127);
+
+  // zero point = -100
+  auto i = CalculateQuantizedRange(1e-6, -100, absl::nullopt, absl::nullopt,
+                                   -128, 127);
+  ASSERT_EQ(i.first, -128);
+  ASSERT_EQ(i.second, 127);
+
+  auto j = CalculateQuantizedRange(1e-6, -100, 0.0, absl::nullopt, -128, 127);
+  ASSERT_EQ(j.first, -100);
+  ASSERT_EQ(j.second, 127);
+
+  auto k = CalculateQuantizedRange(1e-6, -100, -1.0, 1.0, -128, 127);
+  ASSERT_EQ(k.first, -128);
+  ASSERT_EQ(k.second, 127);
+
+  auto l = CalculateQuantizedRange(1e-6, -100, 0.0, 6.0, -128, 127);
+  ASSERT_EQ(l.first, -100);
+  ASSERT_EQ(l.second, 127);
+
+  // Bounds that quantize outside int32_t saturate rather than overflow.
+  auto m = CalculateQuantizedRange(1e-12, 0, 0.0, 6.0, -128, 127);
+  ASSERT_EQ(m.first, 0);
+  ASSERT_EQ(m.second, 127);
+}
+
 }  // namespace
 }  // namespace quant
 }  // namespace mlir