Add utility method to calculate the final activation range

This activation range is determined by the default min/max, scale and zero
point from the UniformQuantizedType, and the activation function.

PiperOrigin-RevId: 306561114
Change-Id: Ib48414263931b921295239499cc86cf2c92baa1b
This commit is contained in:
Feng Liu 2020-04-14 19:19:19 -07:00 committed by TensorFlower Gardener
parent e77122f406
commit 5c182c2502
4 changed files with 95 additions and 1 deletions

View File

@ -119,6 +119,9 @@ cc_library(
name = "numerical_utils",
srcs = ["numerical_utils.cc"],
hdrs = ["numerical_utils.h"],
deps = [
"@com_google_absl//absl/types:optional",
],
)
cc_library(
@ -154,6 +157,7 @@ tf_cc_test(
srcs = ["numerical_utils_test.cc"],
deps = [
":numerical_utils",
"@com_google_absl//absl/types:optional",
"@com_google_googletest//:gtest_main",
],
)

View File

@ -16,9 +16,12 @@ limitations under the License.
#include <assert.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include "absl/types/optional.h"
namespace mlir {
namespace quant {
@ -55,5 +58,25 @@ QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
return {static_cast<int32_t>(q_fixed), shift};
}
// Computes the quantized value range implied by optional real-valued bounds.
//
// Each bound that is present is mapped to the quantized domain as
// `zero_point + round(r / scale)`. The returned pair is
//   first  = max(qmin, quantized rmin)   (qmin when rmin is absent)
//   second = min(qmax, quantized rmax)   (qmax when rmax is absent)
// The two ends are computed independently and are not re-ordered, so `first`
// may exceed `second` if the quantized rmin lies above qmax (or the quantized
// rmax lies below qmin).
QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
                                       absl::optional<double> rmin,
                                       absl::optional<double> rmax,
                                       int32_t qmin, int32_t qmax) {
  // Quantizes a real value. The arithmetic is done entirely in double (the
  // bounds are doubles, so narrowing them to float would lose precision), and
  // the result is saturated to the int32 range *before* the cast: converting
  // an out-of-range floating-point value to an integer is undefined behavior,
  // which a very small `scale` or a large bound would otherwise trigger.
  auto quantize = [scale, zero_point](double real) -> int32_t {
    const double kInt32Min =
        static_cast<double>(std::numeric_limits<int32_t>::min());
    const double kInt32Max =
        static_cast<double>(std::numeric_limits<int32_t>::max());
    const double quantized =
        static_cast<double>(zero_point) + std::round(real / scale);
    // With this argument order a NaN (e.g. from 0.0 / 0.0) compares false and
    // falls to kInt32Min, so the cast below is always well defined.
    const double saturated = std::min(kInt32Max, std::max(kInt32Min, quantized));
    return static_cast<int32_t>(saturated);
  };

  const int32_t lower =
      rmin.has_value() ? std::max(qmin, quantize(rmin.value())) : qmin;
  const int32_t upper =
      rmax.has_value() ? std::min(qmax, quantize(rmax.value())) : qmax;
  return {lower, upper};
}
} // namespace quant
} // namespace mlir

View File

@ -19,16 +19,26 @@ limitations under the License.
#include <cstdint>
#include <utility>
#include "absl/types/optional.h"
namespace mlir {
namespace quant {
// (first, second) = (integer multiplier, exponent/shift), as used in the
// formula documented on QuantizeMultiplier below.
using QuantizedMultiplier = std::pair<int32_t, int32_t>;
// (first, second) = (lower, upper) ends of a range of quantized values.
using QuantizedRange = std::pair<int32_t, int32_t>;
// Decompose double precision multiplier to integer multiplier and exponent.
// double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
// int_multiplier will be range of (2^31, 2^30].
// NOTE(review): the interval above is written with the larger bound first;
// presumably [2^30, 2^31) is meant -- confirm against the implementation.
QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
// Calculate the effective quantized value range for the given scale and zero
// point. Each real-valued bound that is present is mapped to the quantized
// domain as `zero_point + round(r / scale)`. The result is
//   first  = max(qmin, quantized rmin), or qmin when rmin is absent;
//   second = min(qmax, quantized rmax), or qmax when rmax is absent.
// With both bounds absent the result is simply {qmin, qmax}.
QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
absl::optional<double> rmin,
absl::optional<double> rmax,
int32_t qmin, int32_t qmax);
} // namespace quant
} // namespace mlir

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "absl/types/optional.h"
namespace mlir {
namespace quant {
@ -29,7 +30,7 @@ double ComposeScale(const QuantizedMultiplier& input) {
return input.first * exp2(-31 + input.second);
}
TEST(DecomposeScale, QuantizeMultiplier) {
TEST(NumericalUtils, QuantizeMultiplier) {
// Decompose multiplier larger than 1.
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
@ -52,6 +53,62 @@ TEST(DecomposeScale, QuantizeMultiplier) {
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
}
TEST(NumericalUtils, ActivationRange) {
  // Every scenario uses scale = 1e-6 and the int8 limits [-128, 127]; only the
  // zero point and the optional real-valued bounds vary.
  struct Scenario {
    int32_t zero_point;
    absl::optional<double> rmin;
    absl::optional<double> rmax;
    int32_t expected_first;
    int32_t expected_second;
  };

  const Scenario kScenarios[] = {
      // zero point = 0
      {0, absl::nullopt, absl::nullopt, -128, 127},
      {0, 0.0, absl::nullopt, 0, 127},
      {0, -1.0, 1.0, -128, 127},
      {0, 0.0, 6.0, 0, 127},
      // zero point = 100
      {100, absl::nullopt, absl::nullopt, -128, 127},
      {100, 0.0, absl::nullopt, 100, 127},
      {100, -1.0, 1.0, -128, 127},
      {100, 0.0, 6.0, 100, 127},
      // zero point = -100
      {-100, absl::nullopt, absl::nullopt, -128, 127},
      {-100, 0.0, absl::nullopt, -100, 127},
      {-100, -1.0, 1.0, -128, 127},
      {-100, 0.0, 6.0, -100, 127},
  };

  for (const Scenario& scenario : kScenarios) {
    const auto range = CalculateQuantizedRange(
        1e-6, scenario.zero_point, scenario.rmin, scenario.rmax, -128, 127);
    ASSERT_EQ(range.first, scenario.expected_first);
    ASSERT_EQ(range.second, scenario.expected_second);
  }
}
} // namespace
} // namespace quant
} // namespace mlir