Add utility method to calculate the final activation range
This activation range is determined by the default min/max, scale and zero point from the UniformQuantizedType, and the activation function. PiperOrigin-RevId: 306561114 Change-Id: Ib48414263931b921295239499cc86cf2c92baa1b
This commit is contained in:
parent
e77122f406
commit
5c182c2502
@ -119,6 +119,9 @@ cc_library(
|
||||
name = "numerical_utils",
|
||||
srcs = ["numerical_utils.cc"],
|
||||
hdrs = ["numerical_utils.h"],
|
||||
deps = [
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
@ -154,6 +157,7 @@ tf_cc_test(
|
||||
srcs = ["numerical_utils_test.cc"],
|
||||
deps = [
|
||||
":numerical_utils",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
@ -16,9 +16,12 @@ limitations under the License.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
|
||||
@ -55,5 +58,25 @@ QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
|
||||
return {static_cast<int32_t>(q_fixed), shift};
|
||||
}
|
||||
|
||||
// Computes the effective range of quantized values for a tensor quantized with
// `scale` and `zero_point`.
//
// `rmin` / `rmax` are optional real-valued bounds. Each bound that is present
// is quantized as `zero_point + round(bound / scale)` and then used to narrow
// the storage range [`qmin`, `qmax`]: the lower limit becomes
// max(qmin, quantize(rmin)) and the upper limit becomes
// min(qmax, quantize(rmax)). A missing bound leaves the corresponding storage
// limit untouched.
//
// Returns the pair {lower, upper}. No check is made that lower <= upper, so
// disjoint ranges yield an inverted pair.
QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
                                       absl::optional<double> rmin,
                                       absl::optional<double> rmax,
                                       int32_t qmin, int32_t qmax) {
  // Quantize entirely in double precision: the bounds arrive as doubles, and
  // narrowing them to float first would discard precision before the division.
  auto quantize = [scale, zero_point](double real_value) -> int32_t {
    const double lowest =
        static_cast<double>(std::numeric_limits<int32_t>::min());
    const double highest =
        static_cast<double>(std::numeric_limits<int32_t>::max());
    const double quantized =
        static_cast<double>(zero_point) + std::round(real_value / scale);
    // Saturate before converting: casting a floating-point value that does not
    // fit into int32_t is undefined behaviour (reachable with a very small
    // scale or an infinite bound). A NaN quotient falls through to `lowest`.
    return static_cast<int32_t>(std::min(highest, std::max(lowest, quantized)));
  };

  const int32_t lower =
      rmin.has_value() ? std::max(qmin, quantize(rmin.value())) : qmin;
  const int32_t upper =
      rmax.has_value() ? std::min(qmax, quantize(rmax.value())) : qmax;
  return {lower, upper};
}
|
||||
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
||||
|
@ -19,16 +19,26 @@ limitations under the License.
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
|
||||
using QuantizedMultiplier = std::pair<int32_t, int32_t>;
|
||||
using QuantizedRange = std::pair<int32_t, int32_t>;
|
||||
|
||||
// Decompose double precision multiplier to integer multiplier and exponent.
|
||||
// double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
|
||||
// int_multiplier will be in the range [2^30, 2^31).
|
||||
QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
|
||||
|
||||
// Calculate the effective quantized value range for the scale, zero point. The
|
||||
// range is the intersection of [qmin, qmax] with the quantized values of
// [rmin, rmax]; an absent rmin or rmax leaves that side limited only by
// qmin or qmax.
|
||||
QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
|
||||
absl::optional<double> rmin,
|
||||
absl::optional<double> rmax,
|
||||
int32_t qmin, int32_t qmax);
|
||||
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
||||
|
||||
|
@ -19,6 +19,7 @@ limitations under the License.
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include "absl/types/optional.h"
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
@ -29,7 +30,7 @@ double ComposeScale(const QuantizedMultiplier& input) {
|
||||
return input.first * exp2(-31 + input.second);
|
||||
}
|
||||
|
||||
TEST(DecomposeScale, QuantizeMultiplier) {
|
||||
TEST(NumericalUtils, QuantizeMultiplier) {
|
||||
// Decompose multiplier larger than 1.
|
||||
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
|
||||
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
|
||||
@ -52,6 +53,62 @@ TEST(DecomposeScale, QuantizeMultiplier) {
|
||||
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
|
||||
}
|
||||
|
||||
TEST(NumericalUtils, ActivationRange) {
  // Every case quantizes with scale 1e-6 into the storage range [-128, 127];
  // only the zero point and the optional real-valued bounds vary.
  struct RangeCase {
    int32_t zero_point;
    absl::optional<double> rmin;
    absl::optional<double> rmax;
    int32_t expected_first;
    int32_t expected_second;
  };

  const RangeCase kCases[] = {
      // zero point = 0
      {0, absl::nullopt, absl::nullopt, -128, 127},
      {0, 0.0, absl::nullopt, 0, 127},
      {0, -1.0, 1.0, -128, 127},
      {0, 0.0, 6.0, 0, 127},

      // zero point = 100
      {100, absl::nullopt, absl::nullopt, -128, 127},
      {100, 0.0, absl::nullopt, 100, 127},
      {100, -1.0, 1.0, -128, 127},
      {100, 0.0, 6.0, 100, 127},

      // zero point = -100
      {-100, absl::nullopt, absl::nullopt, -128, 127},
      {-100, 0.0, absl::nullopt, -100, 127},
      {-100, -1.0, 1.0, -128, 127},
      {-100, 0.0, 6.0, -100, 127},
  };

  for (const RangeCase& test_case : kCases) {
    const QuantizedRange actual = CalculateQuantizedRange(
        1e-6, test_case.zero_point, test_case.rmin, test_case.rmax, -128, 127);
    ASSERT_EQ(actual.first, test_case.expected_first);
    ASSERT_EQ(actual.second, test_case.expected_second);
  }
}
|
||||
|
||||
} // namespace
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
||||
|
Loading…
Reference in New Issue
Block a user