Merge pull request from Tessil:toupstream/fix_64_bit_multiply_by_quantized_multiplier_overflow

PiperOrigin-RevId: 346877418
Change-Id: I82190beaa201c24472b604a43edf59f79b59718a
This commit is contained in:
TensorFlower Gardener 2020-12-10 15:33:49 -08:00
commit d79e6fa523
3 changed files with 75 additions and 1 deletions
tensorflow/lite/kernels/internal

View File

@ -385,6 +385,10 @@ cc_library(
hdrs = ["quantization_util.h"],
compatible_with = get_compatible_with_portable(),
copts = tflite_copts() + micro_copts(),
linkopts = select({
"//tensorflow:windows": [],
"//conditions:default": ["-lm"],
}),
deps = [
":compatibility",
":cppmath",

View File

@ -178,8 +178,12 @@ inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
// - input x is in the range -(1<<47) <= x < (1<<47)
assert(quantized_multiplier >= 0);
assert(shift >= -31 && shift < 8);
assert(x >= -(static_cast<int64_t>(1) << 47) &&
x < (static_cast<int64_t>(1) << 47));
int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16;
int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000)
? ((quantized_multiplier + (1 << 15)) >> 16)
: 0x7FFF;
int total_shift = 15 - shift;
x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
int32_t result = x >> total_shift;

View File

@ -422,6 +422,72 @@ TEST(QuantizationUtilTest, GetInvSqrtQuantizedMultiplierExp) {
EXPECT_THAT(inv_sqrt(kInt32Max), Pair(189812531, 12));
}
// Checks MultiplyByQuantizedMultiplier with 32-bit inputs: first with
// multipliers derived from a real value through QuantizeMultiplier, then with
// the largest representable input and quantized multiplier.
TEST(QuantizationUtilTest, MultiplyByQuantizedMultiplierInt32) {
  // Quantizes `multiplier` and applies the quantized form to `x`.
  auto quant_and_multiply = [](int32_t x, double multiplier) {
    int q_shift;
    int32_t q_mult;
    QuantizeMultiplier(multiplier, &q_mult, &q_shift);
    return MultiplyByQuantizedMultiplier(x, q_mult, q_shift);
  };
  const int32_t int32_min = std::numeric_limits<int32_t>::min();
  const int32_t int32_max = std::numeric_limits<int32_t>::max();

  // A zero operand on either side yields zero.
  EXPECT_EQ(quant_and_multiply(0, 0.1), 0);
  EXPECT_EQ(quant_and_multiply(1, 0), 0);

  // Every sign combination of input and multiplier.
  EXPECT_EQ(quant_and_multiply(10000, 0.00097656), 10);
  EXPECT_EQ(quant_and_multiply(10000, -0.00097656), -10);
  EXPECT_EQ(quant_and_multiply(-10000, 0.00097656), -10);
  EXPECT_EQ(quant_and_multiply(-10000, -0.00097656), 10);

  // Inputs at both ends of the int32_t range.
  EXPECT_EQ(quant_and_multiply(int32_min, 0.00001), -21475);
  EXPECT_EQ(quant_and_multiply(int32_min, -0.00001), 21475);
  EXPECT_EQ(quant_and_multiply(int32_max, 0.00001), 21475);
  EXPECT_EQ(quant_and_multiply(int32_max, -0.00001), -21475);

  // Largest possible x together with the largest possible
  // quantized_multiplier. The reference value is the 64-bit product divided
  // by 2**(31 - shift) in double precision, then rounded.
  const int32_t quantized_multiplier = int32_max;
  const int shift = -3;
  const int64_t wide_product =
      static_cast<int64_t>(int32_max) * quantized_multiplier;
  const double divisor = static_cast<double>(1ll << (31 - shift));
  const int32_t expected =
      static_cast<int32_t>(TfLiteRound(wide_product / divisor));
  EXPECT_EQ(
      MultiplyByQuantizedMultiplier(int32_max, quantized_multiplier, shift),
      expected);
  EXPECT_EQ(
      MultiplyByQuantizedMultiplier(-int32_max, quantized_multiplier, shift),
      -expected);
}
// Checks the int64_t overload of MultiplyByQuantizedMultiplier, including
// inputs at both ends of the range -(1 << 47) <= x < (1 << 47).
TEST(QuantizationUtilTest, MultiplyByQuantizedMultiplierInt64) {
  // Quantizes `multiplier` and applies the quantized form to `x`.
  auto quant_and_multiply = [](int64_t x, double multiplier) {
    int q_shift;
    int32_t q_mult;
    QuantizeMultiplier(multiplier, &q_mult, &q_shift);
    return MultiplyByQuantizedMultiplier(x, q_mult, q_shift);
  };
  const int64_t x_lowest = -(1ll << 47);
  const int64_t x_highest = (1ll << 47) - 1;

  // The 64-bit MultiplyByQuantizedMultiplier does not support negative
  // multipliers, so every multiplier used below is >= 0.
  EXPECT_EQ(quant_and_multiply(0, 0.1), 0);
  EXPECT_EQ(quant_and_multiply(1, 0), 0);
  EXPECT_EQ(quant_and_multiply(10000, 0.00097656), 10);
  EXPECT_EQ(quant_and_multiply(-10000, 0.00097656), -10);
  EXPECT_EQ(quant_and_multiply(x_lowest, 0.00001), -1407385600);
  EXPECT_EQ(quant_and_multiply(x_highest, 0.00001), 1407385600);

  // Largest possible x together with the largest possible
  // quantized_multiplier.
  const int32_t quantized_multiplier = std::numeric_limits<int32_t>::max();
  const int shift = -31;
  // 'x * quantized_multiplier / 2**(31 - shift)' is approximately 65536;
  // because of rounding error the value actually produced is slightly lower.
  const int32_t expected = 65534;
  EXPECT_EQ(
      MultiplyByQuantizedMultiplier(x_highest, quantized_multiplier, shift),
      expected);
  EXPECT_EQ(
      MultiplyByQuantizedMultiplier(-x_highest, quantized_multiplier, shift),
      -expected);
}
TEST(QuantizationUtilTest, PreprocessSoftmaxScaling) {
auto quantize = [](double beta, double scale, int integer_bits) {
int32_t q;