Add an utility method to decompose a double to two integers

PiperOrigin-RevId: 306472779
Change-Id: Iaf079951f492235c27ecdb5146849f3345a8276a
This commit is contained in:
Feng Liu 2020-04-14 10:57:06 -07:00 committed by TensorFlower Gardener
parent 9f693e35a2
commit 7c115c16e0
4 changed files with 167 additions and 1 deletions

View File

@ -1,4 +1,4 @@
load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary")
load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_native_cc_binary")
load(
"//tensorflow/core/platform:build_config.bzl",
"tf_proto_library",
@ -115,6 +115,12 @@ tf_native_cc_binary(
],
)
cc_library(
name = "numerical_utils",
srcs = ["numerical_utils.cc"],
hdrs = ["numerical_utils.h"],
)
cc_library(
name = "device_target",
srcs = ["device_target.cc"],
@ -142,3 +148,12 @@ cc_library(
"@llvm-project//mlir:Support",
],
)
tf_cc_test(
name = "numerical_utils_test",
srcs = ["numerical_utils_test.cc"],
deps = [
":numerical_utils",
"@com_google_googletest//:gtest_main",
],
)

View File

@ -0,0 +1,59 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
#include <assert.h>
#include <cmath>
#include <limits>
namespace mlir {
namespace quant {
// This method is adopted from TFLite:
// ["tensorflow/lite/kernels/internal/quantization_util.cc"]
QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
if (double_multiplier < 1e-6) {
return {0, 0};
}
int32_t shift;
const double q = frexp(double_multiplier, &shift);
auto q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
assert(q_fixed <= (1ll << 31));
if (q_fixed == (1ll << 31)) {
q_fixed /= 2;
++shift;
}
assert(q_fixed <= std::numeric_limits<int32_t>::max());
// A shift amount smaller than -31 would cause all bits to be shifted out
// and thus all results would be zero. We implement that instead with
// q_fixed==0, so as to avoid hitting issues with right-shift
// operations with shift amounts greater than 31. Note that this happens
// roughly when abs(double_multiplier) < 2^-31 and the present handling means
// that we're effectively flushing tiny double_multiplier's to zero.
// We could conceivably handle values in the range (roughly) [32, 63]
// as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
// the present handling is just doing 'flush denormals to zero'. We could
// reconsider and actually generate nonzero denormals if a need arises.
if (shift < -31) {
shift = 0;
q_fixed = 0;
}
return {static_cast<int32_t>(q_fixed), shift};
}
} // namespace quant
} // namespace mlir

View File

@ -0,0 +1,35 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
#include <cstdint>
#include <utility>
namespace mlir {
namespace quant {
using QuantizedMultiplier = std::pair<int32_t, int32_t>;
// Decompose double precision multiplier to integer multiplier and exponent.
// double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
// int_multiplier will be range of (2^31, 2^30].
QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
} // namespace quant
} // namespace mlir
#endif // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_

View File

@ -0,0 +1,57 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
#include <cmath>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
namespace mlir {
namespace quant {
namespace {
double ComposeScale(const QuantizedMultiplier& input) {
return input.first * exp2(-31 + input.second);
}
TEST(DecomposeScale, QuantizeMultiplier) {
// Decompose multiplier larger than 1.
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e6)), 1.0e6);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e3)), 1.0e3);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(10.)), 10.);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(5.)), 5.);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(2.)), 2.);
// Decompose multiplier between 1.0 and 1e-6.
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(0.0)), 0.0);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0)), 1.0);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-1)), 1.0e-1);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-2)), 1.0e-2);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-3)), 1.0e-3);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-4)), 1.0e-4);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-5)), 1.0e-5);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-6)), 1.0e-6);
// When scale is smaller than 1.0e-6, it is decomposed to {0, 0}.
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-7)), 0.0);
ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(1.0e-8)), 0.0);
}
} // namespace
} // namespace quant
} // namespace mlir