Add a utility method to decompose a double into two integers
PiperOrigin-RevId: 306472779 Change-Id: Iaf079951f492235c27ecdb5146849f3345a8276a
This commit is contained in:
parent
9f693e35a2
commit
7c115c16e0
@ -1,4 +1,4 @@
|
||||
load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_native_cc_binary")
|
||||
load(
|
||||
"//tensorflow/core/platform:build_config.bzl",
|
||||
"tf_proto_library",
|
||||
@ -115,6 +115,12 @@ tf_native_cc_binary(
|
||||
],
|
||||
)
|
||||
|
||||
# Library providing QuantizeMultiplier(), which decomposes a double-precision
# multiplier into an integer multiplier and an exponent.
cc_library(
|
||||
name = "numerical_utils",
|
||||
srcs = ["numerical_utils.cc"],
|
||||
hdrs = ["numerical_utils.h"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "device_target",
|
||||
srcs = ["device_target.cc"],
|
||||
@ -142,3 +148,12 @@ cc_library(
|
||||
"@llvm-project//mlir:Support",
|
||||
],
|
||||
)
|
||||
|
||||
# Unit test for the ":numerical_utils" library; linked against gtest_main.
tf_cc_test(
|
||||
name = "numerical_utils_test",
|
||||
|
||||
srcs = ["numerical_utils_test.cc"],
|
||||
deps = [
|
||||
":numerical_utils",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
@ -0,0 +1,59 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
|
||||
// This method is adopted from TFLite:
|
||||
// ["tensorflow/lite/kernels/internal/quantization_util.cc"]
|
||||
QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
|
||||
if (double_multiplier < 1e-6) {
|
||||
return {0, 0};
|
||||
}
|
||||
|
||||
int32_t shift;
|
||||
const double q = frexp(double_multiplier, &shift);
|
||||
auto q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
|
||||
assert(q_fixed <= (1ll << 31));
|
||||
if (q_fixed == (1ll << 31)) {
|
||||
q_fixed /= 2;
|
||||
++shift;
|
||||
}
|
||||
assert(q_fixed <= std::numeric_limits<int32_t>::max());
|
||||
// A shift amount smaller than -31 would cause all bits to be shifted out
|
||||
// and thus all results would be zero. We implement that instead with
|
||||
// q_fixed==0, so as to avoid hitting issues with right-shift
|
||||
// operations with shift amounts greater than 31. Note that this happens
|
||||
// roughly when abs(double_multiplier) < 2^-31 and the present handling means
|
||||
// that we're effectively flushing tiny double_multiplier's to zero.
|
||||
// We could conceivably handle values in the range (roughly) [32, 63]
|
||||
// as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
|
||||
// the present handling is just doing 'flush denormals to zero'. We could
|
||||
// reconsider and actually generate nonzero denormals if a need arises.
|
||||
if (shift < -31) {
|
||||
shift = 0;
|
||||
q_fixed = 0;
|
||||
}
|
||||
return {static_cast<int32_t>(q_fixed), shift};
|
||||
}
|
||||
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
35
tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
Normal file
35
tensorflow/compiler/mlir/lite/quantization/numerical_utils.h
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
|
||||
#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
|
||||
// Decomposed multiplier: `first` is the integer multiplier and `second` is the
// exponent.
using QuantizedMultiplier = std::pair<int32_t, int32_t>;
|
||||
|
||||
// Decompose double precision multiplier to integer multiplier and exponent.
|
||||
//   double_multiplier = int_multiplier * 2 ^ (-31 + exponent)
|
||||
// int_multiplier will be in the range [2^30, 2^31), except that multipliers
// smaller than 1e-6 are decomposed to {0, 0}.
|
||||
QuantizedMultiplier QuantizeMultiplier(double double_multiplier);
|
||||
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
||||
|
||||
#endif // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_NUMERICAL_UTILS_H_
|
@ -0,0 +1,57 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
namespace mlir {
|
||||
namespace quant {
|
||||
|
||||
namespace {
|
||||
|
||||
double ComposeScale(const QuantizedMultiplier& input) {
|
||||
return input.first * exp2(-31 + input.second);
|
||||
}
|
||||
|
||||
// Checks that decomposing a scale with QuantizeMultiplier() and recomposing it
// with ComposeScale() yields the expected value (compared at float precision).
TEST(DecomposeScale, QuantizeMultiplier) {
  struct RoundTripCase {
    double multiplier;  // Value handed to QuantizeMultiplier().
    double expected;    // Value expected back from ComposeScale().
  };

  const RoundTripCase kCases[] = {
      // Multipliers larger than 1 round-trip unchanged.
      {1.0e6, 1.0e6},
      {1.0e3, 1.0e3},
      {10., 10.},
      {5., 5.},
      {2., 2.},

      // Zero stays zero.
      {0.0, 0.0},

      // Multipliers from 1.0 down to the 1e-6 cut-off round-trip unchanged.
      {1.0, 1.0},
      {1.0e-1, 1.0e-1},
      {1.0e-2, 1.0e-2},
      {1.0e-3, 1.0e-3},
      {1.0e-4, 1.0e-4},
      {1.0e-5, 1.0e-5},
      {1.0e-6, 1.0e-6},

      // When scale is smaller than 1.0e-6, it is decomposed to {0, 0}.
      {1.0e-7, 0.0},
      {1.0e-8, 0.0},
  };

  for (const RoundTripCase& test_case : kCases) {
    ASSERT_FLOAT_EQ(ComposeScale(QuantizeMultiplier(test_case.multiplier)),
                    test_case.expected);
  }
}
|
||||
|
||||
} // namespace
|
||||
} // namespace quant
|
||||
} // namespace mlir
|
Loading…
x
Reference in New Issue
Block a user