Add QuantizeMultiplierArray to quantize an array of multipliers.

PiperOrigin-RevId: 229185162
This commit is contained in:
Jian Li 2019-01-14 08:12:13 -08:00 committed by TensorFlower Gardener
parent 6dae7e3ebb
commit 22e458382d
3 changed files with 67 additions and 0 deletions

View File

@ -366,4 +366,13 @@ bool CheckedLog2(const float x, int* log2_result) {
return std::abs(x_log2_fracpart) < 1e-3;
}
// Applies QuantizeMultiplier to every one of the `size` entries of
// `effective_scales`. The significand and shift produced for entry i are
// written to effective_scale_significand[i] and effective_shift[i].
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
                             int32_t* effective_scale_significand,
                             int* effective_shift) {
  size_t index = 0;
  while (index != size) {
    // Input and both outputs share the same index.
    QuantizeMultiplier(effective_scales[index],
                       effective_scale_significand + index,
                       effective_shift + index);
    ++index;
  }
}
} // namespace tflite

View File

@ -275,6 +275,17 @@ void FakeQuantizeArray(const float nudged_scale, const float nudged_min,
// returns false.
bool CheckedLog2(const float x, int* log2_result);
// Decomposes each element of an array of double multipliers into a Q0.31
// int32 representation of its significand, and a shift representation of its
// exponent.
//
// Handles arbitrary multipliers. The 'shift' output-value is basically the
// 'floating-point exponent' of the multiplier: negative for a right-shift,
// positive for a left-shift. For example, the unit test for this function
// expects 0.25 to yield shift -1, 0.5 to yield shift 0 and 2 to yield shift 2,
// each with significand 1073741824 (2^30); a multiplier of 0 is expected to
// yield significand 0 and shift 0.
//
// Parameters:
//   effective_scales: input array of multipliers; `size` elements are read.
//   size: number of elements to process.
//   effective_scale_significand: output array; element i receives the
//     significand computed for effective_scales[i].
//   effective_shift: output array; element i receives the shift computed for
//     effective_scales[i].
//
// All three arrays are indexed from 0 to size - 1, so each must hold at least
// `size` elements.
void QuantizeMultiplierArray(const double* effective_scales, size_t size,
int32_t* effective_scale_significand,
int* effective_shift);
} // namespace tflite
#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_

View File

@ -20,6 +20,7 @@ limitations under the License.
namespace tflite {
namespace {
using ::testing::ElementsAreArray;
using ::testing::Pair;
template <class FloatIn, class IntOut>
@ -406,6 +407,52 @@ TEST(QuantizationUtilTest, CalculateInputRadius) {
EXPECT_EQ(CalculateInputRadius(4, 2), 503316480);
}
// Checks QuantizeMultiplierArray against hand-computed significand/shift pairs
// for negative, zero and positive power-of-two multipliers.
TEST(QuantizationUtilTest, QuantizeMultiplierArray) {
  // Inputs: powers of two between -4 and 4, with zero in the middle.
  const std::vector<double> multipliers = {-4,    -2,   -1,  -0.5, -0.25,
                                           -0.125, 0,    0.125, 0.25, 0.5,
                                           1,      2,    4};

  // Expected significands, in input order. Every non-zero input is a power of
  // two, so the magnitude is always 2^30 and only the sign differs.
  const std::vector<int32> want_significands = {
      -1073741824,  // multiplier = -4
      -1073741824,  // multiplier = -2
      -1073741824,  // multiplier = -1
      -1073741824,  // multiplier = -0.5
      -1073741824,  // multiplier = -0.25
      -1073741824,  // multiplier = -0.125
      0,            // multiplier = 0
      1073741824,   // multiplier = 0.125
      1073741824,   // multiplier = 0.25
      1073741824,   // multiplier = 0.5
      1073741824,   // multiplier = 1
      1073741824,   // multiplier = 2
      1073741824,   // multiplier = 4
  };

  // Expected shifts, in input order.
  const std::vector<int> want_shifts = {
      3,   // multiplier = -4
      2,   // multiplier = -2
      1,   // multiplier = -1
      0,   // multiplier = -0.5
      -1,  // multiplier = -0.25
      -2,  // multiplier = -0.125
      0,   // multiplier = 0
      -2,  // multiplier = 0.125
      -1,  // multiplier = 0.25
      0,   // multiplier = 0.5
      1,   // multiplier = 1
      2,   // multiplier = 2
      3,   // multiplier = 4
  };

  // Run the function under test into output buffers sized to the input.
  const int count = multipliers.size();
  std::vector<int32> got_significands(count);
  std::vector<int> got_shifts(count);
  QuantizeMultiplierArray(multipliers.data(), count, got_significands.data(),
                          got_shifts.data());

  EXPECT_THAT(got_significands, ElementsAreArray(want_significands));
  EXPECT_THAT(got_shifts, ElementsAreArray(want_shifts));
}
} // namespace
} // namespace tflite