Clamp f32->f16 quantization to max/min range of float16

PiperOrigin-RevId: 339569171
Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4
Author: T.J. Alumbaugh (2020-10-28 17:14:33 -07:00), committed by TensorFlower Gardener
parent 669993ebe8
commit 612a5fb91e
3 changed files with 46 additions and 4 deletions
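
Without the clamp, weights far outside the finite float16 range (magnitude well beyond 65504) convert to +/-inf under round-to-nearest-even, and the infinities then propagate through any arithmetic on the dequantized weights. A minimal standalone sketch of the clamp-then-convert pattern this commit introduces (ClampedFloatToHalf is a hypothetical name for illustration; only Eigen's half support is assumed):

#include <algorithm>
#include <cstdio>

#include "Eigen/Core"  // Eigen::half and the half_impl conversion helpers

// Hypothetical helper mirroring the commit's lambda: clamp to the finite
// float16 range before round-to-nearest-even conversion.
Eigen::half ClampedFloatToHalf(float a) {
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  const float clamped =
      std::min(std::max(a, kMinFloat16Value), kMaxFloat16Value);
  return Eigen::half(Eigen::half_impl::float_to_half_rtne(clamped));
}

int main() {
  // 99999 is far outside the finite float16 range; without the clamp it
  // would convert to +inf. With the clamp it saturates to 65504.
  printf("%f\n", Eigen::half_impl::half_to_float(ClampedFloatToHalf(99999.f)));
  printf("%f\n", Eigen::half_impl::half_to_float(ClampedFloatToHalf(-99999.f)));
  return 0;
}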

tensorflow/lite/tools/optimize/BUILD

@@ -181,7 +181,7 @@ tf_cc_test(
         "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
     ],
     data = [
-        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
+        ":testdata/single_conv_weights_min_0_max_plus_10.bin",
     ],
     tags = [
         "tflite_not_portable_android",
@@ -196,6 +196,7 @@ tf_cc_test(
         "//tensorflow/lite/schema:schema_fbs",
         "//tensorflow/lite/schema:schema_utils",
         "//tensorflow/lite/testing:util",
+        "//third_party/eigen3",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest",

tensorflow/lite/tools/optimize/quantization_utils.cc

@@ -502,9 +502,14 @@ TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
   // Transform float data to float16.
   std::vector<Eigen::half> quantized_buffer;
   quantized_buffer.resize(num_elements);
-  std::transform(
-      float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
-      [](float a) { return Eigen::half_impl::float_to_half_rtne(a); });
+  constexpr float kMaxFloat16Value = 65504.f;
+  constexpr float kMinFloat16Value = -65504.f;
+  std::transform(float_vector.begin(), float_vector.end(),
+                 quantized_buffer.begin(), [=](float a) {
+                   float clamped = std::min(std::max(a, kMinFloat16Value),
+                                            kMaxFloat16Value);
+                   return Eigen::half_impl::float_to_half_rtne(clamped);
+                 });
 
   char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
   model->buffers[tensor->buffer]->data.assign(

tensorflow/lite/tools/optimize/quantization_utils_test.cc

@@ -575,6 +575,42 @@ TEST_F(QuantizationUtilsTest, SymmetricQuantizeTensor) {
   EXPECT_EQ(quant_buffer_size * 4, float_buffer_size);
 }
 
+TEST_F(QuantizationUtilsTest, QuantizeFloat16Clamp) {
+  // Create data.
+  auto model = absl::make_unique<ModelT>();
+  auto subgraph = absl::make_unique<tflite::SubGraphT>();
+  auto tensor = absl::make_unique<TensorT>();
+  auto buffer = absl::make_unique<tflite::BufferT>();
+  constexpr int kNumElements = 6;
+  const std::vector<float> weights = {2.0,     1.0,    65504., 65505,
+                                      -65504., -99999};
+  auto weights_reinterpreted_data =
+      reinterpret_cast<const unsigned char*>(weights.data());
+  buffer->data.assign(weights_reinterpreted_data,
+                      weights_reinterpreted_data + weights.size() * 4);
+  tensor->buffer = 0;
+  tensor->shape = {1, kNumElements};
+
+  // Wire the model.
+  model->subgraphs.push_back(std::move(subgraph));
+  model->subgraphs[0]->tensors.push_back(std::move(tensor));
+  model->buffers.push_back(std::move(buffer));
+
+  // Call and verify.
+  EXPECT_EQ(
+      QuantizeTensorFloat16(model.get(), model->subgraphs[0]->tensors[0].get()),
+      kTfLiteOk);
+  auto weightsf16 = reinterpret_cast<Eigen::half*>(
+      model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data.data());
+  std::vector<float> wf32(kNumElements);
+  std::transform(weightsf16, weightsf16 + 6, wf32.begin(), [](Eigen::half a) {
+    return Eigen::half_impl::half_to_float(a);
+  });
+  EXPECT_THAT(wf32,
+              ElementsAreArray({2.0, 1.0, 65504., 65504., -65504., -65504.}));
+  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT16);
+}
+
 TEST_F(QuantizationUtilsTest, QuantizeFloat16) {
   // Conv model has weights between 0 and 10.
   // Quantize the weights tensor.