diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD index 185d1d69ce0..b7487b33fae 100644 --- a/tensorflow/lite/tools/optimize/BUILD +++ b/tensorflow/lite/tools/optimize/BUILD @@ -181,7 +181,7 @@ tf_cc_test( "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)", ], data = [ - "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin", + ":testdata/single_conv_weights_min_0_max_plus_10.bin", ], tags = [ "tflite_not_portable_android", @@ -196,6 +196,7 @@ tf_cc_test( "//tensorflow/lite/schema:schema_fbs", "//tensorflow/lite/schema:schema_utils", "//tensorflow/lite/testing:util", + "//third_party/eigen3", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest", diff --git a/tensorflow/lite/tools/optimize/quantization_utils.cc b/tensorflow/lite/tools/optimize/quantization_utils.cc index 81110071dc9..d2e04338375 100644 --- a/tensorflow/lite/tools/optimize/quantization_utils.cc +++ b/tensorflow/lite/tools/optimize/quantization_utils.cc @@ -502,9 +502,14 @@ TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) { // Transform float data to float16. 
std::vector<Eigen::half> quantized_buffer; quantized_buffer.resize(num_elements); - std::transform( - float_vector.begin(), float_vector.end(), quantized_buffer.begin(), - [](float a) { return Eigen::half_impl::float_to_half_rtne(a); }); + constexpr float kMaxFloat16Value = 65504.f; + constexpr float kMinFloat16Value = -65504.f; + std::transform(float_vector.begin(), float_vector.end(), + quantized_buffer.begin(), [=](float a) { + float clamped = std::min(std::max(a, kMinFloat16Value), + kMaxFloat16Value); + return Eigen::half_impl::float_to_half_rtne(clamped); + }); char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data()); model->buffers[tensor->buffer]->data.assign( diff --git a/tensorflow/lite/tools/optimize/quantization_utils_test.cc b/tensorflow/lite/tools/optimize/quantization_utils_test.cc index 4ce0d01fd12..76648eaafac 100644 --- a/tensorflow/lite/tools/optimize/quantization_utils_test.cc +++ b/tensorflow/lite/tools/optimize/quantization_utils_test.cc @@ -575,6 +575,42 @@ TEST_F(QuantizationUtilsTest, SymmetricQuantizeTensor) { EXPECT_EQ(quant_buffer_size * 4, float_buffer_size); } +TEST_F(QuantizationUtilsTest, QuantizeFloat16Clamp) { + // Create data. + auto model = absl::make_unique<ModelT>(); + auto subgraph = absl::make_unique<SubGraphT>(); + auto tensor = absl::make_unique<TensorT>(); + auto buffer = absl::make_unique<BufferT>(); + constexpr int kNumElements = 6; + const std::vector<float> weights = {2.0, 1.0, 65504., 65505, -65504., -99999}; + auto weights_reinterpreted_data = + reinterpret_cast<const unsigned char*>(weights.data()); + buffer->data.assign(weights_reinterpreted_data, + weights_reinterpreted_data + weights.size() * 4); + tensor->buffer = 0; + tensor->shape = {1, kNumElements}; + + // Wire the model. + model->subgraphs.push_back(std::move(subgraph)); + model->subgraphs[0]->tensors.push_back(std::move(tensor)); + model->buffers.push_back(std::move(buffer)); + + // Call and verify. 
+ EXPECT_EQ( + QuantizeTensorFloat16(model.get(), model->subgraphs[0]->tensors[0].get()), + kTfLiteOk); + auto weightsf16 = reinterpret_cast<Eigen::half*>( + model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data.data()); + std::vector<float> wf32(kNumElements); + std::transform(weightsf16, weightsf16 + 6, wf32.begin(), [](Eigen::half a) { + return Eigen::half_impl::half_to_float(a); + }); + + EXPECT_THAT(wf32, + ElementsAreArray({2.0, 1.0, 65504., 65504., -65504., -65504.})); + EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT16); +} + TEST_F(QuantizationUtilsTest, QuantizeFloat16) { // Conv model has weights between 0 and 10. // Quantize the weights tensor.