Clamp f32->f16 quantization to max/min range of float16
PiperOrigin-RevId: 339569171
Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4
commit 612a5fb91e
parent 669993ebe8
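
float16 (IEEE binary16) can represent finite values only in [-65504, 65504]; Eigen's round-to-nearest-even float-to-half conversion turns values far outside that range into +/-inf, silently corrupting quantized weights. A minimal standalone sketch of the failure mode this commit guards against (the include path and main() harness are illustrative, not part of the commit):

#include <algorithm>
#include <cstdio>

#include <Eigen/Core>  // in the TensorFlow tree: "third_party/eigen3/Eigen/Core"

int main() {
  // Without clamping: -99999 lies far below the float16 minimum (-65504),
  // so the round-to-nearest-even conversion overflows to -inf.
  Eigen::half unclamped(-99999.f);

  // With clamping (what this commit adds): saturate to the float16 range first.
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  float clamped = std::min(std::max(-99999.f, kMinFloat16Value), kMaxFloat16Value);
  Eigen::half saturated(clamped);

  std::printf("unclamped = %f, saturated = %f\n",
              static_cast<float>(unclamped),   // -inf
              static_cast<float>(saturated));  // -65504
  return 0;
}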
tensorflow/lite/tools/optimize/BUILD

@@ -181,7 +181,7 @@ tf_cc_test(
         "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)",
     ],
     data = [
-        "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
+        ":testdata/single_conv_weights_min_0_max_plus_10.bin",
     ],
     tags = [
         "tflite_not_portable_android",
@@ -196,6 +196,7 @@ tf_cc_test(
         "//tensorflow/lite/schema:schema_fbs",
         "//tensorflow/lite/schema:schema_utils",
         "//tensorflow/lite/testing:util",
+        "//third_party/eigen3",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest",
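
Both BUILD changes serve the new test below: ":testdata/single_conv_weights_min_0_max_plus_10.bin" is the package-relative spelling of the same label (this BUILD file lives in //tensorflow/lite/tools/optimize), and the //third_party/eigen3 dependency is what lets the test construct and read Eigen::half values directly.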
tensorflow/lite/tools/optimize/quantization_utils.cc

@@ -502,9 +502,14 @@ TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
   // Transform float data to float16.
   std::vector<Eigen::half> quantized_buffer;
   quantized_buffer.resize(num_elements);
-  std::transform(
-      float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
-      [](float a) { return Eigen::half_impl::float_to_half_rtne(a); });
+  constexpr float kMaxFloat16Value = 65504.f;
+  constexpr float kMinFloat16Value = -65504.f;
+  std::transform(float_vector.begin(), float_vector.end(),
+                 quantized_buffer.begin(), [=](float a) {
+                   float clamped = std::min(std::max(a, kMinFloat16Value),
+                                            kMaxFloat16Value);
+                   return Eigen::half_impl::float_to_half_rtne(clamped);
+                 });
 
   char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
   model->buffers[tensor->buffer]->data.assign(
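
Taken in isolation, the new code is a saturating narrowing conversion: clamp into the finite float16 range, then convert with round-to-nearest-even. A self-contained sketch of the same pattern, assuming C++17 (the helper name and the use of std::clamp are illustrative; the commit itself spells out std::min/std::max):

#include <algorithm>
#include <vector>

#include <Eigen/Core>

// Hypothetical helper mirroring the commit's clamp-then-convert step.
std::vector<Eigen::half> FloatToFloat16Saturating(const std::vector<float>& in) {
  std::vector<Eigen::half> out(in.size());
  std::transform(in.begin(), in.end(), out.begin(), [](float a) {
    constexpr float kMaxFloat16Value = 65504.f;  // largest finite binary16 value
    constexpr float kMinFloat16Value = -65504.f;
    // Saturate first so out-of-range inputs become +/-65504 instead of +/-inf.
    float clamped = std::clamp(a, kMinFloat16Value, kMaxFloat16Value);
    return Eigen::half(clamped);  // Eigen::half's float ctor rounds to nearest even.
  });
  return out;
}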
tensorflow/lite/tools/optimize/quantization_utils_test.cc

@@ -575,6 +575,42 @@ TEST_F(QuantizationUtilsTest, SymmetricQuantizeTensor) {
   EXPECT_EQ(quant_buffer_size * 4, float_buffer_size);
 }
 
+TEST_F(QuantizationUtilsTest, QuantizeFloat16Clamp) {
+  // Create data.
+  auto model = absl::make_unique<ModelT>();
+  auto subgraph = absl::make_unique<tflite::SubGraphT>();
+  auto tensor = absl::make_unique<TensorT>();
+  auto buffer = absl::make_unique<tflite::BufferT>();
+  constexpr int kNumElements = 6;
+  const std::vector<float> weights = {2.0, 1.0, 65504., 65505, -65504., -99999};
+  auto weights_reinterpreted_data =
+      reinterpret_cast<const unsigned char*>(weights.data());
+  buffer->data.assign(weights_reinterpreted_data,
+                      weights_reinterpreted_data + weights.size() * 4);
+  tensor->buffer = 0;
+  tensor->shape = {1, kNumElements};
+
+  // Wire the model.
+  model->subgraphs.push_back(std::move(subgraph));
+  model->subgraphs[0]->tensors.push_back(std::move(tensor));
+  model->buffers.push_back(std::move(buffer));
+
+  // Call and verify.
+  EXPECT_EQ(
+      QuantizeTensorFloat16(model.get(), model->subgraphs[0]->tensors[0].get()),
+      kTfLiteOk);
+  auto weightsf16 = reinterpret_cast<Eigen::half*>(
+      model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data.data());
+  std::vector<float> wf32(kNumElements);
+  std::transform(weightsf16, weightsf16 + 6, wf32.begin(), [](Eigen::half a) {
+    return Eigen::half_impl::half_to_float(a);
+  });
+
+  EXPECT_THAT(wf32,
+              ElementsAreArray({2.0, 1.0, 65504., 65504., -65504., -65504.}));
+  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT16);
+}
+
 TEST_F(QuantizationUtilsTest, QuantizeFloat16) {
   // Conv model has weights between 0 and 10.
   // Quantize the weights tensor.
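
The expectations encode saturation at both ends of the range: the out-of-range inputs 65505 and -99999 come back as 65504 and -65504 rather than +/-inf, while in-range values pass through unchanged. Exact comparison via ElementsAreArray is safe here because every expected value (2, 1, +/-65504) is exactly representable in binary16, so the round trip through half_to_float reproduces it bit-exactly. Assuming the tf_cc_test target above is named quantization_utils_test, the new test can be run with: bazel test //tensorflow/lite/tools/optimize:quantization_utils_test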