diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD
index 7dfd8b7a6ae..ad9c992f790 100644
--- a/tensorflow/lite/tools/optimize/BUILD
+++ b/tensorflow/lite/tools/optimize/BUILD
@@ -178,6 +178,7 @@ tf_cc_test(
         "//tensorflow/lite/tools/optimize:testdata/add_with_const_input.bin",
         "//tensorflow/lite/tools/optimize:testdata/argmax.bin",
         "//tensorflow/lite/tools/optimize:testdata/concat.bin",
+        "//tensorflow/lite/tools/optimize:testdata/fc.bin",
         "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin",
diff --git a/tensorflow/lite/tools/optimize/quantization_utils.cc b/tensorflow/lite/tools/optimize/quantization_utils.cc
index 6c3afe4ef4d..80c14fc0676 100644
--- a/tensorflow/lite/tools/optimize/quantization_utils.cc
+++ b/tensorflow/lite/tools/optimize/quantization_utils.cc
@@ -252,6 +252,37 @@ TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                                model, tensor);
 }
 
+TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
+                                           float input_scale,
+                                           float weight_scale) {
+  // Compute the bias scale.
+  float scaling_factor = input_scale * weight_scale;
+
+  BufferT* buffer = model->buffers[tensor->buffer].get();
+  float* float_data = reinterpret_cast<float*>(buffer->data.data());
+  int32_t float_data_size = buffer->data.size() / sizeof(float);
+  uint64_t num_elements;
+  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
+
+  std::vector<int32_t> final_buffer(num_elements);
+  const int32_t kScale = std::numeric_limits<int32_t>::max();
+
+  for (int32_t i = 0; i < float_data_size; i++) {
+    float scaling_factor_inv = (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
+    const int32_t quantized_value =
+        static_cast<int32_t>(TfLiteRound(float_data[i] * scaling_factor_inv));
+    final_buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
+  }
+
+  // Set the buffers and output type.
+  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
+  size_t buffer_size = num_elements * sizeof(int32_t);
+  std::vector<float> scales(1, scaling_factor);
+  std::vector<int64_t> zero_points(1, 0);
+  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
+                               buffer_size, TensorType_INT32, model, tensor);
+}
+
 TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                              float input_scale,
                                              const float* weight_scales,
diff --git a/tensorflow/lite/tools/optimize/quantization_utils.h b/tensorflow/lite/tools/optimize/quantization_utils.h
index 6fe3da813cf..274862a536c 100644
--- a/tensorflow/lite/tools/optimize/quantization_utils.h
+++ b/tensorflow/lite/tools/optimize/quantization_utils.h
@@ -76,6 +76,11 @@ TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
 TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                                int32_t channel_dim_index);
 
+// Symmetrically quantizes the bias for per-layer ops (e.g. FullyConnected).
+TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
+                                           float input_scale,
+                                           float weight_scale);
+
 // Symmetrically quantizes the bias for ops like Conv and DepthwiseConv.
 // The scale of bias is weight_per_channel_scale[channel] * input_scale.
 TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
diff --git a/tensorflow/lite/tools/optimize/quantization_utils_test.cc b/tensorflow/lite/tools/optimize/quantization_utils_test.cc
index c36ce64ebca..74813c180cb 100644
--- a/tensorflow/lite/tools/optimize/quantization_utils_test.cc
+++ b/tensorflow/lite/tools/optimize/quantization_utils_test.cc
@@ -291,6 +291,43 @@ TEST(QuantizationUtilsTest, AddQuantizationParams) {
   EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT8);
 }
 
+TEST(QuantizationUtilsTest, SymmetricPerLayerBiasQuantize) {
+  // Create data.
+  auto model = absl::make_unique<ModelT>();
+  auto subgraph = absl::make_unique<SubGraphT>();
+  auto tensor = absl::make_unique<TensorT>();
+  auto buffer = absl::make_unique<BufferT>();
+  const float weight_scale = 0.5;
+  const float input_scale = 0.5;
+  std::vector<float> bias_data = {4.0, 1.0};
+  auto bias_reinterpreted_data =
+      reinterpret_cast<const unsigned char*>(bias_data.data());
+  buffer->data.assign(bias_reinterpreted_data,
+                      bias_reinterpreted_data + bias_data.size() * 4);
+  tensor->buffer = 0;
+  tensor->shape = {2, 1, 1, 1};
+  tensor->quantization = absl::make_unique<QuantizationParametersT>();
+
+  // Wire the model.
+  model->subgraphs.push_back(std::move(subgraph));
+  model->subgraphs[0]->tensors.push_back(std::move(tensor));
+  model->buffers.push_back(std::move(buffer));
+
+  // Call and verify.
+  EXPECT_EQ(SymmetricPerLayerBiasQuantize(model.get(),
+                                          model->subgraphs[0]->tensors[0].get(),
+                                          input_scale, weight_scale),
+            kTfLiteOk);
+
+  EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->scale[0],
+              weight_scale * input_scale);
+  EXPECT_THAT(model->subgraphs[0]->tensors[0]->quantization->zero_point[0], 0);
+
+  EXPECT_THAT(model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data,
+              ElementsAreArray({16, 0, 0, 0, 4, 0, 0, 0}));
+  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT32);
+}
+
 TEST(QuantizationUtilsTest, SymmetricPerChannelBiasQuantize) {
   // Create data.
   auto model = absl::make_unique<ModelT>();
diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc
index 72385e5b287..b75b96d8e42 100644
--- a/tensorflow/lite/tools/optimize/quantize_model.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model.cc
@@ -37,36 +37,50 @@ namespace optimize {
 namespace {
 
 TfLiteStatus QuantizeBias(ModelT* model, const TensorT* input_tensor,
                           const TensorT* weight_tensor, TensorT* bias_tensor,
-                          int channel_dim_index,
+                          bool is_per_channel, int channel_dim_index,
                           ErrorReporter* error_reporter) {
   if (bias_tensor->shape.size() != 1) {
     error_reporter->Report("Expected bias tensor shape to be 1.");
     return kTfLiteError;
   }
 
-  if (bias_tensor->shape[0] != weight_tensor->shape[channel_dim_index]) {
-    error_reporter->Report(
-        "Channel mismatch between bias and weight tensors %d vs %d",
-        bias_tensor->shape[0], weight_tensor->shape[channel_dim_index]);
-    return kTfLiteError;
-  }
   int32_t channel_dim_size = bias_tensor->shape[0];
-  if (!input_tensor->quantization ||
-      input_tensor->quantization->scale.size() != 1) {
-    error_reporter->Report("Input tensor missing quantization information");
-    return kTfLiteError;
-  }
   TF_LITE_ENSURE(error_reporter, weight_tensor->quantization);
-  const std::vector<float>& weight_scales = weight_tensor->quantization->scale;
+  std::vector<float> weight_scales = weight_tensor->quantization->scale;
 
-  if (weight_scales.size() != channel_dim_size) {
-    error_reporter->Report("Mismatch weight scale dimension: %d",
-                           weight_scales.size());
-    return kTfLiteError;
+  if (is_per_channel) {
+    if (bias_tensor->shape[0] != weight_tensor->shape[channel_dim_index]) {
+      error_reporter->Report(
+          "Channel mismatch between bias and weight tensors %d vs %d",
+          bias_tensor->shape[0], weight_tensor->shape[channel_dim_index]);
+      return kTfLiteError;
+    }
+    if (!input_tensor->quantization ||
+        input_tensor->quantization->scale.size() != 1) {
+      error_reporter->Report("Input tensor missing quantization information");
+      return kTfLiteError;
+    }
+
+    if (weight_scales.size() != channel_dim_size) {
+      error_reporter->Report("Mismatch weight scale dimension: %d",
+                             weight_scales.size());
+      return kTfLiteError;
+    }
+    return utils::SymmetricPerChannelBiasQuantize(
+        model, bias_tensor, input_tensor->quantization->scale[0],
+        weight_scales.data(), channel_dim_size, channel_dim_index);
+  } else {
+    if (weight_scales.size() != 1) {
+      error_reporter->Report(
+          "Expected per-layer weight scale dimension size 1, got %d",
+          weight_scales.size());
+      return kTfLiteError;
+    }
+    return utils::SymmetricPerLayerBiasQuantize(
+        model, bias_tensor, input_tensor->quantization->scale[0],
+        weight_scales[0]);
   }
-  return utils::SymmetricPerChannelBiasQuantize(
-      model, bias_tensor, input_tensor->quantization->scale[0],
-      weight_scales.data(), channel_dim_size, channel_dim_index);
+  return kTfLiteError;
 }
 
 // True if the tensor type has to be modified.
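For reference, the per-layer branch added above reduces to simple arithmetic: the bias scale is the product of the input scale and the single per-layer weight scale, and each float bias value is divided by that scale, rounded to the nearest integer, and clamped to the symmetric int32 range. The following is a minimal standalone sketch of that math, independent of the ModelT/BufferT plumbing; the helper name QuantizeBiasPerLayer is hypothetical, not part of the patch:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Hypothetical helper mirroring SymmetricPerLayerBiasQuantize's arithmetic:
// bias_scale = input_scale * weight_scale; values are rounded and clamped
// to the symmetric range [-INT32_MAX, INT32_MAX].
std::vector<int32_t> QuantizeBiasPerLayer(const std::vector<float>& bias,
                                          float input_scale,
                                          float weight_scale) {
  const float scale = input_scale * weight_scale;
  const float scale_inv = (scale == 0.f) ? 0.f : 1.0f / scale;
  const int32_t kMax = std::numeric_limits<int32_t>::max();
  std::vector<int32_t> quantized(bias.size());
  for (size_t i = 0; i < bias.size(); ++i) {
    const int32_t q = static_cast<int32_t>(std::round(bias[i] * scale_inv));
    quantized[i] = std::min(kMax, std::max(-kMax, q));
  }
  return quantized;
}
```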
@@ -478,7 +492,7 @@ TfLiteStatus QuantizeBiases(flatbuffers::FlatBufferBuilder* builder,
     for (const int bias_idx : property.biases) {
       if (bias_idx >= op->inputs.size()) {
         error_reporter->Report(
-            "Requaired input index %d is larger than the input length of "
+            "Required input index %d is larger than the input length of "
             "op %s at index %d in subgraph %d",
             bias_idx, op->inputs.size(), EnumNameBuiltinOperator(op_code),
             op_idx, subgraph_idx);
@@ -502,8 +516,9 @@ TfLiteStatus QuantizeBiases(flatbuffers::FlatBufferBuilder* builder,
           subgraph->tensors[op->inputs[property.input_indexes[0]]].get();
       TensorT* weight_tensor =
           subgraph->tensors[op->inputs[property.input_indexes[1]]].get();
-      QuantizeBias(model, input_tensor, weight_tensor, bias_tensor,
-                   property.per_axis_index, error_reporter);
+      TF_LITE_ENSURE_STATUS(QuantizeBias(
+          model, input_tensor, weight_tensor, bias_tensor,
+          property.per_axis, property.per_axis_index, error_reporter));
     }
   }
 }
diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc
index 55b84deb019..f02e93fd8c8 100644
--- a/tensorflow/lite/tools/optimize/quantize_model_test.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc
@@ -827,6 +827,48 @@ TEST_F(QuantizeArgMaxTest, VerifyArgMax) {
             subgraph->tensors[op->outputs[0]].get()->type);
 }
 
+class QuantizeFCTest : public QuantizeModelTest {
+ protected:
+  QuantizeFCTest() {
+    input_model_ = ReadModel(internal::kModelWithFCOp);
+    readonly_model_ = input_model_->GetModel();
+    readonly_model_->UnPackTo(&model_);
+  }
+};
+
+TEST_F(QuantizeFCTest, VerifyFC) {
+  auto status = QuantizeModel(&builder_, &model_, TensorType_INT8,
+                              TensorType_INT8, &error_reporter_);
+  ASSERT_EQ(kTfLiteOk, status);
+
+  const auto& subgraph = model_.subgraphs[0];
+  auto op = subgraph->operators[0].get();
+  ASSERT_EQ(model_.operator_codes[op->opcode_index].get()->builtin_code,
+            BuiltinOperator_FULLY_CONNECTED);
+
+  ASSERT_EQ(op->inputs.size(), 3);
+  ASSERT_EQ(op->outputs.size(), 1);
+
+  auto float_graph = readonly_model_->subgraphs()->Get(0);
+  // Verify the FC input and weight are quantized.
+  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
+            TensorType_FLOAT32);
+  EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
+  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[1])->type(),
+            TensorType_FLOAT32);
+  EXPECT_EQ(subgraph->tensors[op->inputs[1]].get()->type, TensorType_INT8);
+
+  // Verify the FC bias is int32 quantized.
+  ASSERT_EQ(float_graph->tensors()->Get(op->inputs[2])->type(),
+            TensorType_FLOAT32);
+  EXPECT_EQ(subgraph->tensors[op->inputs[2]].get()->type, TensorType_INT32);
+
+  // The output of FC should be quantized.
+  ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
+            TensorType_FLOAT32);
+  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
+}
+
 }  // namespace
 }  // namespace optimize
 }  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/test_util.cc b/tensorflow/lite/tools/optimize/test_util.cc
index a1cb73fb4c6..9c5a4799818 100644
--- a/tensorflow/lite/tools/optimize/test_util.cc
+++ b/tensorflow/lite/tools/optimize/test_util.cc
@@ -43,6 +43,8 @@ const char* kModelWithCustomOp = "custom_op.bin";
 
 const char* kModelWithArgMaxOp = "argmax.bin";
 
+const char* kModelWithFCOp = "fc.bin";
+
 int FailOnErrorReporter::Report(const char* format, va_list args) {
   char buf[1024];
   vsnprintf(buf, sizeof(buf), format, args);
diff --git a/tensorflow/lite/tools/optimize/test_util.h b/tensorflow/lite/tools/optimize/test_util.h
index 941bb22f880..4fcc7283957 100644
--- a/tensorflow/lite/tools/optimize/test_util.h
+++ b/tensorflow/lite/tools/optimize/test_util.h
@@ -66,6 +66,9 @@ extern const char* kModelWithCustomOp;
 // Test model with a argmax op.
 extern const char* kModelWithArgMaxOp;
 
+// Test model with a fully connected op.
+extern const char* kModelWithFCOp;
+
 // An error reporter that fails on testing.
 class FailOnErrorReporter : public ErrorReporter {
  public:
diff --git a/tensorflow/lite/tools/optimize/testdata/fc.bin b/tensorflow/lite/tools/optimize/testdata/fc.bin
new file mode 100644
index 00000000000..c1ec203cfde
Binary files /dev/null and b/tensorflow/lite/tools/optimize/testdata/fc.bin differ
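As a sanity check on the expected bytes in the SymmetricPerLayerBiasQuantize unit test: with input_scale = weight_scale = 0.5, the bias scale is 0.25, so 4.0 quantizes to 16 and 1.0 to 4, and those two int32 values stored in the tensor buffer are exactly the bytes {16, 0, 0, 0, 4, 0, 0, 0}. A small sketch reproducing that serialization, assuming a little-endian host (as the test's hardcoded byte order does):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

int main() {
  // Quantized bias values from the test: 4.0 / 0.25 = 16, 1.0 / 0.25 = 4.
  const std::vector<int32_t> quantized = {16, 4};

  // Copy out the raw bytes, as BufferT stores them (uint8_t vector).
  std::vector<uint8_t> bytes(quantized.size() * sizeof(int32_t));
  std::memcpy(bytes.data(), quantized.data(), bytes.size());

  // Matches ElementsAreArray({16, 0, 0, 0, 4, 0, 0, 0}) on little-endian.
  const std::vector<uint8_t> expected = {16, 0, 0, 0, 4, 0, 0, 0};
  assert(bytes == expected);
  return 0;
}
```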