From 4d1c107bef1c06835371179c9474655e6810ed91 Mon Sep 17 00:00:00 2001
From: Yunlu Li
Date: Wed, 9 Dec 2020 16:47:41 -0800
Subject: [PATCH] Add hybrid BatchMatMul kernel that supports legacy symmetric_quantize_inputs.

PiperOrigin-RevId: 346666303
Change-Id: Ife2d74a25aa24a8444c86741dc57b23d6de66ad6
---
 RELEASE.md                                    |   5 +-
 .../lite/core/api/flatbuffer_conversions.cc   |   2 +
 tensorflow/lite/kernels/batch_matmul.cc       |  14 +-
 tensorflow/lite/kernels/batch_matmul_test.cc  | 168 ++++++++++++++++--
 tensorflow/lite/kernels/register.cc           |   2 +-
 .../lite/tools/versioning/runtime_version.cc  |   1 +
 6 files changed, 172 insertions(+), 20 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 0e3eb0e0271..a9399974b47 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -58,10 +58,11 @@ directly.
     * 16 bits quantization
       * Added int16x8 support for ABS, REDUCE_MAX and REDUCE_MIN operators.
-    * Added support for saved model's session initializer through 
+    * Added support for saved model's session initializer through
       `TFLiteConverter.from_saved_model`.
-    * Added dynamic range quantization support for the BatchMatMul op.
     * Added DEPTH_TO_SPACE support in Post training quantization.
+    * Added dynamic range quantization support for the BatchMatMul op.
+      * Both symmetric and asymmetric quantized input tensors are supported.
     * Add `RFFT2D` as builtin op. (`RFFT2D` also supports `RFFTD`.) Currently
       only supports float32 input.
     * TFLite Supports SingatureDef:
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc
index 14000f93cd1..94fa7a6a094 100644
--- a/tensorflow/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -765,6 +765,8 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
               op->builtin_options_as_BatchMatMulOptions()) {
         params->adj_x = bmm_params->adj_x();
         params->adj_y = bmm_params->adj_y();
+        params->asymmetric_quantize_inputs =
+            bmm_params->asymmetric_quantize_inputs();
       }
       *builtin_data = params.release();
       return kTfLiteOk;
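
For reference, the flag parsed above is stored on the op's builtin data and read back by the kernel through node->builtin_data. A minimal sketch of the params struct, assuming the field layout already declared for BATCH_MATMUL in tensorflow/lite/c/builtin_op_data.h (shown only for orientation, not as part of this patch):

    typedef struct {
      bool adj_x;
      bool adj_y;
      // false selects the legacy symmetric input-quantization path that this
      // patch wires through the hybrid kernel.
      bool asymmetric_quantize_inputs;
    } TfLiteBatchMatMulParams;
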
diff --git a/tensorflow/lite/kernels/batch_matmul.cc b/tensorflow/lite/kernels/batch_matmul.cc
index 5f6afa3d14f..23c283355cf 100644
--- a/tensorflow/lite/kernels/batch_matmul.cc
+++ b/tensorflow/lite/kernels/batch_matmul.cc
@@ -450,6 +450,8 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, OpData* data,
                         TfLiteTensor* scaling_factors,
                         TfLiteTensor* accum_scratch, TfLiteTensor* row_sums,
                         TfLiteTensor* input_offsets, TfLiteTensor* output) {
+  const auto* params =
+      reinterpret_cast<TfLiteBatchMatMulParams*>(node->builtin_data);
   const int32_t num_input_dims = input_shape.DimensionsCount();
 
   // Input row/cols have been swapped at this point, so dims are
@@ -465,18 +467,20 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, OpData* data,
   float* scaling_factors_ptr = GetTensorData<float>(scaling_factors);
   int32_t* input_offset_ptr = nullptr;
   int32_t* row_sums_ptr = nullptr;
-  // Only asymmetric quantization is supported.
   input_offset_ptr = GetTensorData<int32_t>(input_offsets);
   row_sums_ptr = GetTensorData<int32_t>(row_sums);
+  if (!params->asymmetric_quantize_inputs) {
+    memset(input_offset_ptr, 0, input_offsets->bytes);
+  }
   int8_t* quant_data = GetTensorData<int8_t>(input_quantized);
   const int8_t* filter_data = GetTensorData<int8_t>(filter);
   const float* input_ptr = GetTensorData<float>(input);
   // Quantize each batch independently.
+  tensor_utils::BatchQuantizeFloats(input_ptr, num_batches_to_quantize,
+                                    input_size, quant_data, scaling_factors_ptr,
+                                    input_offset_ptr,
+                                    params->asymmetric_quantize_inputs);
   for (int b = 0; b < num_batches_to_quantize; ++b) {
-    const int offset = b * input_size;
-    tensor_utils::AsymmetricQuantizeFloats(
-        input_ptr + offset, input_size, quant_data + offset,
-        &scaling_factors_ptr[b], &input_offset_ptr[b]);
     // Incorporate scaling of the filter.
     scaling_factors_ptr[b] *= filter->params.scale;
   }
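
To make the kernel change easier to follow, here is a rough sketch of the two per-batch input quantization modes that tensor_utils::BatchQuantizeFloats chooses between based on the new flag. The helper names and clamping details below are illustrative only, not the actual TFLite utility implementations:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Symmetric mode: zero point pinned at 0, scale taken from max |x|.
    void SymmetricQuantizeSketch(const float* in, int n, int8_t* out,
                                 float* scale) {
      float max_abs = 0.f;
      for (int i = 0; i < n; ++i) max_abs = std::max(max_abs, std::abs(in[i]));
      *scale = max_abs / 127.f;
      for (int i = 0; i < n; ++i) {
        const float q = (*scale == 0.f) ? 0.f : std::round(in[i] / *scale);
        out[i] = static_cast<int8_t>(std::min(127.f, std::max(-128.f, q)));
      }
    }

    // Asymmetric mode: scale and zero point taken from the [min, max] range,
    // so skewed inputs can use the full int8 range.
    void AsymmetricQuantizeSketch(const float* in, int n, int8_t* out,
                                  float* scale, int32_t* zero_point) {
      float rmin = 0.f, rmax = 0.f;
      for (int i = 0; i < n; ++i) {
        rmin = std::min(rmin, in[i]);
        rmax = std::max(rmax, in[i]);
      }
      *scale = (rmax - rmin) / 255.f;
      *zero_point = (*scale == 0.f) ? 0
                                    : static_cast<int32_t>(
                                          std::round(-128.f - rmin / *scale));
      for (int i = 0; i < n; ++i) {
        const float q = (*scale == 0.f)
                            ? 0.f
                            : std::round(in[i] / *scale) + *zero_point;
        out[i] = static_cast<int8_t>(std::min(127.f, std::max(-128.f, q)));
      }
    }

In the symmetric mode the zero point is always 0, which is why the kernel above can keep using the same input_offsets tensor and simply memset it to zero when asymmetric_quantize_inputs is false.
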
diff --git a/tensorflow/lite/kernels/batch_matmul_test.cc b/tensorflow/lite/kernels/batch_matmul_test.cc
index 7abef73d5a2..29750693daa 100644
--- a/tensorflow/lite/kernels/batch_matmul_test.cc
+++ b/tensorflow/lite/kernels/batch_matmul_test.cc
@@ -281,12 +281,12 @@ INSTANTIATE_TEST_SUITE_P(
 
 // In the hybrid model the weights are quantized int8. But the input
 // and output are expected to be in float precision.
-class HybridAsymmetricBatchMatMulOpModel : public SingleOpModel {
+class HybridBatchMatMulOpModel : public SingleOpModel {
  public:
-  HybridAsymmetricBatchMatMulOpModel(
-      int units, int batches, const TensorData& lhs, const TensorData& rhs,
-      const TensorData& output = {TensorType_FLOAT32}, bool adj_x = false,
-      bool adj_y = false)
+  HybridBatchMatMulOpModel(int units, int batches, const TensorData& lhs,
+                           const TensorData& rhs,
+                           const TensorData& output = {TensorType_FLOAT32},
+                           bool asymmetric_quantize_inputs = true)
       : units_(units), batches_(batches) {
     int total_input_size = 1;
     for (size_t i = 0; i < lhs.shape.size(); ++i) {
@@ -299,9 +299,11 @@ class HybridAsymmetricBatchMatMulOpModel : public SingleOpModel {
 
     output_id_ = AddOutput(output);
 
-    SetBuiltinOp(BuiltinOperator_BATCH_MATMUL,
-                 BuiltinOptions_BatchMatMulOptions,
-                 CreateBatchMatMulOptions(builder_, adj_x, adj_y).Union());
+    SetBuiltinOp(
+        BuiltinOperator_BATCH_MATMUL, BuiltinOptions_BatchMatMulOptions,
+        CreateBatchMatMulOptions(builder_, /*adj_x=*/false, /*adj_y=*/false,
+                                 asymmetric_quantize_inputs)
+            .Union());
     BuildInterpreter({GetShape(lhs_id_), GetShape(rhs_id_)});
   }
   void SetWeights(const std::vector<float>& data) {
@@ -340,7 +342,7 @@ class HybridAsymmetricBatchMatMulOpTest : public SingleOpTest {
 };
 
 TEST_P(HybridAsymmetricBatchMatMulOpTest, SimpleTestQuantizedInt8) {
-  HybridAsymmetricBatchMatMulOpModel m(
+  HybridBatchMatMulOpModel m(
       /*units=*/3, /*batches=*/2,
       /*lhs=*/{TensorType_FLOAT32, {2, 10}},
       /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0});
@@ -371,7 +373,7 @@ TEST_P(HybridAsymmetricBatchMatMulOpTest, SimpleTestQuantizedInt8) {
 }
 
 TEST_P(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastWeights) {
-  HybridAsymmetricBatchMatMulOpModel m(
+  HybridBatchMatMulOpModel m(
       /*units=*/3, /*batches=*/2,
       /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}},
       /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0});
@@ -402,7 +404,7 @@ TEST_P(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastWeights) {
 }
 
 TEST_P(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastBigWeights) {
-  HybridAsymmetricBatchMatMulOpModel m(
+  HybridBatchMatMulOpModel m(
       /*units=*/9, /*batches=*/2,
       /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}},
       /*rhs=*/{TensorType_INT8, {10, 9}, 0, 0, 10.0 / 127.0, 0});
@@ -437,7 +439,7 @@ TEST_P(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastBigWeights) {
 }
 
 TEST_P(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastInputs) {
-  HybridAsymmetricBatchMatMulOpModel m(
+  HybridBatchMatMulOpModel m(
       /*units=*/3, /*batches=*/2,
       /*lhs=*/{TensorType_FLOAT32, {2, 10}},
       /*rhs=*/{TensorType_INT8, {2, 10, 3}, 0, 0, 10.0 / 127.0, 0});
@@ -470,6 +472,148 @@ INSTANTIATE_TEST_SUITE_P(
     HybridAsymmetricBatchMatMulOpTest, HybridAsymmetricBatchMatMulOpTest,
     ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
 
+class HybridSymmetricBatchMatMulOpTest : public SingleOpTest {
+ protected:
+  const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
+    return *kKernelMap;
+  }
+};
+
+TEST_P(HybridSymmetricBatchMatMulOpTest, SimpleTestQuantizedInt8) {
+  HybridBatchMatMulOpModel m(
+      /*units=*/3, /*batches=*/2,
+      /*lhs=*/{TensorType_FLOAT32, {2, 10}},
+      /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0},
+      /*output=*/{TensorType_FLOAT32}, /*asymmetric_quantize_inputs=*/false);
+
+  m.SetSignedWeights({
+      1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+      6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10,
+  });
+
+  m.SetInput({
+      11, 12, 13, 14, 15, 16, 17, 18, -19, -20,  // batch 1, 0
+      11, 12, 13, 14, 15, 16, 17, -18, 19, -20,  // batch 1, 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     194,
+                                     194,
+                                     194,
+                                     248,
+                                     248,
+                                     248,
+                                 },
+                                 /*max_abs_error=*/0.64f)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3}));
+}
+
+TEST_P(HybridSymmetricBatchMatMulOpTest, QuantizedInt8BroadcastWeights) {
+  HybridBatchMatMulOpModel m(
+      /*units=*/3, /*batches=*/2,
+      /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}},
+      /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0},
+      /*output=*/{TensorType_FLOAT32}, /*asymmetric_quantize_inputs=*/false);
+
+  m.SetSignedWeights({
+      1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+      6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10,
+  });
+
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,           // batch 0, 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,           // batch 0, 1
+      11, 12, 13, 14, 15, 16, 17, 18, -19, -20,  // batch 1, 0
+      11, 12, 13, 14, 15, 16, 17, -18, 19, -20,  // batch 1, 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     24, 24, 24,     //
+                                     56, 56, 56,     //
+                                     194, 194, 194,  //
+                                     248, 248, 248,  //
+                                 },
+                                 /*max_abs_error=*/1.3f)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 3}));
+}
+
+TEST_P(HybridSymmetricBatchMatMulOpTest, QuantizedInt8BroadcastBigWeights) {
+  HybridBatchMatMulOpModel m(
+      /*units=*/9, /*batches=*/2,
+      /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}},
+      /*rhs=*/{TensorType_INT8, {10, 9}, 0, 0, 10.0 / 127.0, 0},
+      {TensorType_FLOAT32}, false);
+
+  m.SetSignedWeights({
+      1, 1, 1, 17, 17, 17, 26, 26, 26, 2,  2,  2,  18, 18, 18, 27, 27, 27,
+      3, 3, 3, 19, 19, 19, 28, 28, 28, 4,  4,  4,  20, 20, 20, 29, 29, 29,
+      5, 5, 5, 21, 21, 21, 30, 30, 30, 6,  6,  6,  22, 22, 22, 31, 31, 31,
+      7, 7, 7, 23, 23, 23, 32, 32, 32, 8,  8,  8,  24, 24, 24, 33, 33, 33,
+      9, 9, 9, 25, 25, 25, 34, 34, 34, 10, 10, 10, 26, 26, 26, 35, 35, 35,
+  });
+
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,           // batch 0, 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,           // batch 0, 1
+      11, 12, 13, 14, 15, 16, 17, 18, -19, -20,  // batch 1, 0
+      11, 12, 13, 14, 15, 16, 17, -18, 19, -20,  // batch 1, 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      23, 23, 23, 296, 296, 296, 451, 451, 451,           //
+                      58, 58, 58, 362, 362, 362, 529, 529, 529,           //
+                      193, 193, 193, 1424, 1424, 1424, 2118, 2118, 2118,  //
+                      253, 253, 253, 1519, 1519, 1519, 2223, 2223, 2223   //
+                  },
+                  /*max_abs_error=*/1.3f)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 9}));
+}
+
+TEST_P(HybridSymmetricBatchMatMulOpTest, QuantizedInt8BroadcastInputs) {
+  HybridBatchMatMulOpModel m(
+      /*units=*/3, /*batches=*/2,
+      /*lhs=*/{TensorType_FLOAT32, {2, 10}},
+      /*rhs=*/{TensorType_INT8, {2, 10, 3}, 0, 0, 10.0 / 127.0, 0},
+      {TensorType_FLOAT32}, false);
+
+  m.SetSignedWeights({
+      1, -3, 1, 2, -2, 2, 3, -1, 3, 4, 0, 4, 5, 1, 5, 6, 2, 6, 7, 3,
+      7, 8, 4, 8, 9, 5, 9, 10, 6, 10, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4,
+      4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10,
+  });
+
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8, -9, -10,  // batch 0, 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9, -10,  // batch 0, 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     24, -45, 24,  //
+                                     56, -19, 56,  //
+                                     24, 24, 24,   //
+                                     56, 56, 56,   //
+                                 },
+                                 /*max_abs_error=*/0.64f)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 3}));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    HybridSymmetricBatchMatMulOpTest, HybridSymmetricBatchMatMulOpTest,
+    ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));
+
 class QuantizedBatchMatMulOpModel : public SingleOpModel {
  public:
   QuantizedBatchMatMulOpModel(int units, int batches, const TensorData& lhs,
diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc
index 9a6c28fd633..a57f358f1ab 100644
--- a/tensorflow/lite/kernels/register.cc
+++ b/tensorflow/lite/kernels/register.cc
@@ -301,7 +301,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_SEGMENT_SUM, Register_SEGMENT_SUM());
   AddBuiltin(BuiltinOperator_BATCH_MATMUL, Register_BATCH_MATMUL(),
              /* min_version = */ 1,
-             /* max_version = */ 3);
+             /* max_version = */ 4);
   AddBuiltin(BuiltinOperator_CUMSUM, Register_CUMSUM());
   // The version one of broadcast to op won't be not supported since the version
   // one was rollbacked and the builtin op code number has been changed because
diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc
index dc3a5b93366..3b418d9f526 100644
--- a/tensorflow/lite/tools/versioning/runtime_version.cc
+++ b/tensorflow/lite/tools/versioning/runtime_version.cc
@@ -61,6 +61,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
           {{BuiltinOperator_BATCH_MATMUL, 1}, "2.3.0"},
           {{BuiltinOperator_BATCH_MATMUL, 2}, "2.3.0"},
           {{BuiltinOperator_BATCH_MATMUL, 3}, "2.4.0"},
+          {{BuiltinOperator_BATCH_MATMUL, 4}, kPendingReleaseVersion},
           // The version one of broadcast to op won't be not supported since
           // the version one was rollbacked and the builtin op code number
           // has been changed because of builtin op code shortage problem.
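
A final note on why zeroing input_offsets is enough to recover the legacy behaviour: a hybrid kernel rebuilds the float result from the int8 accumulator with a zero-point correction based on the filter row sums. A small sketch under that assumption (names are hypothetical; the real kernels fold this into the optimized matrix/batch-vector routines):

    #include <cstdint>

    // Dequantized dot product for one output element of a hybrid matmul.
    // effective_scale is input_scale * filter_scale for the current batch.
    float HybridDotSketch(const int8_t* q_input, const int8_t* q_filter,
                          int depth, float effective_scale,
                          int32_t input_offset) {
      int32_t acc = 0;
      int32_t filter_row_sum = 0;  // corresponds to the row_sums tensor
      for (int i = 0; i < depth; ++i) {
        acc += static_cast<int32_t>(q_input[i]) * q_filter[i];
        filter_row_sum += q_filter[i];
      }
      // With symmetric input quantization input_offset is 0, so the correction
      // term vanishes and the pre-existing symmetric path falls out unchanged.
      return effective_scale *
             static_cast<float>(acc - input_offset * filter_row_sum);
    }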