From 6be604aaacd9d270de01c37ec6e9a9a077397848 Mon Sep 17 00:00:00 2001
From: Jared Duke
Date: Tue, 4 Aug 2020 12:26:02 -0700
Subject: [PATCH] Reland (Attempt #3) PR #35985: [TFLite int16] 16-bit version
 of ADD/SUB reference kernel operators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/35985

This PR is one of the steps to extend 8-bit quantization to support symmetric
16-bit activations. Each activation is of type int16 and symmetric around
zero. The weight tensor precision remains at 8-bit signed values. The bias is
set to int64 precision.

In this PR we introduce the implementation and tests for the ADD/SUB reference
kernel functions. The specification of these operators:

SUB
  Input 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Input 1:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Output 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0

ADD
  Input 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Input 1:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Output 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0

Copybara import of the project:

--
b94cb4732ab536828e565fd1c7b557f124432e29 by Elena Zhelezina:
    Added 16-bit version of ADD/SUB operators. Broadcasting is included.

--
924d0b72c568f249f2fd224a942f8922524bfede by Elena Zhelezina:
    Addressed reviewer comments.

--
dd0d9e8f03d1fb1b887609fffb8ea5a86638c63e by Elena Zhelezina:
    Added versioning to ADD/SUB + some rework of the existing code.

--
abae3fd9a9b894c07d13c9ef416092c9004bc913 by Elena Zhelezina:
    Added versioning for ADD/SUB with new option in the schema.fbs.
    schema_generated.h is edited manually.

--
24f3f5593a06d24fa1ca6be257f1265b5293d492 by Elena Zhelezina:
    Fix for broken build.

--
d252fe175aef3a1a08c65155815efb706aa80afd by Elena Zhelezina:
    Fix for the failing internal test for NN delegates.

--
2223a5c380bb821eb05f8034703c687269353e32 by Elena Zhelezina:
    Fix for asan failures.
    Change-Id: I2cf421ddda7f9e802202239136ab062bcd63b4aa

--
3c219a46ce5888e8e402b64cc943ac6522156ef5 by Elena Zhelezina:
    Added broadcast params to addsub structure.
    Change-Id: I61d7d4a94087d052a782890799211031f6ed3015

--
9131a38c776109cdbcfa60be602667ec7aafe00f by Elena Zhelezina:
    Corrected defaults.
Change-Id: I9ea50c75014cc03ac91fdef0f5b4fe11395f7074 PiperOrigin-RevId: 324865496 --- tensorflow/lite/c/builtin_op_data.h | 4 + .../lite/core/api/flatbuffer_conversions.cc | 2 + .../experimental/writer/writer_lib_test.cc | 4 + tensorflow/lite/kernels/add.cc | 85 ++++++++++++---- tensorflow/lite/kernels/add_test.cc | 31 ++++-- .../lite/kernels/internal/reference/add.h | 66 ++++++++++--- tensorflow/lite/kernels/register.cc | 6 +- tensorflow/lite/kernels/sub.cc | 97 +++++++++++++++---- tensorflow/lite/kernels/sub_test.cc | 12 +++ tensorflow/lite/schema/schema.fbs | 4 + tensorflow/lite/schema/schema_generated.h | 22 ++++- tensorflow/lite/toco/tflite/op_version.cc | 3 + tensorflow/lite/toco/tflite/operator.cc | 4 +- .../lite/tools/versioning/op_version.cc | 46 ++++++++- tensorflow/lite/tools/versioning/op_version.h | 5 + .../lite/tools/versioning/runtime_version.cc | 3 + 16 files changed, 321 insertions(+), 73 deletions(-) diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h index 232f5f95928..e205f075b43 100644 --- a/tensorflow/lite/c/builtin_op_data.h +++ b/tensorflow/lite/c/builtin_op_data.h @@ -199,6 +199,8 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; + // Parameter added for the version 4. + bool pot_scale_int16; } TfLiteAddParams; typedef struct { @@ -220,6 +222,8 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; + // Parameter added for the version 5. + bool pot_scale_int16; } TfLiteSubParams; typedef struct { diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 0652c64f6c2..7fb04f5b89e 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -896,6 +896,7 @@ TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. We are not doing so until we @@ -1631,6 +1632,7 @@ TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. 
We are not doing so until we diff --git a/tensorflow/lite/experimental/writer/writer_lib_test.cc b/tensorflow/lite/experimental/writer/writer_lib_test.cc index fb59482f705..bf50d4944f1 100644 --- a/tensorflow/lite/experimental/writer/writer_lib_test.cc +++ b/tensorflow/lite/experimental/writer/writer_lib_test.cc @@ -47,6 +47,7 @@ TEST(Writer, FloatModelTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -84,6 +85,7 @@ TEST(Writer, CustomInputOutputTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -131,6 +133,7 @@ TEST(Writer, CustomInputOutputErrorCasesTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -173,6 +176,7 @@ TEST(Writer, PerTensorQuantizedModelTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc index bda475bdc35..7692ae9e54b 100644 --- a/tensorflow/lite/kernels/add.cc +++ b/tensorflow/lite/kernels/add.cc @@ -68,6 +68,11 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; + + // This parameter is used to indicate whether + // parameter scale is power of two. + // It is used in 16-bit -> 16-bit quantization. + bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -103,12 +108,55 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, int16 -> int16 with general rescalings + bool pot_scale_int16 = true; + + bool input1_scale_is_pot = false; + bool input2_scale_is_pot = false; + bool output_scale_is_pot = false; + + int input1_scale_log2_rounded{0}; + int input2_scale_log2_rounded{0}; + int output_scale_log2_rounded{0}; + + if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16) { + // In case of 16-bit, there are two implementation: + // the scale parameter is a general number + // the scale parameter is POT and + // zero_point is zero for inputs/output. 
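    // For example (illustrative values, not taken from this patch): a model
    // whose int16 tensors all use zero_point == 0 and a power-of-two scale
    // such as 1.0f / 32768.0f keeps pot_scale_int16 true and stays on the
    // existing POT path, while a calibrated scale such as 0.001f fails
    // CheckedLog2, so pot_scale_int16 becomes false and the new general
    // rescaling path is used instead.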
+ pot_scale_int16 = (input1->params.zero_point == 0) && + (input2->params.zero_point == 0) && + (output->params.zero_point == 0); + + input1_scale_is_pot = + CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); + + input2_scale_is_pot = + CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); + + output_scale_is_pot = + CheckedLog2(output->params.scale, &output_scale_log2_rounded); + + pot_scale_int16 &= + input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; + } + + data->pot_scale_int16 = pot_scale_int16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !pot_scale_int16) { // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings data->input1_offset = -input1->params.zero_point; data->input2_offset = -input2->params.zero_point; data->output_offset = output->params.zero_point; - data->left_shift = 20; + + // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly. + // In case of 16-bit we have 65535 << 15 which is less than 1 << 31, + // therefore the addition will still fit in a 32 bit accumulator. + data->left_shift = !pot_scale_int16 ? 15 : 20; const double twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = @@ -144,19 +192,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0); TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - int input1_scale_log2_rounded; - bool input1_scale_is_pot = - CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); TF_LITE_ENSURE(context, input1_scale_is_pot); - - int input2_scale_log2_rounded; - bool input2_scale_is_pot = - CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); TF_LITE_ENSURE(context, input2_scale_is_pot); - - int output_scale_log2_rounded; - bool output_scale_is_pot = - CheckedLog2(output->params.scale, &output_scale_log2_rounded); TF_LITE_ENSURE(context, output_scale_is_pot); data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded; @@ -231,7 +268,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input1, const TfLiteTensor* input2, TfLiteTensor* output) { - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !data->pot_scale_int16) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; op_params.input1_offset = data->input1_offset; @@ -266,6 +304,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TF_LITE_ADD(optimized_integer_ops, Add, int8_t); } } + } else if (output->type == kTfLiteInt16) { + if (need_broadcast) { + TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + reference_ops::Add( + op_params, GetTensorShape(input1), GetTensorData(input1), + GetTensorShape(input2), GetTensorData(input2), + GetTensorShape(output), GetTensorData(output), false); + } } else { if (kernel_type == kReference) { if (need_broadcast) { @@ -283,12 +330,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, } #undef TF_LITE_ADD } else if (output->type == kTfLiteInt16) { + tflite::ArithmeticParams op_params; + op_params.input1_shift = data->input1_shift; + op_params.input2_shift = data->input2_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); #define TF_LITE_ADD(type, opname) 
\ - tflite::ArithmeticParams op_params; \ - op_params.input1_shift = data->input1_shift; \ - op_params.input2_shift = data->input2_shift; \ - SetActivationParams(data->output_activation_min, \ - data->output_activation_max, &op_params); \ type::opname(op_params, GetTensorShape(input1), \ GetTensorData(input1), GetTensorShape(input2), \ GetTensorData(input2), GetTensorShape(output), \ diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc index bb883dd9b05..fc78f930897 100644 --- a/tensorflow/lite/kernels/add_test.cc +++ b/tensorflow/lite/kernels/add_test.cc @@ -310,15 +310,18 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector> inputs1 = { - {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { - {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { - {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; + std::vector> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7}, + {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0}, + {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}}; + std::vector> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3}, + {0.6, 0.4, 0.5, -0.8, 0.0, -1.0}, + {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}}; + std::vector> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0}, + {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0}, + {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}}; for (size_t i = 0; i < inputs1.size(); ++i) { - QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, - {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, + QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, + {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, {TensorType_INT16, {}, kMin, kMax}, ActivationFunctionType_NONE); m.QuantizeAndPopulate(m.input1(), inputs1[i]); @@ -439,6 +442,10 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) { QuantizedWithScalarBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) { + QuantizedWithScalarBroadcast(); +} + template void QuantizedWithMixedBroadcast() { float kQuantizedTolerance = GetTolerance(-3.f, 3.f); @@ -501,6 +508,10 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) { QuantizedWithMixedBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) { + QuantizedWithMixedBroadcast(); +} + template void QuantizedWithGenericBroadcast() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -527,5 +538,9 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) { QuantizedWithGenericBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) { + QuantizedWithGenericBroadcast(); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h index 94c58097154..5be7ab4dc0c 100644 --- a/tensorflow/lite/kernels/internal/reference/add.h +++ b/tensorflow/lite/kernels/internal/reference/add.h @@ -51,13 +51,18 @@ inline void Add(const ArithmeticParams& params, // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. + +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
+template inline void AddElementwise(int size, const ArithmeticParams& params, - const uint8_t* input1_data, - const uint8_t* input2_data, uint8_t* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 256); + const T* input1_data, const T* input2_data, + T* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits::max()); for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; @@ -78,7 +83,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -132,10 +137,38 @@ inline void Add(const ArithmeticParams& params, AddElementwise(flat_size, params, input1_data, input2_data, output_data); } +inline void AddGeneralParamScale(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int16_t* input1_data, + const RuntimeShape& input2_shape, + const int16_t* input2_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + int max_value = std::numeric_limits::max(); + + TFLITE_DCHECK_GT(params.input1_offset, -max_value); + TFLITE_DCHECK_GT(params.input2_offset, -max_value); + TFLITE_DCHECK_LT(params.input1_offset, max_value); + TFLITE_DCHECK_LT(params.input2_offset, max_value); + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + inline void Add(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int16_t* input1_data, const RuntimeShape& input2_shape, const int16_t* input2_data, - const RuntimeShape& output_shape, int16_t* output_data) { + const RuntimeShape& output_shape, int16_t* output_data, + bool pot_scale = true) { + if (!pot_scale) { + AddGeneralParamScale(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); + return; + } + TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); @@ -258,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, } } -inline void BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const uint8_t* input1_data, - const RuntimeShape& input2_shape, - const uint8_t* input2_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
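// Concretely (evaluating the bound quoted from add.cc): 65535 << 15 =
// 2,147,450,880, which is below 1 << 31 = 2,147,483,648, so the shifted
// 16-bit inputs still fit in the int32 accumulator; for 8-bit data the
// larger shift of 20 is similarly safe.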
+template +inline void BroadcastAdd4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -314,7 +348,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + static_cast(clamped_output); } } } diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index adffa19c4e1..1d1db9e0403 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -89,8 +89,8 @@ BuiltinOpResolver::BuiltinOpResolver() { /* min_version = */ 1, /* max_version = */ 3); AddBuiltin(BuiltinOperator_ADD, Register_ADD(), - /* min_version = */ 1, - /* max_version = */ 2); + /* min_version */ 1, + /* max_version */ 4); AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(), /* min_version = */ 1, /* max_version = */ 3); @@ -143,7 +143,7 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version */ 2); AddBuiltin(BuiltinOperator_SUB, Register_SUB(), /* min_version = */ 1, - /* max_version = */ 4); + /* max_version = */ 5); AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version = */ 1, /* max_version = */ 4); diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc index 4cd9dd7ff60..f93ebecd46d 100644 --- a/tensorflow/lite/kernels/sub.cc +++ b/tensorflow/lite/kernels/sub.cc @@ -71,6 +71,11 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; + + // This parameter is used to indicate whether + // parameter scale is power of two. + // It is used in 16-bit -> 16-bit quantization. 
+ bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -83,13 +88,14 @@ void Free(TfLiteContext* context, void* buffer) { delete reinterpret_cast(buffer); } -TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, - const TfLiteTensor* input_1, - const TfLiteTensor* input_2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* op_params, - int op_sign) { - TF_LITE_ENSURE(context, - output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); +TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, + const TfLiteTensor* input_1, + const TfLiteTensor* input_2, + TfLiteTensor* output, TfLiteSubParams* params, + OpData* op_params, int op_sign) { + TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || + output->type == kTfLiteInt8 || + output->type == kTfLiteInt16); const auto& input1_quantization_params = input_1->params; const auto& input2_quantization_params = input_2->params; const auto& output_quantization_params = output->params; @@ -98,6 +104,9 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, if (output->type == kTfLiteUInt8) { integer_type_min = std::numeric_limits::min(); integer_type_max = std::numeric_limits::max(); + } else if (output->type == kTfLiteInt16) { + integer_type_min = std::numeric_limits::min(); + integer_type_max = std::numeric_limits::max(); } else { // output->type == kTfLiteInt8 integer_type_min = std::numeric_limits::min(); @@ -120,7 +129,11 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point; op_params->output_offset = output_quantization_params.zero_point; - op_params->left_shift = 20; + + // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, + // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 << + // 31, therefore the addition will still fit in a 32 bit accumulator. + op_params->left_shift = output->type == kTfLiteInt16 ? 
15 : 20; const double twice_max_input_scale = 2 * std::max(input1_quantization_params.scale, input2_quantization_params.scale); @@ -146,13 +159,15 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &op_params->output_activation_min, &op_params->output_activation_max)); + return kTfLiteOk; } -TfLiteStatus PrepareInt16SubOp(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* data) { +TfLiteStatus PrepareInt16SubOpPOT(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, TfLiteSubParams* params, + OpData* data) { // 16bit -> 16bit special quantized path, supporting only a rather // narrow case of quantization parameters: zero_points must all be 0 // ("symmetric quantization") and scales must be power-of-two (which @@ -219,12 +234,51 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { - TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, - params, data, -1)); + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings + bool pot_scale_int16 = true; + + bool input1_scale_is_pot = false; + bool input2_scale_is_pot = false; + bool output_scale_is_pot = false; + + int input1_scale_log2_rounded{0}; + int input2_scale_log2_rounded{0}; + int output_scale_log2_rounded{0}; + + if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16) { + // In case of 16-bit, there are two implementation: + // the scale parameter is a general number + // the scale parameter is POT and + // zero_point is zero for inputs/output. 
+ pot_scale_int16 = (input1->params.zero_point == 0) && + (input2->params.zero_point == 0) && + (output->params.zero_point == 0); + + input1_scale_is_pot = + CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); + + input2_scale_is_pot = + CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); + + output_scale_is_pot = + CheckedLog2(output->params.scale, &output_scale_log2_rounded); + + pot_scale_int16 &= + input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; + } + + data->pot_scale_int16 = pot_scale_int16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !pot_scale_int16) { + TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2, + output, params, data, -1)); } else if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2, - output, params, data)); + // LSTM-special case with scale parameter of POT + TF_LITE_ENSURE_OK(context, PrepareInt16SubOpPOT(context, input1, input2, + output, params, data)); } return context->ResizeTensor(context, output, output_size); @@ -332,6 +386,15 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } else { TF_LITE_SUB(reference_integer_ops, Add, int8_t); } + } else if (!data->pot_scale_int16) { + if (need_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + reference_ops::Add(op_params, GetTensorShape(input1), + GetTensorData(input1), GetTensorShape(input2), + GetTensorData(input2), GetTensorShape(output), + GetTensorData(output), false); + } } else if (output->type == kTfLiteUInt8) { if (kernel_type == kReference) { if (need_broadcast) { diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 67054fe4903..24d9c251afb 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -304,6 +304,10 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) { QuantizedTestsNoActivation(); } +TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) { + QuantizedTestsNoActivation(); +} + template void QuantizedTestsActivationRELU_N1_TO_1() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -365,6 +369,10 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) { QuantizedVariousInputShapes(); } +TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) { + QuantizedVariousInputShapes(); +} + template void QuantizedWithBroadcast() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); @@ -393,6 +401,10 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) { QuantizedWithBroadcast(); } +TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) { + QuantizedWithBroadcast(); +} + TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 878acde1e16..baeb49f7b7a 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -583,6 +583,8 @@ table ConcatenationOptions { table AddOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 4. 
+ pot_scale_int16:bool = true; } table MulOptions { @@ -704,6 +706,8 @@ table DepthToSpaceOptions { table SubOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; } table DivOptions { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index a6117dc72ab..a4691b70e49 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -4742,22 +4742,29 @@ flatbuffers::Offset CreateConcatenationOptions(flatbuffers struct AddOptionsT : public flatbuffers::NativeTable { typedef AddOptions TableType; + bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; AddOptionsT() - : fused_activation_function(tflite::ActivationFunctionType_NONE) { + : pot_scale_int16(true), + fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef AddOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 0) != 0; + } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && + VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } @@ -5907,22 +5914,29 @@ flatbuffers::Offset CreateDepthToSpaceOptions(flatbuffers:: struct SubOptionsT : public flatbuffers::NativeTable { typedef SubOptions TableType; + bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; SubOptionsT() - : fused_activation_function(tflite::ActivationFunctionType_NONE) { + : pot_scale_int16(true), + fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SubOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 0) != 0; + } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && + VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 567d000dab6..222be969560 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -53,12 +53,15 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kDepthwiseConv, 5}, kPendingReleaseOpVersion}, {{OperatorType::kAdd, 1}, "1.5.0"}, {{OperatorType::kAdd, 2}, "1.14.0"}, + {{OperatorType::kAdd, 3}, kPendingReleaseOpVersion}, {{OperatorType::kAddN, 1}, "1.14.0"}, {{OperatorType::kSpaceToBatchND, 1}, "1.6.0"}, {{OperatorType::kSpaceToBatchND, 2}, "1.14.0"}, {{OperatorType::kSub, 1}, "1.6.0"}, {{OperatorType::kSub, 2}, "1.14.0"}, + {{OperatorType::kSub, 3}, "1.15.0"}, {{OperatorType::kSub, 4}, 
kPendingReleaseOpVersion}, + {{OperatorType::kSub, 5}, kPendingReleaseOpVersion}, {{OperatorType::kDiv, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 794691f5724..585b15bae2e 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -276,10 +276,10 @@ class Sub : public BuiltinOperator 4) { + if (op_sig.options.addsub.need_broadcast && + op_sig.options.addsub.num_dims > 4) { return 3; } if (op_sig.input_types.at(0) == TensorType_INT8) { @@ -542,7 +560,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } } return 1; - case BuiltinOperator_ADD: + case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPLIT_V: case BuiltinOperator_SUM: @@ -669,6 +687,26 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, } } break; + case BuiltinOperator_ADD: { + auto add_option = op->builtin_options_as_AddOptions(); + op_sig.options.addsub.pot_scale_int16 = true; + if (add_option) { + op_sig.options.addsub.pot_scale_int16 = add_option->pot_scale_int16(); + } + } break; + + case BuiltinOperator_SUB: { + auto sub_option = op->builtin_options_as_SubOptions(); + op_sig.options.addsub.need_broadcast = + !HaveSameShapes(subgraph, op, 0, 1); + op_sig.options.addsub.num_dims = + std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1)); + op_sig.options.addsub.pot_scale_int16 = true; + if (sub_option) { + op_sig.options.addsub.pot_scale_int16 = sub_option->pot_scale_int16(); + } + } break; + case BuiltinOperator_LSTM: { auto lstm_option = op->builtin_options_as_LSTMOptions(); if (lstm_option) { @@ -714,7 +752,7 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, case BuiltinOperator_TRANSPOSE: { op_sig.options.single_input_op.num_dims = GetNumDims(subgraph, op, 0); } break; - case BuiltinOperator_SUB: + case BuiltinOperator_DIV: case BuiltinOperator_MAXIMUM: case BuiltinOperator_MINIMUM: { diff --git a/tensorflow/lite/tools/versioning/op_version.h b/tensorflow/lite/tools/versioning/op_version.h index 71362001387..67a7b79fe38 100644 --- a/tensorflow/lite/tools/versioning/op_version.h +++ b/tensorflow/lite/tools/versioning/op_version.h @@ -63,6 +63,11 @@ typedef struct { int32_t num_dims; bool need_broadcast; } broadcast; + struct { + bool pot_scale_int16; + int32_t num_dims; + bool need_broadcast; + } addsub; struct { bool is_per_channel_quantized; } conv_2d; diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index ccbbaa27d68..5a454224b92 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -72,6 +72,8 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_DEPTHWISE_CONV_2D, 6}, "2.3.0"}, {{BuiltinOperator_ADD, 1}, "1.5.0"}, {{BuiltinOperator_ADD, 2}, "1.14.0"}, + {{BuiltinOperator_ADD, 3}, kPendingReleaseVersion}, + {{BuiltinOperator_ADD, 4}, kPendingReleaseVersion}, {{BuiltinOperator_ADD_N, 1}, "1.14.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 1}, "1.6.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 2}, "1.14.0"}, @@ -80,6 +82,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_SUB, 2}, "1.14.0"}, {{BuiltinOperator_SUB, 3}, "2.3.0"}, {{BuiltinOperator_SUB, 4}, kPendingReleaseVersion}, + {{BuiltinOperator_SUB, 5}, 
kPendingReleaseVersion}, {{BuiltinOperator_DENSIFY, 1}, "2.2.0"}, {{BuiltinOperator_DIV, 1}, "1.6.0"}, {{BuiltinOperator_DIV, 2}, "2.3.0"},
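
For reference, below is a minimal stand-alone sketch of the arithmetic performed
by the new general-rescaling int16 path (zero offsets, left_shift = 15). The
function name AddInt16 and the use of plain doubles for the multipliers are
illustrative simplifications; the actual kernel uses TFLite's fixed-point
multiplier/shift representation and clamps to the fused-activation range rather
than the raw int16 limits shown here.

// int16_add_sketch.cc -- illustrative only; simplified from the reference
// kernel's general-rescaling 16-bit path.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Rescale both inputs onto a common grid (twice the larger input scale,
// pre-shifted by 1 << 15), add, then rescale to the output scale.
std::vector<int16_t> AddInt16(const std::vector<int16_t>& in1, float scale1,
                              const std::vector<int16_t>& in2, float scale2,
                              float out_scale) {
  const int left_shift = 15;  // 65535 << 15 still fits in an int32 accumulator.
  const double twice_max_in_scale = 2.0 * std::max(scale1, scale2);
  const double in1_mult = scale1 / twice_max_in_scale;
  const double in2_mult = scale2 / twice_max_in_scale;
  const double out_mult =
      twice_max_in_scale / ((1 << left_shift) * static_cast<double>(out_scale));

  std::vector<int16_t> out(in1.size());
  for (size_t i = 0; i < in1.size(); ++i) {
    const int32_t shifted1 = static_cast<int32_t>(in1[i]) * (1 << left_shift);
    const int32_t shifted2 = static_cast<int32_t>(in2[i]) * (1 << left_shift);
    const int32_t scaled1 = static_cast<int32_t>(shifted1 * in1_mult);
    const int32_t scaled2 = static_cast<int32_t>(shifted2 * in2_mult);
    const int32_t raw = static_cast<int32_t>((scaled1 + scaled2) * out_mult);
    // Clamp to the int16 range (the kernel uses the activation range instead).
    out[i] = static_cast<int16_t>(
        std::min<int32_t>(32767, std::max<int32_t>(-32768, raw)));
  }
  return out;
}

int main() {
  // 0.25 + 0.5 with input scale 1/32768 and output scale 1/16384.
  const float s_in = 1.0f / 32768, s_out = 1.0f / 16384;
  const auto out = AddInt16({8192}, s_in, {16384}, s_in, s_out);
  std::printf("%d (~%f)\n", out[0], out[0] * s_out);  // expect 12288 (~0.75)
  return 0;
}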