From b94cb4732ab536828e565fd1c7b557f124432e29 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina <elena.zhelezina@arm.com> Date: Thu, 19 Dec 2019 09:09:38 +0000 Subject: [PATCH] Added 16-bit version of ADD/SUB operators. Broadcasting is included. --- tensorflow/lite/kernels/add.cc | 31 +++++++-- tensorflow/lite/kernels/add_test.cc | 31 ++++++--- .../lite/kernels/internal/reference/add.h | 54 +++++++++++----- tensorflow/lite/kernels/sub.cc | 63 ++++++++++++++----- tensorflow/lite/kernels/sub_test.cc | 12 ++++ 5 files changed, 147 insertions(+), 44 deletions(-) diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc index d9b8c87eeb7..7ad744b4910 100644 --- a/tensorflow/lite/kernels/add.cc +++ b/tensorflow/lite/kernels/add.cc @@ -93,12 +93,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings + bool general_16bit = input1->type == kTfLiteInt16 && + input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + general_16bit) { // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings data->input1_offset = -input1->params.zero_point; data->input2_offset = -input2->params.zero_point; data->output_offset = output->params.zero_point; - data->left_shift = 20; + + // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly. + // In case of 16-bit we have 65535 << 15 which is less than 1 << 31, + // therefore the addition will still fit in a 32 bit accumulator. + data->left_shift = general_16bit ? 15 : 20; const double twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = @@ -221,7 +233,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input1, const TfLiteTensor* input2, TfLiteTensor* output) { - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + bool general_16bit = input1->type == kTfLiteInt16 && + input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + general_16bit) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; op_params.input1_offset = data->input1_offset; @@ -256,6 +273,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TF_LITE_ADD(optimized_integer_ops, Add, int8_t); } } + } else if (output->type == kTfLiteInt16) { + if (need_broadcast) { + TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + TF_LITE_ADD(reference_ops, Add, int16_t); + } } else { if (kernel_type == kReference) { if (need_broadcast) { @@ -286,7 +309,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, // The quantized version of Add doesn't support activations, so we // always use BroadcastAdd. 
if (kernel_type == kReference) { - TF_LITE_ADD(reference_ops, Add); + TF_LITE_ADD(reference_ops, AddLSTM); } else { TF_LITE_ADD(optimized_ops, Add); } diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc index 267b80564c9..1a243c7a4e6 100644 --- a/tensorflow/lite/kernels/add_test.cc +++ b/tensorflow/lite/kernels/add_test.cc @@ -306,15 +306,18 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector<std::vector<float>> inputs1 = { - {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector<std::vector<float>> inputs2 = { - {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector<std::vector<float>> results = { - {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; + std::vector<std::vector<float>> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7}, + {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0}, + {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}}; + std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3}, + {0.6, 0.4, 0.5, -0.8, 0.0, -1.0}, + {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}}; + std::vector<std::vector<float>> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0}, + {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0}, + {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}}; for (size_t i = 0; i < inputs1.size(); ++i) { - QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, - {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, + QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, + {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, {TensorType_INT16, {}, kMin, kMax}, ActivationFunctionType_NONE); m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]); @@ -435,6 +438,10 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) { QuantizedWithScalarBroadcast<TensorType_INT8, int8_t>(); } +TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) { + QuantizedWithScalarBroadcast<TensorType_INT16, int16_t>(); +} + template <enum TensorType tensor_type, typename integer_dtype> void QuantizedWithMixedBroadcast() { float kQuantizedTolerance = GetTolerance(-3.f, 3.f); @@ -497,6 +504,10 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) { QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>(); } +TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) { + QuantizedWithMixedBroadcast<TensorType_INT16, int16_t>(); +} + template <enum TensorType tensor_type, typename integer_dtype> void QuantizedWithGenericBroadcast() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -523,5 +534,9 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) { QuantizedWithGenericBroadcast<TensorType_INT8, int8_t>(); } +TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) { + QuantizedWithGenericBroadcast<TensorType_INT16, int16_t>(); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h index d0c40912091..c1b0163640b 100644 --- a/tensorflow/lite/kernels/internal/reference/add.h +++ b/tensorflow/lite/kernels/internal/reference/add.h @@ -51,13 +51,18 @@ inline void Add(const ArithmeticParams& params, // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. + +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. 
The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). +template <typename T> inline void AddElementwise(int size, const ArithmeticParams& params, - const uint8* input1_data, const uint8* input2_data, - uint8* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 256); + const T* input1_data, const T* input2_data, + T* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max()); + TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max()); + TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max()); + TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max()); for (int i = 0; i < size; ++i) { const int32 input1_val = params.input1_offset + input1_data[i]; @@ -78,7 +83,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params, const int32 clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast<uint8>(clamped_output); + output_data[i] = static_cast<T>(clamped_output); } } @@ -138,6 +143,24 @@ inline void Add(const ArithmeticParams& params, const RuntimeShape& output_shape, int16* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + int max_value = std::numeric_limits<int16>::max(); + + TFLITE_DCHECK_GT(params.input1_offset, -max_value); + TFLITE_DCHECK_GT(params.input2_offset, -max_value); + TFLITE_DCHECK_LT(params.input1_offset, max_value); + TFLITE_DCHECK_LT(params.input2_offset, max_value); + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + +inline void AddLSTM(const ArithmeticParams& params, + const RuntimeShape& input1_shape, const int16* input1_data, + const RuntimeShape& input2_shape, const int16* input2_data, + const RuntimeShape& output_shape, int16* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); const int input1_shift = params.input1_shift; const int flat_size = @@ -257,13 +280,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, } } -inline void BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const uint8* input1_data, - const RuntimeShape& input2_shape, - const uint8* input2_data, - const RuntimeShape& output_shape, - uint8* output_data) { +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
+template <typename T> +inline void BroadcastAdd4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -313,7 +337,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast<uint8>(clamped_output); + static_cast<T>(clamped_output); } } } diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc index f2913faeb76..077533c7338 100644 --- a/tensorflow/lite/kernels/sub.cc +++ b/tensorflow/lite/kernels/sub.cc @@ -72,13 +72,14 @@ void Free(TfLiteContext* context, void* buffer) { delete reinterpret_cast<OpData*>(buffer); } -TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, - const TfLiteTensor* input_1, - const TfLiteTensor* input_2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* op_params, - int op_sign) { - TF_LITE_ENSURE(context, - output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); +TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, + const TfLiteTensor* input_1, + const TfLiteTensor* input_2, + TfLiteTensor* output, TfLiteSubParams* params, + OpData* op_params, int op_sign) { + TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || + output->type == kTfLiteInt8 || + output->type == kTfLiteInt16); const auto& input1_quantization_params = input_1->params; const auto& input2_quantization_params = input_2->params; const auto& output_quantization_params = output->params; @@ -87,6 +88,9 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, if (output->type == kTfLiteUInt8) { integer_type_min = std::numeric_limits<uint8_t>::min(); integer_type_max = std::numeric_limits<uint8_t>::max(); + } else if (output->type == kTfLiteInt16) { + integer_type_min = std::numeric_limits<int16_t>::min(); + integer_type_max = std::numeric_limits<int16_t>::max(); } else { // output->type == kTfLiteInt8 integer_type_min = std::numeric_limits<int8_t>::min(); @@ -109,7 +113,11 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point; op_params->output_offset = output_quantization_params.zero_point; - op_params->left_shift = 20; + + // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, + // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 << + // 31, therefore the addition will still fit in a 32 bit accumulator. + op_params->left_shift = output->type == kTfLiteInt16 ? 
15 : 20; const double twice_max_input_scale = 2 * std::max(input1_quantization_params.scale, input2_quantization_params.scale); @@ -135,13 +143,14 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &op_params->output_activation_min, &op_params->output_activation_max)); + return kTfLiteOk; } -TfLiteStatus PrepareInt16SubOp(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* data) { +TfLiteStatus PrepareLSTMSubOp(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + TfLiteSubParams* params, OpData* data) { // 16bit -> 16bit special quantized path, supporting only a rather // narrow case of quantization parameters: zero_points must all be 0 // ("symmetric quantization") and scales must be power-of-two (which @@ -208,12 +217,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { - TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, - params, data, -1)); + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings + + bool general_16bit = output->type == kTfLiteInt16 && + input1->type == kTfLiteInt16 && + input2->type == kTfLiteInt16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + general_16bit) { + TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2, + output, params, data, -1)); } else if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2, - output, params, data)); + // LSTM-special case with scale parameter of POT + TF_LITE_ENSURE_OK(context, PrepareLSTMSubOp(context, input1, input2, output, + params, data)); } return context->ResizeTensor(context, output, output_size); @@ -288,6 +306,11 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const bool need_broadcast = optimized_ops::ProcessBroadcastShapes( GetTensorShape(input1), GetTensorShape(input2), &op_params); + // 16bit -> 16bit with general rescaling + bool general_16bit = output->type == kTfLiteInt16 && + input1->type == kTfLiteInt16 && + input2->type == kTfLiteInt16; + #define TF_LITE_SUB(type, opname, data_type) \ type::opname(op_params, GetTensorShape(input1), \ GetTensorData<data_type>(input1), GetTensorShape(input2), \ @@ -301,6 +324,12 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } else { TF_LITE_SUB(reference_integer_ops, Add, int8_t); } + } else if (general_16bit) { + if (need_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + TF_LITE_SUB(reference_ops, Add, int16_t); + } } else if (output->type == kTfLiteUInt8) { if (kernel_type == kReference) { if (need_broadcast) { diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 24b554f087b..b5363a038a3 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -226,6 +226,10 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) { QuantizedTestsNoActivation<TensorType_INT8, int8_t>(); } +TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16Generic) { + QuantizedTestsNoActivation<TensorType_INT16, int16_t>(); +} + template <TensorType tensor_type, typename integer_dtype> void 
QuantizedTestsActivationRELU_N1_TO_1() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -287,6 +291,10 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) { QuantizedVariousInputShapes<TensorType_INT8, int8_t>(); } +TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) { + QuantizedVariousInputShapes<TensorType_INT16, int16_t>(); +} + template <TensorType tensor_type, typename integer_dtype> void QuantizedWithBroadcast() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); @@ -315,6 +323,10 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) { QuantizedWithBroadcast<TensorType_INT8, int8_t>(); } +TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) { + QuantizedWithBroadcast<TensorType_INT16, int16_t>(); +} + TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax =
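
Note on the arithmetic behind the left_shift choice in this patch: the general 16-bit path shifts both inputs into a shared higher-precision domain, applies per-input multipliers derived from twice_max_input_scale, adds in a 32-bit accumulator, and rescales to the output scale, which is why the shift drops from 20 (8-bit) to 15 (16-bit). The sketch below is a minimal standalone illustration, not the TFLite kernel: it substitutes plain double scaling for the fixed-point multiplier helpers used by the real kernels (MultiplyByQuantizedMultiplierSmallerThanOneExp), it clamps to the full int16 range instead of the activation range, and the QuantizedAddSub16 name and the scale/zero-point values in main() are made up for illustration only.

// Standalone sketch (not TFLite code) of the general 16-bit quantized
// ADD/SUB path enabled by this patch: shift both inputs into a common
// higher-precision domain, rescale, sum in a 32-bit accumulator, and
// rescale to the output tensor's scale.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int16_t QuantizedAddSub16(int16_t a, int16_t b, double scale_a, double scale_b,
                          double scale_out, int32_t zp_a, int32_t zp_b,
                          int32_t zp_out, int op_sign /* +1 add, -1 sub */) {
  // left_shift = 15 for 16-bit: |value - zero_point| <= 65535, and
  // 65535 << 15 is still below 1 << 31, so the accumulator cannot overflow.
  const int left_shift = 15;
  const double twice_max_scale = 2.0 * std::max(scale_a, scale_b);

  const int32_t shifted_a = (a - zp_a) * (1 << left_shift);
  const int32_t shifted_b = (b - zp_b) * (1 << left_shift);
  // Rescale each input into the shared (twice_max_scale / 2^15) domain.
  // SUB is ADD with the second input's multiplier negated (op_sign = -1).
  const int32_t scaled_a = static_cast<int32_t>(
      std::lround(shifted_a * (scale_a / twice_max_scale)));
  const int32_t scaled_b = static_cast<int32_t>(
      std::lround(shifted_b * (op_sign * scale_b / twice_max_scale)));
  const int32_t raw_sum = scaled_a + scaled_b;
  // Undo the left shift and convert to the output scale and zero point.
  const int32_t raw_out =
      static_cast<int32_t>(std::lround(
          raw_sum * (twice_max_scale / ((1 << left_shift) * scale_out)))) +
      zp_out;
  // The real kernels clamp to the quantized activation range; the sketch
  // clamps to the full int16 range.
  return static_cast<int16_t>(
      std::min<int32_t>(32767, std::max<int32_t>(-32768, raw_out)));
}

int main() {
  // Illustrative symmetric quantization matching the test range
  // [-1, 32767/32768]: scale = 1/32768, zero_point = 0 for all tensors.
  const double s = 1.0 / 32768;
  const int16_t a = static_cast<int16_t>(std::lround(0.1 / s));
  const int16_t b = static_cast<int16_t>(std::lround(0.6 / s));
  const int16_t sum = QuantizedAddSub16(a, b, s, s, s, 0, 0, 0, +1);
  const int16_t diff = QuantizedAddSub16(b, a, s, s, s, 0, 0, 0, -1);
  std::printf("0.1 + 0.6 ~= %f\n", sum * s);   // ~0.7
  std::printf("0.6 - 0.1 ~= %f\n", diff * s);  // ~0.5
  return 0;
}

Subtraction reuses the same machinery: PrepareGeneralSubOp is called with op_sign = -1 so that the second input's effective multiplier ends up negated, and EvalQuantized then dispatches the general int16 case to the same reference_ops::Add / BroadcastAdd4DSlow kernels. The pre-existing power-of-two int16 path is kept separately (AddLSTM / PrepareLSTMSubOp) for the symmetric, POT-scaled case used by LSTMs.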