From c69c80248848f97969293012f98f5eae571a7207 Mon Sep 17 00:00:00 2001
From: Elena Zhelezina
Date: Wed, 1 Jul 2020 14:18:52 +0100
Subject: [PATCH 1/2] Fixes for tanh and logistic activation functions, 16x8.

Change-Id: I2d8dc5c706ad834ce2331ad0f77cce41986bf477
---
 tensorflow/lite/kernels/activations.cc        | 47 ++++++++++++-------
 tensorflow/lite/kernels/activations_test.cc   | 28 ++++++-----
 .../internal/reference/integer_ops/logistic.h | 43 ++++++++++++-----
 .../internal/reference/integer_ops/tanh.h     | 22 +++++----
 4 files changed, 88 insertions(+), 52 deletions(-)

diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index 654ccbc27ec..d894a28bd90 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -298,7 +298,6 @@ void HardSwishFree(TfLiteContext* context, void* buffer) {
   delete static_cast<HardSwishData*>(buffer);
 }
 
-
 TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_STATUS(GenericPrepare(context, node));
   TfLiteTensor* output = GetOutput(context, node, 0);
@@ -426,13 +425,19 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
       (data->input_left_shift == 0 || data->input_left_shift == 1);
 
   if (!param_scale_pot) {
-    // In case of general scale parameter, we need to do a rescaling.
-    // Magic constant 4096:
-    // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval
-    // from 16-bit (-2^15, 2^15),
-    // so we need to multiply by
-    // 2^(15 - kInputIntegerBits) = 2^12 = 4096.
-    data->input_multiplier = static_cast<int32_t>(input->params.scale * 4096);
+    // Calculate multiplier to change input scale to 1/(3*4096)
+    // as required by the table lookup.
+    // In this scaling +/-2^17 represents +/-10.7
+
+    double multiplier = input->params.scale * 4096.0 * 3.0;
+    data->input_left_shift = 0;
+
+    while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+    }
+
+    data->input_multiplier = static_cast<int32_t>(multiplier);
   }
 
   int output_scale_log2_rounded;
@@ -521,13 +526,19 @@ TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) {
   param_scale_pot &= (data->input_left_shift == 0);
 
   if (!param_scale_pot) {
-    // In case of general scale parameter, we need to do a rescaling.
-    // Magic constant 4096:
-    // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval
-    // from 16-bit (-2^15, 2^15),
-    // so we need to multiply by
-    // 2^(15 - kInputIntegerBits) = 2^12 = 4096.
-    data->input_multiplier = static_cast<int32_t>(input->params.scale * 4096);
+    // Calculate multiplier to change input scale to 1/(3*4096)
+    // as required by the table lookup.
+    // In this scaling +/-2^17 represents +/-10.7
+    double multiplier = input->params.scale * 4096.0 * 3.0;
+
+    data->input_left_shift = 0;
+
+    while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+    }
+
+    data->input_multiplier = static_cast<int32_t>(multiplier);
   }
 
   int output_scale_log2_rounded;
@@ -943,9 +954,9 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
         const int size =
             MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));
 
-        reference_integer_ops::Logistic(data->input_multiplier, size,
-                                        GetTensorData<int16_t>(input),
-                                        GetTensorData<int16_t>(output));
+        reference_integer_ops::Logistic(
+            data->input_multiplier, data->input_left_shift, size,
+            GetTensorData<int16_t>(input), GetTensorData<int16_t>(output));
       } else {
         optimized_ops::Logistic(
             params, GetTensorShape(input), GetTensorData<int16_t>(input),
diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc
index d8f883b9c1d..9473b367706 100644
--- a/tensorflow/lite/kernels/activations_test.cc
+++ b/tensorflow/lite/kernels/activations_test.cc
@@ -892,13 +892,15 @@ TEST_P(TanhOpTest, TanhInt16General) {
   const float kMax = 32767.f / 32768.f;
   QuantizedActivationsOpModel m(
       GetRegistration(), BuiltinOperator_TANH,
-      /*input=*/{TensorType_INT16, {6}, 11 * kMin, 11 * kMax},
-      /*output=*/{TensorType_INT16, {5}, kMin, kMax});
-  m.SetInput<int16_t>({-10, -4, 0, 6, 7.0909090909, 8});
+      /*input=*/{TensorType_INT16, {10}, 11 * kMin, 11 * kMax},
+      /*output=*/{TensorType_INT16, {10}, kMin, kMax});
+  m.SetInput<int16_t>({-10, -4, 1, 0.5, 0.25,  //
+                       0, -0.1, 6, 7.0909090909, 8});
   m.Invoke();
   EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
               ElementsAreArray(ArrayFloatNear(
-                  {-0.999969, -0.99408, 0, 0.999664, 0.999939, 0.999969},
+                  {-1.0, -0.999329, 0.761594, 0.462117, 0.244919,  //
+                   0.0, -0.099668, 0.999988, 0.999999, 1.0},
                   kQuantizedToleranceInt16)));
 }
 
@@ -1083,18 +1085,18 @@ TEST_P(LogisticOpTest, SigmoidInt16General) {
   const float kMax = 32767.f / 32768.f;
   QuantizedActivationsOpModel m(
       GetRegistration(), BuiltinOperator_LOGISTIC,
-      /*input=*/{TensorType_INT16, {8}, 10 * kMin, 10 * kMax},
-      /*output=*/{TensorType_INT16, {8}, kMin, kMax});
+      /*input=*/{TensorType_INT16, {12}, 13 * kMin, 13 * kMax},
+      /*output=*/{TensorType_INT16, {12}, kMin, kMax});
   m.SetInput<int16_t>({
-      0, -6, 2, 4,  //
-      3, -2, 10, 1,  //
+      0, -6, 2, 4, 0.1, 12,    //
+      3, -2, 10, 1, 0.25, -12  //
   });
   m.Invoke();
-  EXPECT_THAT(
-      m.GetDequantizedOutput<int16_t>(),
-      ElementsAreArray(ArrayFloatNear({0.5, 0.00814819, 0.832031, 0.960846,  //
-                                       0.916809, 0.167969, 0.999664, 0.689972},
-                                      kQuantizedToleranceInt16)));
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {0.5, 0.002473, 0.880797, 0.982014, 0.524979, 0.999994,  //
+                   0.952574, 0.119203, 0.999955, 0.731059, 0.562177, 0},
+                  kQuantizedToleranceInt16)));
 }
 
 TEST(FloatActivationsOpTest, Softmax4D) {
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
index e315683c0cd..b1a970396d3 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -58,30 +58,47 @@ inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
   }
 }
 
-inline void Logistic(int32_t input_multiplier, int32_t input_size,
-                     const int16_t* ptr_input_data, int16_t* ptr_output_data) {
+inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
+                     int32_t input_size, const int16_t* ptr_input_data,
+                     int16_t* ptr_output_data) {
   // We use the LUT for sigmoid and take into account that
   // tanh(x) = 2*sigmoid(2*x) - 1
 
-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+  // We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
+  // In case of a general parameter scale, the multiplier 3 is taken
+  // into account in the SigmoidPrepare function and is already
+  // included in input_multiplier.
+
+  if (input_multiplier == 0) {  // power-of-two case
+    input_multiplier = 3 << input_left_shift;
+    input_left_shift = 0;
+  }
+
+  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
 
   for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
+    int32_t input_data =
+        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
 
-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and
-    // we do interpolation on unsigned values.
-    uint32_t abs_input_data = 3 * abs(input_data);
+    // We do interpolation on unsigned values.
+    uint32_t abs_input_data = abs(input_data);
 
     // We divide by 2^9, because we need to
     // divide by 2^7 for the input conversion,
    // plus 1/4 from the scaling above.
-    uint8_t uh = abs_input_data >> 9;
-    uint32_t ua = sigmoid_table_uint16[uh];
-    uint32_t ub = sigmoid_table_uint16[uh + 1];
-    uint32_t ut = abs_input_data & 0x1ff;
+    uint32_t uh = abs_input_data >> 9;
 
-    // Interpolation is done using the fractional bit.
-    uint32_t result = (ua << 9) + ut * (ub - ua);
+    uint32_t result;
+    if (uh >= 255) {
+      result = 0xfffe << 9;
+    } else {
+      uint32_t ua = sigmoid_table_uint16[uh];
+      uint32_t ub = sigmoid_table_uint16[uh + 1];
+      uint32_t ut = abs_input_data & 0x1ff;
+
+      // Interpolation is done using the fractional bit.
+      result = (ua << 9) + ut * (ub - ua);
+    }
 
     result = (input_data >= 0) ? (result + (1 << 9))
                                : ((1 << (16 + 9)) - result + (1 << 9) - 1);
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
index baae65ab30e..ade3d958ccc 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
@@ -63,17 +63,23 @@ inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
   // We use the LUT for sigmoid and take into account that
   // tanh(x) = 2*sigmoid(2*x) - 1
 
-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+  // We scale by 3/4 to expand the range [-8,8] -> [-10.7,10.7].
+  // In case of a general parameter scale, the multiplier 3 is taken
+  // into account in the TanhPrepare function and is already
+  // included in input_multiplier.
+
+  if (input_multiplier == 0) {  // power-of-two case
+    input_multiplier = 3 << input_left_shift;
+    input_left_shift = 0;
+  }
+
+  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;
 
   for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
+    int32_t input_data =
+        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;
 
-    if (input_left_shift == 1) {
-      input_data <<= 1;
-    }
-
-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
-    uint32_t abs_input_data = 3 * abs(input_data);
+    uint32_t abs_input_data = abs(input_data);
 
     uint32_t uh = abs_input_data >> 8;
     int32_t result;

From a2b53623a2c0f81c32b02e2d4b3bac153daaacf9 Mon Sep 17 00:00:00 2001
From: Elena Zhelezina
Date: Tue, 22 Sep 2020 10:35:42 +0100
Subject: [PATCH 2/2] Addressed reviewer's comments.

Change-Id: I798bf7919b6a268a4631984ed07a242943ca0b72
---
 tensorflow/lite/kernels/activations.cc                     | 8 +++++---
 .../lite/kernels/internal/reference/integer_ops/logistic.h | 8 ++++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index 30665095c60..a951ff8dad4 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -437,14 +437,16 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
   if (!param_scale_pot) {
     // Calculate multiplier to change input scale to 1/(3*4096)
     // as required by the table lookup.
-    // In this scaling +/-2^17 represents +/-10.7
+    // The factor 3.0 in the multiplier comes from the interval
+    // being [-10.7, 10.7] instead of [-8, 8].
+    // So, in this scaling +/-2^17 represents +/-10.7.
 
     double multiplier = input->params.scale * 4096.0 * 3.0;
 
     data->input_left_shift = 0;
 
     while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
-        data->input_left_shift++;
-        multiplier = multiplier * 2.0;
+      data->input_left_shift++;
+      multiplier = multiplier * 2.0;
     }
 
     data->input_multiplier = static_cast<int32_t>(multiplier);

diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
index b1a970396d3..07eb732e2bd 100644
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -86,14 +86,14 @@ inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
     // We divide by 2^9, because we need to
     // divide by 2^7 for the input conversion,
     // plus 1/4 from the scaling above.
-    uint32_t uh = abs_input_data >> 9;
+    uint32_t u_table = abs_input_data >> 9;
 
     uint32_t result;
-    if (uh >= 255) {
+    if (u_table >= 255) {
       result = 0xfffe << 9;
     } else {
-      uint32_t ua = sigmoid_table_uint16[uh];
-      uint32_t ub = sigmoid_table_uint16[uh + 1];
+      uint32_t ua = sigmoid_table_uint16[u_table];
+      uint32_t ub = sigmoid_table_uint16[u_table + 1];
       uint32_t ut = abs_input_data & 0x1ff;
 
       // Interpolation is done using the fractional bit.
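
For reference, below is a minimal standalone sketch of the rescaling scheme the two patches implement: the Prepare functions convert an arbitrary int16 input scale into a (multiplier, left_shift) pair targeting the 1/(3*4096) grid expected by the sigmoid/tanh lookup tables, and the kernels then rescale each element before indexing the table. The helper names PrepareRescale and ApplyRescale are illustrative only and are not part of the TFLite API; the constants are taken from the patches above.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Mirror of the loop in TanhPrepare/SigmoidPrepare: start from
// scale * 3 * 4096 and double the multiplier into (16383.5, 32767],
// counting the doublings as the left shift.
void PrepareRescale(double input_scale, int32_t* multiplier,
                    int32_t* left_shift) {
  double m = input_scale * 4096.0 * 3.0;
  *left_shift = 0;
  while (m <= 32767.0 / 2.0 && *left_shift <= 30) {
    ++*left_shift;
    m *= 2.0;
  }
  *multiplier = static_cast<int32_t>(m);
}

// Mirror of the per-element rescale in the patched Logistic/Tanh loops:
// multiply, add the rounding term, then shift back down.
int32_t ApplyRescale(int16_t input, int32_t multiplier, int32_t left_shift) {
  int32_t round = (left_shift > 0) ? 1 << (left_shift - 1) : 0;
  return (static_cast<int32_t>(input) * multiplier + round) >> left_shift;
}

int main() {
  // Input quantized with scale 11/32768, as in TanhInt16General above.
  const double scale = 11.0 / 32768.0;
  int32_t multiplier = 0;
  int32_t left_shift = 0;
  PrepareRescale(scale, &multiplier, &left_shift);
  std::printf("multiplier=%d left_shift=%d\n", static_cast<int>(multiplier),
              static_cast<int>(left_shift));

  // A real value x should land at roughly x * 3 * 4096 on the new grid,
  // so +/-2^17 corresponds to +/-10.67 (131072 / 12288 = 10.67).
  const double x = 4.0;
  const int16_t q = static_cast<int16_t>(std::lround(x / scale));
  std::printf("x=%g -> %d (expected ~%ld)\n", x,
              static_cast<int>(ApplyRescale(q, multiplier, left_shift)),
              std::lround(x * 3 * 4096));
  return 0;
}

With scale = 11/32768 this prints multiplier=16896 and left_shift=12, and maps x = 4.0 to 49154, within rounding error of the ideal 4 * 3 * 4096 = 49152. The power-of-two branch in the kernels (input_multiplier == 0) reduces to the same grid by folding the factor 3 into the multiplier as 3 << input_left_shift.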