Merge pull request #42671 from wwwind:tanh_logistic_fixes

PiperOrigin-RevId: 354038596
2021-01-27 00:43:04 -08:00 · 2021-01-27 00:43:04 -08:00 · 389c741c16
commit 389c741c16
parent 6bb607e96e 595c3e3d13
4 changed files with 79 additions and 45 deletions
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@ -447,13 +447,21 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
        (data->input_left_shift == 0 || data->input_left_shift == 1);

    if (!param_scale_pot) {
-      // In case of general scale parameter, we need to do a rescaling.
-      // Magic constant 4096:
-      // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval
-      // from 16-bit (-2^15, 2^15),
-      // so we need to multiply by
-      // 2^(15 - kInputIntegerBits) = 2^12 = 4096.
-      data->input_multiplier = static_cast<int32_t>(input->params.scale * 4096);
+      // Calculate multiplier to change input scale to 1/(3*4096)
+      // as required by the table lookup.
+      // The number 3.0 in the multiplier comes from here,
+      // because the interval is [-10.7, 10.7] instead of [-8, 8].
+      // So, in this scaling +/-2^17 represents +/-10.7.
+
+      double multiplier = input->params.scale * 4096.0 * 3.0;
+      data->input_left_shift = 0;
+
+      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+      }
+
+      data->input_multiplier = static_cast<int32_t>(multiplier);
    }

    int output_scale_log2_rounded;
@ -544,13 +552,19 @@ TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) {
    param_scale_pot &= (data->input_left_shift == 0);

    if (!param_scale_pot) {
-      // In case of general scale parameter, we need to do a rescaling.
-      // Magic constant 4096:
-      // We need to scale down to (-2^3, 2^3) / 3 is kInputIntegerBits/ interval
-      // from 16-bit (-2^15, 2^15),
-      // so we need to multiply by
-      // 2^(15 - kInputIntegerBits) = 2^12 = 4096.
-      data->input_multiplier = static_cast<int32_t>(input->params.scale * 4096);
+      // Calculate multiplier to change input scale to 1/(3*4096)
+      // as required by the table lookup.
+      // In this scaling +/-2^17 represents +/-10.7
+      double multiplier = input->params.scale * 4096.0 * 3.0;
+
+      data->input_left_shift = 0;
+
+      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+      }
+
+      data->input_multiplier = static_cast<int32_t>(multiplier);
    }

    int output_scale_log2_rounded;
@ -983,9 +997,9 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
        const int size =
            MatchingFlatSize(GetTensorShape(input), GetTensorShape(output));

-        reference_integer_ops::Logistic(data->input_multiplier, size,
-                                        GetTensorData<int16_t>(input),
-                                        GetTensorData<int16_t>(output));
+        reference_integer_ops::Logistic(
+            data->input_multiplier, data->input_left_shift, size,
+            GetTensorData<int16_t>(input), GetTensorData<int16_t>(output));
      } else {
        optimized_ops::Logistic(
            params, GetTensorShape(input), GetTensorData<int16_t>(input),
--- a/tensorflow/lite/kernels/activations_test.cc
+++ b/tensorflow/lite/kernels/activations_test.cc
@ -988,13 +988,15 @@ TEST_P(TanhOpTest, TanhInt16General) {
  const float kMax = 32767.f / 32768.f;
  QuantizedActivationsOpModel m(
      GetRegistration(), BuiltinOperator_TANH,
-      /*input=*/{TensorType_INT16, {6}, 11 * kMin, 11 * kMax},
-      /*output=*/{TensorType_INT16, {5}, kMin, kMax});
-  m.SetInput<int16_t>({-10, -4, 0, 6, 7.0909090909, 8});
+      /*input=*/{TensorType_INT16, {10}, 11 * kMin, 11 * kMax},
+      /*output=*/{TensorType_INT16, {10}, kMin, kMax});
+  m.SetInput<int16_t>({-10, -4, 1, 0.5, 0.25,  //
+                       0, -0.1, 6, 7.0909090909, 8});
  m.Invoke();
  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
              ElementsAreArray(ArrayFloatNear(
-                  {-0.999969, -0.99408, 0, 0.999664, 0.999939, 0.999969},
+                  {-1.0, -0.999329, 0.761594, 0.462117, 0.244919,  //
+                   0.0, -0.099668, 0.999988, 0.999999, 1.0},
                  kQuantizedToleranceInt16)));
 }

@ -1219,18 +1221,18 @@ TEST_P(LogisticOpTest, SigmoidInt16General) {
  const float kMax = 32767.f / 32768.f;
  QuantizedActivationsOpModel m(
      GetRegistration(), BuiltinOperator_LOGISTIC,
-      /*input=*/{TensorType_INT16, {8}, 10 * kMin, 10 * kMax},
-      /*output=*/{TensorType_INT16, {8}, kMin, kMax});
+      /*input=*/{TensorType_INT16, {12}, 13 * kMin, 13 * kMax},
+      /*output=*/{TensorType_INT16, {12}, kMin, kMax});
  m.SetInput<int16_t>({
-      0, -6, 2, 4,   //
-      3, -2, 10, 1,  //
+      0, -6, 2, 4, 0.1, 12,    //
+      3, -2, 10, 1, 0.25, -12  //
  });
  m.Invoke();
-  EXPECT_THAT(
-      m.GetDequantizedOutput<int16_t>(),
-      ElementsAreArray(ArrayFloatNear({0.5, 0.00814819, 0.832031, 0.960846,  //
-                                       0.916809, 0.167969, 0.999664, 0.689972},
-                                      kQuantizedToleranceInt16)));
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {0.5, 0.002473, 0.880797, 0.982014, 0.524979, 0.999994,  //
+                   0.952574, 0.119203, 0.999955, 0.731059, 0.562177, 0},
+                  kQuantizedToleranceInt16)));
 }

 TEST(FloatActivationsOpTest, Softmax4D) {
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@ -58,23 +58,35 @@ inline void Logistic(int32_t input_zero_point, int32_t input_range_radius,
  }
 }

-inline void Logistic(int32_t input_multiplier, int32_t input_size,
-                     const int16_t* ptr_input_data, int16_t* ptr_output_data) {
+inline void Logistic(int32_t input_multiplier, int32_t input_left_shift,
+                     int32_t input_size, const int16_t* ptr_input_data,
+                     int16_t* ptr_output_data) {
  // We use the LUT for sigmoid and take into account, that
  // tanh(x) = 2*sigmoid(2*x) - 1

-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+  // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
+  // In case of general parameter scale, multiplier 3 is taken into account
+  // in SigmoidPrepare function and it is included in
+  // input_multiplier already.
+
+  if (input_multiplier == 0) {  // power of two case
+    input_multiplier = 3 << input_left_shift;
+    input_left_shift = 0;
+  }
+
+  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;

  for (int i = 0; i < input_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
+    int32_t input_data =
+        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;

-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7] and
-    // we do interpolation on unsigned values.
-    uint32_t abs_input_data = 3 * abs(input_data);
+    // We do interpolation on unsigned values.
+    uint32_t abs_input_data = abs(input_data);

    // We divide by 2^9, because we need to divide by 2^7 for
    // the input conversion and by a further 2^2 for the 1/4 factor
    // of the 3/4 scale above.
+
    // Define uh as uint32_t type not to make this function overflow.
    uint32_t uh = abs_input_data >> 9;
    uint32_t result;
--- a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
@ -65,19 +65,25 @@ inline void Tanh(int32_t input_multiplier, int32_t input_left_shift,
  // We use the LUT for sigmoid and take into account, that
  // tanh(x) = 2*sigmoid(2*x) - 1

-  int32_t input_data_mul = (input_multiplier > 0) ? input_multiplier : 1;
+  // We scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
+  // In case of general parameter scale, multiplier 3 is taken into account
+  // in TanhPrepare function and it is included in
+  // input_multiplier already.
+
+  if (input_multiplier == 0) {  // power of two case
+    input_multiplier = 3 << input_left_shift;
+    input_left_shift = 0;
+  }
+
+  int32_t round = (input_left_shift > 0) ? 1 << (input_left_shift - 1) : 0;

  int flat_size = MatchingFlatSize(input_shape, output_shape);

  for (int i = 0; i < flat_size; ++i, ptr_input_data++, ptr_output_data++) {
-    int32_t input_data = (*ptr_input_data) * input_data_mul;
+    int32_t input_data =
+        ((*ptr_input_data) * input_multiplier + round) >> input_left_shift;

-    if (input_left_shift == 1) {
-      input_data <<= 1;
-    }
-
-    // Scale by 3/4 to expand range [-8,8]->[-10.7,10.7].
-    uint32_t abs_input_data = 3 * abs(input_data);
+    uint32_t abs_input_data = abs(input_data);
    uint32_t uh = abs_input_data >> 8;
    int32_t result;