commit fd76504f5f
Merge pull request #35996 from psunn:int16_softmax

PiperOrigin-RevId: 306740044
Change-Id: I28304bf393b4444d57a76642400272edfa2502ed
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -60,7 +60,13 @@ struct OpData {
 struct SoftmaxOpData {
   struct SoftmaxParams params = {};
-  float table[256];
+  float table[256]{};
+  const int size_of_lut = 513;
+  int16_t exp_lut[513]{};  // int16 LUT for exp(x), where x is uniformly
+                           // distributed between [-10.0, 0.0]
+  int16_t one_over_one_plus_x_lut[513]{};  // int16 LUT for 1 / (1 + x), where
+                                           // x is uniformly distributed
+                                           // between [0.0, 1.0]
 };
 
 struct LogSoftmaxOpData : public OpData {
@@ -546,8 +552,9 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
   if (output->type == kTfLiteInt16) {
-    TF_LITE_ENSURE(context,
-                   input->type == kTfLiteInt8 || input->type == kTfLiteUInt8);
+    TF_LITE_ENSURE(context, input->type == kTfLiteInt8 ||
+                                input->type == kTfLiteUInt8 ||
+                                input->type == kTfLiteInt16);
   } else {
     TF_LITE_ENSURE_EQ(context, input->type, output->type);
   }
@@ -562,6 +569,28 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
     data->params.scale = output->params.scale;
   }
 
+  if (input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
+    data->params.exp_lut = data->exp_lut;
+    // The exp LUT is only used on negative values; we consider exp(-10.0)
+    // insignificant to the accumulation.
+    gen_lut([](double value) { return std::exp(value); }, -10.0, 0.0,
+            data->params.exp_lut, data->size_of_lut);
+    data->params.one_over_one_plus_x_lut = data->one_over_one_plus_x_lut;
+    gen_lut([](double value) { return 1.0 / (1.0 + value); }, 0.0, 1.0,
+            data->params.one_over_one_plus_x_lut, data->size_of_lut);
+    data->params.zero_point = output->params.zero_point;
+    data->params.scale = output->params.scale;
+
+    double input_scale_beta_rescale =
+        input->params.scale * params->beta /
+        (10.0 / 65535.0);  // scale input_diff such that [-65535, 0]
+                           // corresponds to [-10.0, 0.0]
+    QuantizeMultiplier(input_scale_beta_rescale,
+                       &data->params.input_multiplier,
+                       &data->params.input_left_shift);
+  }
+
   return context->ResizeTensor(context, output,
                                TfLiteIntArrayCopy(input->dims));
 }
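For intuition, the rescale factor above stretches quantized input differences so that the most negative representable difference lands at the exp LUT's lower bound of -10.0. A minimal standalone sketch of the same arithmetic (plain C++, illustrative scale and beta values only, not taken from the commit):

    #include <cstdio>

    int main() {
      // Assumed example: an int16 input quantized with scale 10/32768
      // (covering roughly [-10, 10]) and beta = 1.0.
      const double input_scale = 10.0 / 32768.0;
      const double beta = 1.0;
      // Same formula as SoftmaxPrepare: after this rescale, an input_diff of
      // -65535 corresponds to the real value -10.0 expected by the exp LUT.
      const double input_scale_beta_rescale =
          input_scale * beta / (10.0 / 65535.0);
      printf("rescale = %f\n", input_scale_beta_rescale);  // ~2.0
      return 0;
    }

QuantizeMultiplier then splits this double into the integer multiplier and shift consumed by MultiplyByQuantizedMultiplier inside the kernel.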
@@ -945,6 +974,25 @@ TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input,
   return kTfLiteOk;
 }
 
+template <>
+TfLiteStatus SoftmaxQuantized<int16, int16>(TfLiteContext* context,
+                                            const TfLiteTensor* input,
+                                            TfLiteTensor* output,
+                                            SoftmaxOpData* data) {
+  if (NumDimensions(input) >= 1 && NumDimensions(input) <= 4) {
+    reference_ops::SoftmaxInt16(
+        data->params, GetTensorShape(input), GetTensorData<int16_t>(input),
+        GetTensorShape(output), GetTensorData<int16_t>(output));
+    return kTfLiteOk;
+  } else {
+    TF_LITE_KERNEL_LOG(context,
+                       "Only 1D, 2D, 3D and 4D tensors supported for int16 "
+                       "input with int16 output, got %dD.",
+                       NumDimensions(input));
+    return kTfLiteError;
+  }
+}
+
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
   SoftmaxOpData* data = reinterpret_cast<SoftmaxOpData*>(node->user_data);
@@ -987,12 +1035,15 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
         return kTfLiteError;
       }
     }
+    case kTfLiteInt16: {
+      return SoftmaxQuantized<int16_t, int16_t>(context, input, output, data);
+    }
+
     default:
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Only float32, uint8_t and int8_t are supported currently, got %s.",
-          TfLiteTypeGetName(input->type));
+      TF_LITE_KERNEL_LOG(context,
+                         "Only float32, uint8_t, int8_t and int16_t are "
+                         "supported currently, got %s.",
+                         TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 }
--- a/tensorflow/lite/kernels/activations_test.cc
+++ b/tensorflow/lite/kernels/activations_test.cc
@@ -85,7 +85,16 @@ class BaseActivationsOpModel : public SingleOpModel {
       output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
     } else if (output_type == TensorType_INT8) {
       output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
-    } else if (output_type == TensorType_INT16) {
+    } else if (input.type == TensorType_INT16 &&
+               output_type == TensorType_INT16) {
+      output_ = AddOutput({TensorType_INT16,
+                           {},
+                           0,
+                           0,
+                           1.0f / (std::numeric_limits<int16_t>::max() + 1),
+                           0});
+    } else if (input.type != TensorType_INT16 &&
+               output_type == TensorType_INT16) {
       output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
     } else {
       output_ = AddOutput({output_type, {}});
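The int16-in/int16-out branch picks scale 1.0f / (int16 max + 1) = 1/32768 with zero point 0, so a softmax probability p is stored as round(p * 32768), saturated at 32767. A quick standalone check (QuantizeProb is a hypothetical helper, not part of the test file):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Quantize a probability with scale 1/32768 and zero point 0, mirroring
    // the int16 output tensor set up above.
    int16_t QuantizeProb(float p) {
      const float q = std::round(p * 32768.0f);
      return static_cast<int16_t>(std::min(q, 32767.0f));
    }

    int main() {
      // 0.66524096 is the largest expected value in the Softmax1DInt16 test
      // below; 1.0 shows saturation at the top of the range.
      printf("%d %d\n", QuantizeProb(0.66524096f), QuantizeProb(1.0f));
      // Prints: 21799 32767
      return 0;
    }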
@@ -1040,6 +1049,149 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt8) {
                   kQuantizedTolerance)));
 }
 
+// Test quantized softmax with int16 input and output. With the same input as
+// in QuantizedActivationsOpTest.Softmax1D, the dequantized output is
+// identical.
+TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
+  QuantizedActivationsOpModel m(1,
+                                /*input=*/{TensorType_INT16, {3}, -3, 3},
+                                /*output_type=*/TensorType_INT16);
+  m.SetInput<int16_t>({1, 2, 3});
+  m.Invoke();
+  EXPECT_THAT(
+      m.GetDequantizedOutput<int16_t>(),
+      ElementsAreArray(ArrayFloatNear({0.0900269, 0.2447285, 0.66524096},
+                                      kQuantizedToleranceInt16)));
+}
+
+TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
+  QuantizedActivationsOpModel m(0.1,
+                                /*input=*/{TensorType_INT16, {1}, -1, 1},
+                                TensorType_INT16);
+  m.SetInput<int16_t>({0});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear({1}, kQuantizedToleranceInt16)));
+}
+
+TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
+  QuantizedActivationsOpModel m(0.1,
+                                /*input=*/{TensorType_INT16, {2, 4}, -10, 10},
+                                TensorType_INT16);
+  m.SetInput<int16_t>({
+      0, -6, 2, 4,   //
+      3, -2, 10, 1,  //
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      .23463, .12877, .28658, .35003,  //
+                      .22528, .13664, .45365, .18443,  //
+                  },
+                  kQuantizedToleranceInt16)));
+
+  // Same input, but a different shape.
+  QuantizedActivationsOpModel m2(0.1,
+                                 /*input=*/{TensorType_INT16, {4, 2}, -10, 10},
+                                 TensorType_INT16);
+  m2.SetInput<int16_t>({
+      0, -6,  //
+      2, 4,   //
+      3, -2,  //
+      10, 1,  //
+  });
+  m2.Invoke();
+  EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.645656, 0.354344,  //
+                      0.450166, 0.549834,  //
+                      0.622459, 0.377541,  //
+                      0.710949, 0.28905,   //
+                  },
+                  kQuantizedToleranceInt16)));
+}
+
+TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
+  QuantizedActivationsOpModel m(
+      1,
+      /*input=*/{TensorType_INT16, {1, 2, 4}, -10, 10}, TensorType_INT16);
+  m.SetInput<int16_t>({
+      0, -6, 2, 4,   // depth = 0
+      3, -2, 10, 1,  // depth = 1
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      .0158756, .000039, .1173, .866779,   //
+                      .00091, .0000061, .998959, .000123,  //
+                  },
+                  kQuantizedTolerance)));
+
+  // Same input, but a different shape.
+  QuantizedActivationsOpModel m2(
+      1,
+      /*input=*/{TensorType_INT16, {4, 1, 2}, -10, 10}, TensorType_INT16);
+  m2.SetInput<int16_t>({
+      0, -6,  //
+      2, 4,   //
+      3, -2,  //
+      10, 1,  //
+  });
+  m2.Invoke();
+  EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.997527, 0.0024726,       //
+                      0.11920292, 0.88079707,    //
+                      0.99330715, 0.00669285,    //
+                      0.999876605, 0.000123395,  //
+                  },
+                  kQuantizedTolerance)));
+}
+
+// Test quantized softmax with int16 input and output. With the same input as
+// in QuantizedActivationsOpTest.Softmax4D, the dequantized output is
+// identical.
+TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
+  QuantizedActivationsOpModel m(
+      0.1,
+      /*input=*/{TensorType_INT16, {1, 2, 1, 4}, -10, 10}, TensorType_INT16);
+  m.SetInput<int16_t>({
+      0, -6, 2, 4,   // depth = 0
+      3, -2, 10, 1,  // depth = 1
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      .23463, .12877, .28658, .35003,  //
+                      .22528, .13664, .45365, .18443,  //
+                  },
+                  kQuantizedToleranceInt16)));
+
+  // Same input, but a different shape.
+  QuantizedActivationsOpModel m2(
+      0.1,
+      /*input=*/{TensorType_INT16, {4, 1, 1, 2}, -10, 10}, TensorType_INT16);
+  m2.SetInput<int16_t>({
+      0, -6,  //
+      2, 4,   //
+      3, -2,  //
+      10, 1,  //
+  });
+  m2.Invoke();
+  EXPECT_THAT(m2.GetDequantizedOutput<int16_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.645656, 0.354344,  //
+                      0.450166, 0.549834,  //
+                      0.622459, 0.377541,  //
+                      0.710949, 0.28905,   //
+                  },
+                  kQuantizedToleranceInt16)));
+}
+
 // Test quantized softmax with int8 input and int16 output. With the same input
 // as in QuantizedActivationsOpTest.Softmax1D, the dequantized output is
 // identical.
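The expected arrays are simply the float softmax of the test inputs; the int16 path only has to match within kQuantizedToleranceInt16. A float reference for the 1D case (illustrative, not part of the test suite):

    #include <cmath>
    #include <cstdio>

    int main() {
      // Softmax of {1, 2, 3} with beta = 1, computed max-subtracted the same
      // way the kernel does.
      const double in[3] = {1, 2, 3};
      double e[3], sum = 0;
      for (int i = 0; i < 3; ++i) sum += (e[i] = std::exp(in[i] - in[2]));
      for (int i = 0; i < 3; ++i) printf("%.7f ", e[i] / sum);
      // Prints 0.0900306 0.2447285 0.6652410, matching the Softmax1DInt16
      // expectations above within tolerance.
      return 0;
    }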
--- a/tensorflow/lite/kernels/internal/common.h
+++ b/tensorflow/lite/kernels/internal/common.h
@@ -21,6 +21,8 @@ limitations under the License.
 #endif
 #endif
 
+#include <functional>
+
 #include "fixedpoint/fixedpoint.h"
 #include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
 #include "tensorflow/lite/kernels/internal/types.h"
@@ -195,6 +197,49 @@ inline int CountLeadingSignBits(T integer_input) {
 #endif
 }
 
+// Generate an INT16 LUT for function(), e.g., tables for exp(x) and 1/(1+x)
+// used in softmax.
+inline void gen_lut(const std::function<double(double)>& func, double min,
+                    double max, int16_t* table, const int num) {
+  // size of the table should equal num + 1
+  // the last element is only used for slope calculation
+  double step = (max - min) / (num - 1);
+  double half_step = step / 2.0;
+  for (int i = 0; i < num - 1; i++) {
+    double sample_val = std::round(func(min + i * step) * 32768.0);
+    double midpoint_interp_val =
+        std::round((func(min + (i + 1) * step) * 32768.0 +
+                    std::round(func(min + i * step) * 32768.0)) /
+                   2.0);
+    double midpoint_val =
+        std::round(func(min + i * step + half_step) * 32768.0);
+    double midpoint_err = midpoint_interp_val - midpoint_val;
+    double bias = std::round(midpoint_err / 2.0);
+    table[i] = std::min(std::max(sample_val - bias, -32768.0), 32767.0);
+  }
+  table[num - 1] =
+      std::min(std::max(std::round(func(max) * 32768.0), -32768.0), 32767.0);
+}
+
+// int16 func table lookup, e.g., lookup of exp() and 1/(1+x) used in softmax
+static int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
+  // 512 base values; the 513th entry exists only for slope calculation
+  uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
+  assert(index < 512 && "LUT index out of range.");
+  int16_t offset = value & 0x7f;
+
+  // base and slope are Q0.15
+  int16_t base = lut[index];
+  int16_t slope = lut[index + 1] - lut[index];
+
+  // Q0.15 * Q0.7 = Q0.22
+  // Round and convert from Q0.22 to Q0.15
+  int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
+
+  // Q0.15 + Q0.15
+  return base + delta;
+}
+
 // Table of sigmoid(i/24) at 0.16 format - 256 elements.
 
 // We use combined sigmoid and tanh look-up table, since
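As a sanity check of the two helpers above, one can regenerate the exp table the way SoftmaxPrepare does and probe its midpoint (a standalone sketch; it assumes gen_lut and generic_int16_table_lookup are in scope, e.g., via this header):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t exp_lut[513];
      gen_lut([](double v) { return std::exp(v); }, -10.0, 0.0, exp_lut, 513);
      // The LUT domain [-10.0, 0.0] is mapped onto int16 [-32768, 32767], so
      // a lookup argument of 0 corresponds to the midpoint x = -5.0.
      int16_t q015 = generic_int16_table_lookup(0, exp_lut);
      printf("LUT:  %f\n", q015 / 32768.0);  // ~0.00674, in Q0.15
      printf("real: %f\n", std::exp(-5.0));  // 0.006738
      return 0;
    }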
--- a/tensorflow/lite/kernels/internal/reference/softmax.h
+++ b/tensorflow/lite/kernels/internal/reference/softmax.h
@@ -16,6 +16,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
 
 #include <limits>
+#include <vector>
 
 #include "fixedpoint/fixedpoint.h"
 #include "tensorflow/lite/kernels/internal/common.h"
@@ -142,6 +143,83 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
+// Quantized softmax with int16 input and int16 output.
+inline void SoftmaxInt16(const SoftmaxParams& params,
+                         const RuntimeShape& input_shape,
+                         const int16_t* input_data,
+                         const RuntimeShape& output_shape,
+                         int16_t* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i) {
+    // Find the largest element in the row.
+    int16_t max_in_row = std::numeric_limits<int16_t>::min();
+    for (int c = 0; c < depth; ++c) {
+      max_in_row = std::max(max_in_row, input_data[i * depth + c]);
+    }
+
+    // Compute exp(input - max_input).
+    std::vector<int16_t> exp_result_Q015(depth);
+    for (int c = 0; c < depth; ++c) {
+      int32_t input_diff = input_data[i * depth + c] - max_in_row;
+      // scale input_diff such that [-65535, 0] corresponds to [-10.0, 0.0]
+      int32_t scaled_diff = MultiplyByQuantizedMultiplier(
+          input_diff, params.input_multiplier, params.input_left_shift);
+      // recenter to [-32768, 32767]
+      int32_t sym_scaled_diff = scaled_diff + 32767;
+      int16_t sat_sym_scaled_diff =
+          std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
+                   static_cast<int32_t>(32767));
+      // apply the exp() LUT activation function
+      exp_result_Q015[c] =
+          generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
+    }
+
+    // sum_of_exps is in Q16.15 fixed point format.
+    int32_t sum_of_exps = 0;
+    for (int c = 0; c < depth; ++c) {
+      // Q16.15 + Q0.15
+      sum_of_exps += exp_result_Q015[c];
+    }
+
+    // Compute the reciprocal 1/sum_of_exps.
+    uint8_t headroom_plus_one =
+        CountLeadingZeros(static_cast<uint32_t>(sum_of_exps));
+    int32_t shifted_sum =
+        ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
+         (1 << 13)) >>
+        14;
+    // since the LUT computes 1/(1 + x) we need to first compute x = (sum - 1).
+    // also, the LUT expects a symmetrical input, so we must also recenter x
+    // from [0, 65535] to [-32768, 32767].
+    int32_t sym_shifted_sum = shifted_sum + (-((1 << 15) + (1 << 16)));
+    int16_t sat_sym_shifted_sum = static_cast<int16_t>(
+        std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
+                 static_cast<int32_t>(32767)));
+    // apply the 1/(1 + x) LUT activation function
+    int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
+        sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
+
+    // Rescale exp_result with the reciprocal; the output range [0, 32767]
+    // corresponds to [0.0, 1.0].
+    for (int c = 0; c < depth; ++c) {
+      uint8_t right_shift = 31 - headroom_plus_one;
+      int64_t round = 1 << (right_shift - 1);
+      int32_t result = (static_cast<int64_t>(exp_result_Q015[c]) *
+                            static_cast<int64_t>(reciprocal_scale_Q015) +
+                        round) >>
+                       right_shift;
+      output_data[i * depth + c] = static_cast<int16_t>(
+          std::min(std::max(result, static_cast<int32_t>(0)),
+                   static_cast<int32_t>(32767)));
+    }
+  }
+}
+
 }  // namespace reference_ops
 }  // namespace tflite
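The reciprocal step first normalizes sum_of_exps so its leading bit lands in a fixed position before the 1/(1+x) lookup. A worked sketch of just that shift arithmetic (standalone; GCC/Clang's __builtin_clz stands in for tflite's CountLeadingZeros, and the input value is illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Suppose a row's exponentials summed to 3.0 in Q16.15: 3 * 32768.
      int32_t sum_of_exps = 3 * 32768;
      uint8_t headroom_plus_one =
          __builtin_clz(static_cast<uint32_t>(sum_of_exps));
      // Same expression as SoftmaxInt16: round and shift the sum so it lands
      // in [65536, 131071], i.e., [1.0, 2.0) with 16 fractional bits.
      int32_t shifted_sum =
          ((static_cast<int64_t>(sum_of_exps) << (headroom_plus_one - 1)) +
           (1 << 13)) >>
          14;
      printf("%f\n", shifted_sum / 65536.0);  // 1.5, since 3.0 = 1.5 * 2^1
      return 0;
    }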
--- a/tensorflow/lite/kernels/internal/types.h
+++ b/tensorflow/lite/kernels/internal/types.h
@@ -1030,6 +1030,8 @@ struct SoftmaxParams {
   int32_t zero_point;
   float scale;
   float* table;
+  int16_t* exp_lut;
+  int16_t* one_over_one_plus_x_lut;
 };
 
 struct SpaceToBatchParams {
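Taken together, these fields connect preparation to execution roughly as follows. This is a hedged sketch, not code from the commit: the include set, the QuantizeMultiplier signature, and the single-row shape are assumptions based on the surrounding TFLite headers; in the commit itself this wiring happens inside SoftmaxPrepare and SoftmaxEval via SoftmaxOpData.

    #include <cmath>
    #include <cstdint>

    #include "tensorflow/lite/kernels/internal/common.h"             // gen_lut
    #include "tensorflow/lite/kernels/internal/quantization_util.h"  // QuantizeMultiplier (assumed location)
    #include "tensorflow/lite/kernels/internal/reference/softmax.h"  // SoftmaxInt16
    #include "tensorflow/lite/kernels/internal/types.h"              // SoftmaxParams

    // Sketch: run int16 softmax over one row of `depth` elements.
    void RunInt16SoftmaxRow(const int16_t* in, int16_t* out, int depth,
                            double input_scale, double beta) {
      static int16_t exp_lut[513];
      static int16_t one_over_one_plus_x_lut[513];
      tflite::gen_lut([](double v) { return std::exp(v); }, -10.0, 0.0,
                      exp_lut, 513);
      tflite::gen_lut([](double v) { return 1.0 / (1.0 + v); }, 0.0, 1.0,
                      one_over_one_plus_x_lut, 513);

      tflite::SoftmaxParams params;
      params.exp_lut = exp_lut;
      params.one_over_one_plus_x_lut = one_over_one_plus_x_lut;
      int32_t multiplier;
      int shift;
      tflite::QuantizeMultiplier(input_scale * beta / (10.0 / 65535.0),
                                 &multiplier, &shift);
      params.input_multiplier = multiplier;
      params.input_left_shift = shift;

      const tflite::RuntimeShape shape({1, depth});
      tflite::reference_ops::SoftmaxInt16(params, shape, in, shape, out);
    }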