From cd661aab9647b700eebe9c6b4ff16d06b326fe53 Mon Sep 17 00:00:00 2001
From: Robert David
Date: Tue, 24 Mar 2020 12:17:36 -0700
Subject: [PATCH] Remove Softmax dimension checks: all implementations work on
 last dimension, other dimensions are just used as batches.

PiperOrigin-RevId: 302719938
Change-Id: I56e5d6715b58f9c41ea902d04fe2e0d7d33df88c
---
 tensorflow/lite/kernels/activations.cc        |  46 ++-----
 .../lite/kernels/internal/reference/softmax.h |  32 -----
 .../lite/micro/kernels/cmsis-nn/softmax.cc    | 122 +++-------------
 tensorflow/lite/micro/kernels/softmax.cc      | 130 ++----------------
 .../micro/kernels/xtensa_hifimini/softmax.cc  |  34 ++---
 5 files changed, 61 insertions(+), 303 deletions(-)

diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index bfbedd5f264..eece297ffea 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -544,8 +544,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* output = GetOutput(context, node, 0);
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
-  const int num_dims = NumDimensions(input);
-  TF_LITE_ENSURE(context, num_dims >= 1 && num_dims <= 4);
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
 
   if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
     data->params.table = data->table;
@@ -915,41 +914,22 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
 
 TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input,
                           TfLiteTensor* output, TfLiteSoftmaxParams* params) {
-  switch (NumDimensions(input)) {
-    case 1:
-    case 2:
-    case 3:
-    case 4:
-      SoftmaxParams op_params;
-      op_params.beta = params->beta;
-      optimized_ops::Softmax(
-          op_params, GetTensorShape(input), GetTensorData<float>(input),
-          GetTensorShape(output), GetTensorData<float>(output),
-          CpuBackendContext::GetFromContext(context));
-      return kTfLiteOk;
-    default:
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Only 1D, 2D, 3D and 4D tensors supported currently, got %dD.",
-          NumDimensions(input));
-      return kTfLiteError;
-  }
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
+  optimized_ops::Softmax(op_params, GetTensorShape(input),
+                         GetTensorData<float>(input), GetTensorShape(output),
+                         GetTensorData<float>(output),
+                         CpuBackendContext::GetFromContext(context));
+  return kTfLiteOk;
 }
 
 template <typename T>
 TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input,
                               TfLiteTensor* output, SoftmaxOpData* data) {
-  if (NumDimensions(input) >= 1 && NumDimensions(input) <= 4) {
-    optimized_ops::Softmax(data->params, GetTensorShape(input),
-                           GetTensorData<T>(input), GetTensorShape(output),
-                           GetTensorData<T>(output));
-    return kTfLiteOk;
-  } else {
-    TF_LITE_KERNEL_LOG(
-        context, "Only 1D, 2D, 3D and 4D tensors supported currently, got %dD.",
-        NumDimensions(input));
-    return kTfLiteError;
-  }
+  optimized_ops::Softmax(data->params, GetTensorShape(input),
+                         GetTensorData<T>(input), GetTensorShape(output),
+                         GetTensorData<T>(output));
+  return kTfLiteOk;
 }
 
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -959,8 +939,6 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
 
-  // TODO(ahentz): consider an implementation that works for many (all?)
-  // dimensions.
   switch (input->type) {
     case kTfLiteFloat32: {
       return SoftmaxFloat(context, input, output, params);
diff --git a/tensorflow/lite/kernels/internal/reference/softmax.h b/tensorflow/lite/kernels/internal/reference/softmax.h
index 26e402db3da..7c59b133bf3 100644
--- a/tensorflow/lite/kernels/internal/reference/softmax.h
+++ b/tensorflow/lite/kernels/internal/reference/softmax.h
@@ -142,38 +142,6 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
-// Performs softmax along the input of size (input_size * batch_size).
-inline void Softmax(const float* in, const int input_size, const int batch_size,
-                    const float beta, float* out) {
-  // TF_LITE_ASSERT(input_size > 0);
-
-  // For each batch
-  for (int b = 0; b < batch_size; b++) {
-    // Find the max coeff.
-    float max_coeff = in[0];
-    for (int i = 1; i < input_size; i++) {
-      if (in[i] > max_coeff) max_coeff = in[i];
-    }
-
-    // Compute the normalized sum of exps.
-    float exp_sum = 0.0;
-    for (int i = 0; i < input_size; i++) {
-      out[i] = std::exp((in[i] - max_coeff) * beta);
-      exp_sum += out[i];
-    }
-
-    // Divide by the sum of exps.
-    float reciprocal_sum_exp = 1.f / exp_sum;
-    for (int i = 0; i < input_size; i++) {
-      out[i] *= reciprocal_sum_exp;
-    }
-
-    // Advance in and out pointers for the next batch.
-    in += input_size;
-    out += input_size;
-  }
-}
-
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
index 13b33b3f2cb..8cfa5413ad1 100644
--- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
@@ -67,70 +67,17 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 void Free(TfLiteContext* context, void* buffer) {}
 
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+
   return kTfLiteOk;
 }
 
-// Takes a 1D tensor and performs softmax along it.
-void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
-  const int input_size = input->dims->data[0];
-  tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
-                                 output->data.f);
-}
-
-// Takes a 2D tensor and perform softmax along the last dimension.
-void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  tflite::reference_ops::Softmax(input->data.f, input_size, batch_size,
-                                 params->beta, output->data.f);
-}
-
-void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
-                        TfLiteSoftmaxParams* params, OpData* data) {
-  const int input_size = input->dims->data[0];
-  const int32_t shape_data[4] = {1, 1, 1, input_size};
-  RuntimeShape shape(4, shape_data);
-  SoftmaxParams op_params;
-  op_params.input_multiplier = data->input_multiplier;
-  op_params.input_left_shift = data->input_left_shift;
-  op_params.diff_min = data->diff_min;
-  if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_params, shape,
-                                   GetTensorData<uint8_t>(input), shape,
-                                   GetTensorData<uint8_t>(output));
-  } else {
-    arm_softmax_s8(GetTensorData<int8_t>(input), shape_data[0], shape_data[3],
-                   op_params.input_multiplier, op_params.input_left_shift,
-                   op_params.diff_min, GetTensorData<int8_t>(output));
-  }
-}
-
-void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
-                        TfLiteSoftmaxParams* params, OpData* data) {
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
-  RuntimeShape shape(4, shape_data);
-  SoftmaxParams op_params;
-  op_params.input_multiplier = data->input_multiplier;
-  op_params.input_left_shift = data->input_left_shift;
-  op_params.diff_min = data->diff_min;
-  if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_params, shape,
-                                   GetTensorData<uint8_t>(input), shape,
-                                   GetTensorData<uint8_t>(output));
-  } else {
-    arm_softmax_s8(GetTensorData<int8_t>(input), shape_data[0], shape_data[3],
-                   op_params.input_multiplier, op_params.input_left_shift,
-                   op_params.diff_min, GetTensorData<int8_t>(output));
-  }
-}
-
 // Takes a 4D tensor and perform softmax along the forth dimension.
-void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
+void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                  TfLiteSoftmaxParams* params) {
   SoftmaxParams op_params;
   op_params.beta = params->beta;
   tflite::reference_ops::Softmax(
@@ -152,13 +99,15 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
   } else {
     const unsigned int num_dims = NumDimensions(input);
 
-    arm_softmax_s8(GetTensorData<int8_t>(input),
-                   (num_dims == 4 ? input->dims->data[0] : 1) *
-                       input->dims->data[num_dims - 3] *
-                       input->dims->data[num_dims - 2],
-                   input->dims->data[num_dims - 1], op_params.input_multiplier,
-                   op_params.input_left_shift, op_params.diff_min,
-                   GetTensorData<int8_t>(output));
+    const int trailing_dim = input_shape.DimensionsCount() - 1;
+    const int outer_size =
+        MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+    const int depth =
+        MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+    arm_softmax_s8(GetTensorData<int8_t>(input), outer_size, depth,
+                   op_params.input_multiplier, op_params.input_left_shift,
+                   op_params.diff_min, GetTensorData<int8_t>(output));
   }
 }
 
@@ -175,47 +124,18 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {
     case kTfLiteFloat32: {
-      if (NumDimensions(input) == 1) {
-        Softmax1DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 2) {
-        Softmax2DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 4) {
-        Softmax4DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      TF_LITE_KERNEL_LOG(
-          context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
-          NumDimensions(input));
-      return kTfLiteError;
+      SoftmaxFloat(input, output, params);
+      return kTfLiteOk;
     }
     case kTfLiteUInt8:
     case kTfLiteInt8: {
-      if (NumDimensions(input) == 1) {
-        Softmax1DQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 2) {
-        Softmax2DQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 3 || NumDimensions(input) == 4) {
-        SoftmaxQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Only 1D, 2D, 3D and 4D tensors supported currently, got %dD.",
-          NumDimensions(input));
-      return kTfLiteError;
+      SoftmaxQuantized(input, output, params, data);
+      return kTfLiteOk;
     }
     default:
       TF_LITE_KERNEL_LOG(
          context,
-          "Only float32, uint8_t and int8_t supported currently, got %d.",
+          "Only float32, uint8_t and int8_t input supported currently, got %d.",
          input->type);
       return kTfLiteError;
   }
diff --git a/tensorflow/lite/micro/kernels/softmax.cc b/tensorflow/lite/micro/kernels/softmax.cc
index 015ed113959..c213d6646ed 100644
--- a/tensorflow/lite/micro/kernels/softmax.cc
+++ b/tensorflow/lite/micro/kernels/softmax.cc
@@ -76,90 +76,17 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 void Free(TfLiteContext* context, void* buffer) {}
 
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+
   return kTfLiteOk;
 }
 
-// Takes a 1D tensor and performs softmax along it.
-void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
-  const int input_size = input->dims->data[0];
-  tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
-                                 output->data.f);
-}
-
-// Takes a 2D tensor and perform softmax along the last dimension.
-void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  tflite::reference_ops::Softmax(input->data.f, input_size, batch_size,
-                                 params->beta, output->data.f);
-}
-
-void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
-                        TfLiteSoftmaxParams* params, OpData* data) {
-  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
-  // always traverses the last dimension of a 4D tensor, we will pretend our 1D
-  // tensor is 4D in a special way. We will convert a (Y) shape into a (1,
-  // 1, 1, Y) shape.
-  const int input_size = input->dims->data[0];
-  const int32_t shape_data[4] = {1, 1, 1, input_size};
-  RuntimeShape shape(4, shape_data);
-  SoftmaxParams op_params;
-  op_params.input_multiplier = data->input_multiplier;
-  op_params.input_left_shift = data->input_left_shift;
-  op_params.diff_min = data->diff_min;
-  if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_params, shape,
-                                   GetTensorData<uint8_t>(input), shape,
-                                   GetTensorData<uint8_t>(output));
-  } else {
-    if (output->type == kTfLiteInt16) {
-      tflite::reference_ops::Softmax(op_params, shape,
-                                     GetTensorData<int8_t>(input), shape,
-                                     GetTensorData<int16_t>(output));
-    } else {
-      tflite::reference_ops::Softmax(op_params, shape,
-                                     GetTensorData<int8_t>(input), shape,
-                                     GetTensorData<int8_t>(output));
-    }
-  }
-}
-
-void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
-                        TfLiteSoftmaxParams* params, OpData* data) {
-  // TODO(ahentz): this is arguably a dirty trick. Since the implementation
-  // always traverses the last dimension of a 4D tensor, we will pretend our 2D
-  // tensor is 4D in a special way. We will convert a (X, Y) shape into a (X,
-  // 1, 1, Y) shape.
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
-  RuntimeShape shape(4, shape_data);
-  SoftmaxParams op_params;
-  op_params.input_multiplier = data->input_multiplier;
-  op_params.input_left_shift = data->input_left_shift;
-  op_params.diff_min = data->diff_min;
-  if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_params, shape,
-                                   GetTensorData<uint8_t>(input), shape,
-                                   GetTensorData<uint8_t>(output));
-  } else {
-    if (output->type == kTfLiteInt16) {
-      tflite::reference_ops::Softmax(op_params, shape,
-                                     GetTensorData<int8_t>(input), shape,
-                                     GetTensorData<int16_t>(output));
-    } else {
-      tflite::reference_ops::Softmax(op_params, shape,
-                                     GetTensorData<int8_t>(input), shape,
-                                     GetTensorData<int8_t>(output));
-    }
-  }
-}
-
-// Takes a 4D tensor and perform softmax along the forth dimension.
-void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
-                    TfLiteSoftmaxParams* params) {
+// Takes a tensor and performs softmax along the last dimension.
+void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                  TfLiteSoftmaxParams* params) {
   SoftmaxParams op_params;
   op_params.beta = static_cast<double>(params->beta);
   tflite::reference_ops::Softmax(
@@ -201,51 +128,20 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_STATUS(
       CalculateSoftmaxOpData(context, input, output, params, data));
 
-  // TODO(ahentz): consider an implementation that works for many (all?)
-  // dimensions.
   switch (input->type) {
     case kTfLiteFloat32: {
-      if (NumDimensions(input) == 1) {
-        Softmax1DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 2) {
-        Softmax2DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 4) {
-        Softmax4DFloat(input, output, params);
-        return kTfLiteOk;
-      }
-      TF_LITE_KERNEL_LOG(
-          context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
-          NumDimensions(input));
-      return kTfLiteError;
+      SoftmaxFloat(input, output, params);
+      return kTfLiteOk;
    }
     case kTfLiteInt8:
     case kTfLiteUInt8: {
-      if (NumDimensions(input) == 1) {
-        Softmax1DQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 2) {
-        Softmax2DQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      if (NumDimensions(input) == 3 || NumDimensions(input) == 4) {
-        SoftmaxQuantized(input, output, params, data);
-        return kTfLiteOk;
-      }
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Only 1D, 2D, 3D and 4D tensors supported currently, got %dD.",
-          NumDimensions(input));
-      return kTfLiteError;
+      SoftmaxQuantized(input, output, params, data);
+      return kTfLiteOk;
     }
     default:
       TF_LITE_KERNEL_LOG(
           context,
-          "Only float32, uint8_t and int8_t supported currently, got %d.",
+          "Only float32, uint8_t and int8_t input supported currently, got %d.",
          input->type);
       return kTfLiteError;
   }
diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc
index 4631791fede..4336dccbb46 100644
--- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc
+++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc
@@ -160,24 +160,22 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
 }
 
 }  // namespace
 
-void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
-                        TfLiteSoftmaxParams* params, OpData* data) {
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
-  RuntimeShape shape(4, shape_data);
+void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                      TfLiteSoftmaxParams* params, OpData* data) {
   SoftmaxParams op_params;
   op_params.input_multiplier = data->input_multiplier;
   op_params.input_left_shift = data->input_left_shift;
   op_params.diff_min = data->diff_min;
   if (output->type == kTfLiteInt16) {
-    xtensa::hifimini::Softmax(op_params, shape, GetTensorData<int8_t>(input),
-                              shape, GetTensorData<int16_t>(output));
+    xtensa::hifimini::Softmax(
+        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+        GetTensorShape(output), GetTensorData<int16_t>(output));
   } else {
-    xtensa::hifimini::Softmax(op_params, shape, GetTensorData<int8_t>(input),
-                              shape, GetTensorData<int8_t>(output));
+    xtensa::hifimini::Softmax(
+        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+        GetTensorShape(output), GetTensorData<int8_t>(output));
   }
 }
 
@@ -190,8 +188,11 @@ void Free(TfLiteContext* context, void* buffer) {}
 
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
 
   // TODO(b/132070898): Use statically slotted OpData structures until a
   // scratch memory API is ready.
@@ -213,17 +214,12 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {
     case kTfLiteInt8: {
-      if (NumDimensions(input) == 2) {
-        Softmax2DQuantized(input, output, params, op_data);
-        return kTfLiteOk;
-      }
-      TF_LITE_KERNEL_LOG(context,
-                         "Only 2D tensors supported currently, got %dD.",
-                         NumDimensions(input));
-      return kTfLiteError;
+      SoftmaxQuantized(input, output, params, op_data);
+      return kTfLiteOk;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Only int8_t supported currently, got %d.",
+      TF_LITE_KERNEL_LOG(context,
+                         "Only int8_t input supported currently, got %d.",
                          input->type);
       return kTfLiteError;
   }
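
Note (illustration only, not part of the patch): every kernel above now relies
on the contract stated in the commit message: softmax reduces over the last
dimension only, and all leading dimensions are flattened into batches. This is
the same outer_size/depth pair the cmsis-nn hunk computes with
MatchingFlatSizeSkipDim/MatchingDim. A minimal standalone sketch of that
contract follows; SoftmaxLastDim is a hypothetical name, not a TFLite API.

  #include <algorithm>
  #include <cmath>

  // Softmax over the trailing dimension of a tensor viewed as
  // [outer_size, depth]. A tensor of shape [d0, d1, ..., dn] maps to
  // outer_size = d0 * d1 * ... * d(n-1) and depth = dn, so any rank >= 1
  // works and no per-rank special case is needed.
  void SoftmaxLastDim(const float* in, int outer_size, int depth, float beta,
                      float* out) {
    for (int b = 0; b < outer_size; ++b, in += depth, out += depth) {
      // Subtract the max for numerical stability.
      float max_coeff = in[0];
      for (int i = 1; i < depth; ++i) max_coeff = std::max(max_coeff, in[i]);
      // Exponentiate and accumulate the normalizer.
      float exp_sum = 0.f;
      for (int i = 0; i < depth; ++i) {
        out[i] = std::exp((in[i] - max_coeff) * beta);
        exp_sum += out[i];
      }
      // Normalize so each row sums to 1.
      const float reciprocal_sum = 1.f / exp_sum;
      for (int i = 0; i < depth; ++i) out[i] *= reciprocal_sum;
    }
  }

For example, a [2, 3, 5] float tensor runs with outer_size = 6 and depth = 5,
which is why the removed 1D/2D/4D special cases were redundant.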