Port the cmsis-nn optimized kernels to the new TfLiteEvalTensor API.

PiperOrigin-RevId: 325082800 Change-Id: Ib7ce474449f1f8b537c5965dbdf685d1984cf983
2020-08-05 12:57:01 -07:00 · 2020-08-05 12:57:01 -07:00 · 8846105326
commit 8846105326
parent a07effec7d
7 changed files with 476 additions and 340 deletions
--- a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc
@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/memory_helpers.h"

 namespace tflite {
@ -96,18 +97,20 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
 }

 void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteTensor* input1,
-             const TfLiteTensor* input2, TfLiteTensor* output) {
+             const OpData* data, const TfLiteEvalTensor* input1,
+             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  tflite::ArithmeticParams op_params;
  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-#define TF_LITE_ADD(opname)                                                   \
-  reference_ops::opname(op_params, GetTensorShape(input1),                    \
-                        GetTensorData<float>(input1), GetTensorShape(input2), \
-                        GetTensorData<float>(input2), GetTensorShape(output), \
-                        GetTensorData<float>(output))
+#define TF_LITE_ADD(opname)                                               \
+  reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
+                        tflite::micro::GetTensorData<float>(input1),      \
+                        tflite::micro::GetTensorShape(input2),            \
+                        tflite::micro::GetTensorData<float>(input2),      \
+                        tflite::micro::GetTensorShape(output),            \
+                        tflite::micro::GetTensorData<float>(output))
  if (data->requires_broadcast) {
    TF_LITE_ADD(BroadcastAdd4DSlow);
  } else {
@ -118,9 +121,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,

 TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                              TfLiteAddParams* params, const OpData* data,
-                              const TfLiteTensor* input1,
-                              const TfLiteTensor* input2,
-                              TfLiteTensor* output) {
+                              const TfLiteEvalTensor* input1,
+                              const TfLiteEvalTensor* input2,
+                              TfLiteEvalTensor* output) {
  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
    tflite::ArithmeticParams op_params;
    op_params.left_shift = data->left_shift;
@ -136,27 +139,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
    SetActivationParams(data->output_activation_min,
                        data->output_activation_max, &op_params);
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        GetTensorShape(input1), GetTensorShape(input2), &op_params);
-#define TF_LITE_ADD(type, opname, dtype)                             \
-  type::opname(op_params, GetTensorShape(input1),                    \
-               GetTensorData<dtype>(input1), GetTensorShape(input2), \
-               GetTensorData<dtype>(input2), GetTensorShape(output), \
-               GetTensorData<dtype>(output));
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorShape(input2), &op_params);
+#define TF_LITE_ADD(type, opname, dtype)                         \
+  type::opname(op_params, tflite::micro::GetTensorShape(input1), \
+               tflite::micro::GetTensorData<dtype>(input1),      \
+               tflite::micro::GetTensorShape(input2),            \
+               tflite::micro::GetTensorData<dtype>(input2),      \
+               tflite::micro::GetTensorShape(output),            \
+               tflite::micro::GetTensorData<dtype>(output));
    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
        TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
      } else {
        arm_elementwise_add_s8(
-            GetTensorData<int8_t>(input1), GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorData<int8_t>(input2),
            op_params.input1_offset, op_params.input1_multiplier,
            op_params.input1_shift, op_params.input2_offset,
            op_params.input2_multiplier, op_params.input2_shift,
-            op_params.left_shift, GetTensorData<int8_t>(output),
+            op_params.left_shift, tflite::micro::GetTensorData<int8_t>(output),
            op_params.output_offset, op_params.output_multiplier,
            op_params.output_shift, op_params.quantized_activation_min,
            op_params.quantized_activation_max,
-            MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2),
-                                 GetTensorShape(output)));
+            MatchingElementsSize(tflite::micro::GetTensorShape(input1),
+                                 tflite::micro::GetTensorShape(input2),
+                                 tflite::micro::GetTensorShape(output)));
      }
    } else {
      if (need_broadcast) {
@ -196,9 +204,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);

-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData* data = static_cast<const OpData*>(node->user_data);
--- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc
@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
 namespace ops {
@ -43,6 +44,12 @@ constexpr int kConvQuantizedDimension = 0;

 struct OpData {
  TfLitePaddingValues padding;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input_zero_point;
+  int32_t filter_zero_point;
+  int32_t output_zero_point;
+
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
@ -57,6 +64,9 @@ struct OpData {
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;
+
+  // Index to buffer for optimizations if applicable.
+  int buffer_idx;
 };

 inline PaddingType RuntimePaddingType(TfLitePadding padding) {
@ -110,16 +120,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,

 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(int));
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
 }

 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
-  OpData data;
  int32_t buf_size = 0;

+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
-
+  auto* data = reinterpret_cast<OpData*>(node->user_data);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
@ -148,11 +159,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  output_dims.w = output->dims->data[2];
  output_dims.c = output_shape.Dims(3);

-  int* buffer_idx = reinterpret_cast<int*>(node->user_data);
-
  TF_LITE_ENSURE_STATUS(CalculateOpData(
      context, node, params, input_dims.w, input_dims.h, filter_dims.w,
-      filter_dims.h, output_dims.w, output_dims.h, input->type, &data));
+      filter_dims.h, output_dims.w, output_dims.h, input->type, data));
+
+  data->input_zero_point = input->params.zero_point;
+  data->filter_zero_point = filter->params.zero_point;
+  data->output_zero_point = output->params.zero_point;

  if (input->type == kTfLiteInt8) {
    // Initialize cmsis-nn convolution parameters
@ -163,40 +176,41 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
    conv_params.stride.w = params->stride_width;
    conv_params.dilation.h = params->dilation_height_factor;
    conv_params.dilation.w = params->dilation_width_factor;
-    conv_params.padding.h = data.padding.height;
-    conv_params.padding.w = data.padding.width;
-    conv_params.activation.min = data.output_activation_min;
-    conv_params.activation.max = data.output_activation_max;
+    conv_params.padding.h = data->padding.height;
+    conv_params.padding.w = data->padding.width;
+    conv_params.activation.min = data->output_activation_min;
+    conv_params.activation.max = data->output_activation_max;

    buf_size = arm_convolve_wrapper_s8_get_buffer_size(
        &conv_params, &input_dims, &filter_dims, &output_dims);
  }

-  node->user_data = buffer_idx;
  if (buf_size > 0) {
-    TF_LITE_ENSURE_STATUS(
-        context->RequestScratchBufferInArena(context, buf_size, buffer_idx));
+    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
+        context, buf_size, &data->buffer_idx));
  } else {
-    *buffer_idx = -1;
+    data->buffer_idx = -1;
  }
 #endif
  return kTfLiteOk;
 }

 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           TfLiteConvParams* params, OpData* data,
-                           const TfLiteTensor* input,
-                           const TfLiteTensor* filter, const TfLiteTensor* bias,
-                           TfLiteTensor* im2col, TfLiteTensor* hwcn_weights,
-                           TfLiteTensor* output) {
-  const int32_t input_offset = -input->params.zero_point;
-  const int32_t filter_offset = -filter->params.zero_point;
-  const int32_t output_offset = output->params.zero_point;
+                           TfLiteConvParams* params, const OpData& data,
+                           const TfLiteEvalTensor* input,
+                           const TfLiteEvalTensor* filter,
+                           const TfLiteEvalTensor* bias,
+                           TfLiteEvalTensor* im2col,
+                           TfLiteEvalTensor* hwcn_weights,
+                           TfLiteEvalTensor* output) {
+  const int32_t input_offset = -data.input_zero_point;
+  const int32_t filter_offset = -data.filter_zero_point;
+  const int32_t output_offset = data.output_zero_point;

  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
-  op_params.padding_values.width = data->padding.width;
-  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data.padding.width;
+  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
@ -204,46 +218,52 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
-  op_params.output_multiplier = data->output_multiplier;
-  op_params.output_shift = -data->output_shift;
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
-  reference_ops::Conv(op_params, GetTensorShape(input),
-                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
-                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
-                      GetTensorData<int32_t>(bias), GetTensorShape(output),
-                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
-                      GetTensorData<uint8_t>(im2col), nullptr);
+  op_params.output_multiplier = data.output_multiplier;
+  op_params.output_shift = -data.output_shift;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;
+  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
+                      tflite::micro::GetTensorData<uint8_t>(input),
+                      tflite::micro::GetTensorShape(filter),
+                      tflite::micro::GetTensorData<uint8_t>(filter),
+                      tflite::micro::GetTensorShape(bias),
+                      tflite::micro::GetTensorData<int32_t>(bias),
+                      tflite::micro::GetTensorShape(output),
+                      tflite::micro::GetTensorData<uint8_t>(output),
+                      tflite::micro::GetTensorShape(im2col),
+                      tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
  return kTfLiteOk;
 }

 TfLiteStatus EvalQuantizedPerChannel(
    TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params,
-    OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter,
-    const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) {
+    const OpData& data, const TfLiteEvalTensor* input,
+    const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
+    TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) {
  // Initialize cmsis-nn convolution parameters
  cmsis_nn_conv_params conv_params;
-  conv_params.input_offset = -input->params.zero_point;
-  conv_params.output_offset = output->params.zero_point;
+  conv_params.input_offset = -data.input_zero_point;
+  conv_params.output_offset = data.output_zero_point;
  conv_params.stride.h = params->stride_height;
  conv_params.stride.w = params->stride_width;
  conv_params.dilation.h = params->dilation_height_factor;
  conv_params.dilation.w = params->dilation_width_factor;
-  conv_params.padding.h = data->padding.height;
-  conv_params.padding.w = data->padding.width;
-  conv_params.activation.min = data->output_activation_min;
-  conv_params.activation.max = data->output_activation_max;
+  conv_params.padding.h = data.padding.height;
+  conv_params.padding.w = data.padding.width;
+  conv_params.activation.min = data.output_activation_min;
+  conv_params.activation.max = data.output_activation_max;

  // Initialize cmsis-nn per channel quantization parameters
  cmsis_nn_per_channel_quant_params quant_params;
-  quant_params.multiplier = data->per_channel_output_multiplier;
-  quant_params.shift = data->per_channel_output_shift;
+  quant_params.multiplier =
+      const_cast<int32_t*>(data.per_channel_output_multiplier);
+  quant_params.shift = const_cast<int32_t*>(data.per_channel_output_shift);

 #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE)
-  RuntimeShape filter_shape = GetTensorShape(filter);
-  RuntimeShape input_shape = GetTensorShape(input);
-  RuntimeShape output_shape = GetTensorShape(output);
-  RuntimeShape bias_shape = GetTensorShape(bias);
+  RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
+  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+  RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);

  // Consistency check.
  TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
@ -253,7 +273,7 @@ TfLiteStatus EvalQuantizedPerChannel(
  const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-  if (GetTensorData<int8_t>(bias)) {
+  if (tflite::micro::GetTensorData<int8_t>(bias)) {
    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
  }

@ -291,9 +311,8 @@ TfLiteStatus EvalQuantizedPerChannel(
  ctx.buf = nullptr;
  ctx.size = 0;

-  auto* buffer_idx = reinterpret_cast<int*>(node->user_data);
-  if (*buffer_idx > -1) {
-    ctx.buf = context->GetScratchBuffer(context, *buffer_idx);
+  if (data.buffer_idx > -1) {
+    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
    // Note: ctx.size is currently not used in cmsis-nn.
    // The buffer should be allocated in the Prepare function through
    // arm_convolve_wrapper_s8_get_buffer_size
@ -303,9 +322,10 @@ TfLiteStatus EvalQuantizedPerChannel(
  // the parameters passed
  arm_status status = arm_convolve_wrapper_s8(
      &ctx, &conv_params, &quant_params, &input_dims,
-      GetTensorData<int8_t>(input), &filter_dims, GetTensorData<int8_t>(filter),
-      &bias_dims, GetTensorData<int32_t>(bias), &output_dims,
-      GetTensorData<int8_t>(output));
+      tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+      tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+      tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
+      tflite::micro::GetTensorData<int8_t>(output));

  if (status == ARM_MATH_SUCCESS) {
    return kTfLiteOk;
@ -318,42 +338,47 @@ TfLiteStatus EvalQuantizedPerChannel(
    "CMSIS-NN optimization for conv not available for this target. Using reference kernel.")

  ConvParams op_params;
-  op_params.input_offset = -input->params.zero_point;
-  op_params.output_offset = output->params.zero_point;
+  op_params.input_offset = -data.input_zero_point;  // cached in Prepare
+  op_params.output_offset = data.output_zero_point;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.dilation_width_factor = params->dilation_width_factor;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.padding_values.height = data.padding.height;
+  op_params.padding_values.width = data.padding.width;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;

  reference_integer_ops::ConvPerChannel(
-      op_params, data->per_channel_output_multiplier,
-      data->per_channel_output_shift, GetTensorShape(input),
-      GetTensorData<int8_t>(input), GetTensorShape(filter),
-      GetTensorData<int8_t>(filter), GetTensorShape(bias),
-      GetTensorData<int32_t>(bias), GetTensorShape(output),
-      GetTensorData<int8_t>(output));
+      op_params, data.per_channel_output_multiplier,
+      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<int8_t>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<int32_t>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int8_t>(output));

 #endif
  return kTfLiteOk;
 }

 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
-                       TfLiteConvParams* params, OpData* data,
-                       const TfLiteTensor* input, const TfLiteTensor* filter,
-                       const TfLiteTensor* bias, TfLiteTensor* im2col,
-                       TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
+                       TfLiteConvParams* params, const OpData& data,
+                       const TfLiteEvalTensor* input,
+                       const TfLiteEvalTensor* filter,
+                       const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col,
+                       TfLiteEvalTensor* hwcn_weights,
+                       TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
-
+  // TODO(b/154032858): Investigate removing extra copies.
  ConvParams op_params;
  op_params.padding_type = RuntimePaddingType(params->padding);
-  op_params.padding_values.width = data->padding.width;
-  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data.padding.width;
+  op_params.padding_values.height = data.padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
@ -361,66 +386,47 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;

-  reference_ops::Conv(op_params, GetTensorShape(input),
-                      GetTensorData<float>(input), GetTensorShape(filter),
-                      GetTensorData<float>(filter), GetTensorShape(bias),
-                      GetTensorData<float>(bias), GetTensorShape(output),
-                      GetTensorData<float>(output), GetTensorShape(im2col),
-                      GetTensorData<float>(im2col));
+  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
+                      tflite::micro::GetTensorData<float>(input),
+                      tflite::micro::GetTensorShape(filter),
+                      tflite::micro::GetTensorData<float>(filter),
+                      tflite::micro::GetTensorShape(bias),
+                      tflite::micro::GetTensorData<float>(bias),
+                      tflite::micro::GetTensorShape(output),
+                      tflite::micro::GetTensorData<float>(output),
+                      tflite::micro::GetTensorShape(im2col),
+                      tflite::micro::GetTensorData<float>(im2col));
  return kTfLiteOk;
 }

 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);

-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kFilterTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

-  int input_width = input->dims->data[2];
-  int input_height = input->dims->data[1];
-  int filter_width = filter->dims->data[2];
-  int filter_height = filter->dims->data[1];
-  int output_width = output->dims->data[2];
-  int output_height = output->dims->data[1];
-
-  OpData data;
-
-  // All per-channel quantized tensors need valid zero point and scale arrays.
-  if (input->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
-                      kTfLiteAffineQuantization);
-
-    const auto* affine_quantization =
-        reinterpret_cast<TfLiteAffineQuantization*>(
-            filter->quantization.params);
-    TF_LITE_ENSURE(context, affine_quantization);
-    TF_LITE_ENSURE(context, affine_quantization->scale);
-    TF_LITE_ENSURE(context, affine_quantization->zero_point);
-    TF_LITE_ENSURE(context,
-                   affine_quantization->scale->size == 1 ||
-                       affine_quantization->scale->size ==
-                           filter->dims->data[kConvQuantizedDimension]);
-    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
-                      affine_quantization->zero_point->size);
-  }
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(
-      context, node, params, input_width, input_height, filter_width,
-      filter_height, output_width, output_height, input->type, &data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
-      return EvalFloat(context, node, params, &data, input, filter, bias,
-                       nullptr, nullptr, output);
+      return EvalFloat(context, node, params, data, input, filter, bias,
+                       nullptr, nullptr, output);
      break;
    case kTfLiteInt8:
-      return EvalQuantizedPerChannel(context, node, params, &data, input,
-                                     filter, bias, output, nullptr);
+      return EvalQuantizedPerChannel(context, node, params, data, input, filter,
+                                     bias, output, nullptr);
      break;
    case kTfLiteUInt8:
-      return EvalQuantized(context, node, params, &data, input, filter, bias,
+      return EvalQuantized(context, node, params, data, input, filter, bias,
                           nullptr, nullptr, output);
      break;
    default:
--- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc
@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
 namespace ops {
@ -44,6 +45,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3;

 struct OpData {
  TfLitePaddingValues padding;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input_zero_point;
+  int32_t filter_zero_point;
+  int32_t output_zero_point;
+
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
@ -115,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  const TfLiteType data_type = input->type;
  int width = SizeOfDimension(input, 2);
@ -150,8 +158,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                        filter_width, filter_height, data_type,
                                        data));

+  data->input_zero_point = input->params.zero_point;
+  data->filter_zero_point = filter->params.zero_point;
+  data->output_zero_point = output->params.zero_point;
+
  if (input->type == kTfLiteInt8) {
-    const TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    RuntimeShape input_shape = GetTensorShape(input);
    RuntimeShape output_shape = GetTensorShape(output);
    RuntimeShape filter_shape = GetTensorShape(filter);
@ -200,8 +211,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {

 void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, const OpData* data,
-               const TfLiteTensor* input, const TfLiteTensor* filter,
-               const TfLiteTensor* bias, TfLiteTensor* output) {
+               const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
+               const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
@ -220,25 +231,30 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.float_activation_max = output_activation_max;

  tflite::reference_ops::DepthwiseConv(
-      op_params, GetTensorShape(input), GetTensorData<float>(input),
-      GetTensorShape(filter), GetTensorData<float>(filter),
-      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
-      GetTensorData<float>(output));
+      op_params, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<float>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<float>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<float>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<float>(output));
 }

 void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
-                             const TfLiteTensor* input,
-                             const TfLiteTensor* filter,
-                             const TfLiteTensor* bias, TfLiteTensor* output) {
+                             const TfLiteEvalTensor* input,
+                             const TfLiteEvalTensor* filter,
+                             const TfLiteEvalTensor* bias,
+                             TfLiteEvalTensor* output) {
  cmsis_nn_dw_conv_params dw_conv_params;
  dw_conv_params.dilation.h = params->dilation_height_factor;
  dw_conv_params.dilation.w = params->dilation_width_factor;
  // Call to reference implementation can be removed when dilation is supported
  // in the optimized implementations.
  if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) {
-    dw_conv_params.input_offset = -input->params.zero_point;
-    dw_conv_params.output_offset = output->params.zero_point;
+    dw_conv_params.input_offset = -data->input_zero_point;
+    dw_conv_params.output_offset = data->output_zero_point;
    dw_conv_params.stride.h = params->stride_height;
    dw_conv_params.stride.w = params->stride_width;
    dw_conv_params.padding.h = data->padding.height;
@ -252,10 +268,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
    quant_params.multiplier = data->per_channel_output_multiplier;
    quant_params.shift = data->per_channel_output_shift;

-    RuntimeShape filter_shape = GetTensorShape(filter);
-    RuntimeShape input_shape = GetTensorShape(input);
-    RuntimeShape output_shape = GetTensorShape(output);
-    RuntimeShape bias_shape = GetTensorShape(bias);
+    RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
+    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+    RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);

    TFLITE_DCHECK_LE(dw_conv_params.activation.min,
                     dw_conv_params.activation.max);
@ -263,7 +279,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
    const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);

-    if (GetTensorData<int8_t>(bias)) {
+    if (tflite::micro::GetTensorData<int32_t>(bias)) {
      TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
    }

@ -300,13 +316,14 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
      ctx.buf = context->GetScratchBuffer(context, data->buffer_idx);
    }

-    TFLITE_DCHECK_EQ(arm_depthwise_conv_wrapper_s8(
-                         &ctx, &dw_conv_params, &quant_params, &input_dims,
-                         GetTensorData<int8_t>(input), &filter_dims,
-                         GetTensorData<int8_t>(filter), &bias_dims,
-                         GetTensorData<int32_t>(bias), &output_dims,
-                         GetTensorData<int8_t>(output)),
-                     ARM_MATH_SUCCESS);
+    TFLITE_DCHECK_EQ(
+        arm_depthwise_conv_wrapper_s8(
+            &ctx, &dw_conv_params, &quant_params, &input_dims,
+            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+            tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
+            tflite::micro::GetTensorData<int8_t>(output)),
+        ARM_MATH_SUCCESS);
  } else {
    DepthwiseParams op_params;
    op_params.padding_type = PaddingType::kSame;
@ -317,30 +334,34 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
    op_params.dilation_width_factor = params->dilation_width_factor;
    op_params.dilation_height_factor = params->dilation_height_factor;
    op_params.depth_multiplier = params->depth_multiplier;
-    op_params.input_offset = -input->params.zero_point;
+    op_params.input_offset = -data->input_zero_point;
    op_params.weights_offset = 0;
-    op_params.output_offset = output->params.zero_point;
+    op_params.output_offset = data->output_zero_point;
    // TODO(b/130439627): Use calculated value for clamping.
    op_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
    op_params.quantized_activation_max = std::numeric_limits<int8_t>::max();

    reference_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier,
-        data->per_channel_output_shift, GetTensorShape(input),
-        GetTensorData<int8_t>(input), GetTensorShape(filter),
-        GetTensorData<int8_t>(filter), GetTensorShape(bias),
-        GetTensorData<int32_t>(bias), GetTensorShape(output),
-        GetTensorData<int8_t>(output));
+        data->per_channel_output_shift, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<int8_t>(input),
+        tflite::micro::GetTensorShape(filter),
+        tflite::micro::GetTensorData<int8_t>(filter),
+        tflite::micro::GetTensorShape(bias),
+        tflite::micro::GetTensorData<int32_t>(bias),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<int8_t>(output));
  }
 }

 void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, const OpData* data,
-                   const TfLiteTensor* input, const TfLiteTensor* filter,
-                   const TfLiteTensor* bias, TfLiteTensor* output) {
-  const int32_t input_offset = -input->params.zero_point;
-  const int32_t filter_offset = -filter->params.zero_point;
-  const int32_t output_offset = output->params.zero_point;
+                   const TfLiteEvalTensor* input,
+                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
+                   TfLiteEvalTensor* output) {
+  const int32_t input_offset = -data->input_zero_point;
+  const int32_t filter_offset = -data->filter_zero_point;
+  const int32_t output_offset = data->output_zero_point;

  tflite::DepthwiseParams op_params;
  // Padding type is ignored, but still set.
@ -363,34 +384,39 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,

  if (1 == op_params.dilation_width_factor &&
      1 == op_params.dilation_height_factor) {
-    RuntimeShape filter_shape = GetTensorShape(filter);
+    RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
    const int filter_height = filter_shape.Dims(1);
    const int filter_width = filter_shape.Dims(2);
-    RuntimeShape input_shape = GetTensorShape(input);
+    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
    const int input_height = input_shape.Dims(1);
    const int input_width = input_shape.Dims(2);
    const int input_depth = input_shape.Dims(3);
-    RuntimeShape output_shape = GetTensorShape(output);
+    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
    const int output_height = output_shape.Dims(1);
    const int output_width = output_shape.Dims(2);
    arm_depthwise_conv_u8_basic_ver1(
-        GetTensorData<uint8_t>(input), input_width, input_height, input_depth,
-        GetTensorData<uint8_t>(filter), filter_width, filter_height,
-        op_params.depth_multiplier, op_params.padding_values.width,
-        op_params.padding_values.height, op_params.stride_width,
-        op_params.stride_height, op_params.dilation_width_factor,
-        op_params.dilation_height_factor, GetTensorData<int32_t>(bias),
-        op_params.input_offset, op_params.weights_offset,
-        op_params.output_offset, GetTensorData<uint8_t>(output), output_width,
+        tflite::micro::GetTensorData<uint8_t>(input), input_width, input_height,
+        input_depth, tflite::micro::GetTensorData<uint8_t>(filter),
+        filter_width, filter_height, op_params.depth_multiplier,
+        op_params.padding_values.width, op_params.padding_values.height,
+        op_params.stride_width, op_params.stride_height,
+        op_params.dilation_width_factor, op_params.dilation_height_factor,
+        tflite::micro::GetTensorData<int32_t>(bias), op_params.input_offset,
+        op_params.weights_offset, op_params.output_offset,
+        tflite::micro::GetTensorData<uint8_t>(output), output_width,
        output_height, op_params.quantized_activation_min,
        op_params.quantized_activation_max, op_params.output_shift,
        op_params.output_multiplier);
  } else {
    tflite::reference_ops::DepthwiseConv(
-        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
-        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
-        GetTensorShape(bias), GetTensorData<int32_t>(bias),
-        GetTensorShape(output), GetTensorData<uint8_t>(output));
+        op_params, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<uint8_t>(input),
+        tflite::micro::GetTensorShape(filter),
+        tflite::micro::GetTensorData<uint8_t>(filter),
+        tflite::micro::GetTensorShape(bias),
+        tflite::micro::GetTensorData<int32_t>(bias),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<uint8_t>(output));
  }
 }

@ -402,11 +428,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData& data = *(static_cast<OpData*>(node->user_data));

-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias =
-      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kFilterTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
+          : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
--- a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc
@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
 namespace ops {
@ -43,6 +44,11 @@ struct OpData {
  int input_quantized_index;
  // Index to buffer for optimizations if applicable.
  int buffer_idx;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input_zero_point;
+  int32_t filter_zero_point;
+  int32_t output_zero_point;
 };

 constexpr int kInputTensor = 0;
@ -69,6 +75,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, activation, output, &data->output_activation_min,
        &data->output_activation_max));
+    data->input_zero_point = input->params.zero_point;
+    data->filter_zero_point = filter->params.zero_point;
+    data->output_zero_point = output->params.zero_point;
  }
  return status;
 }
@ -125,25 +134,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }

 TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
-                               const OpData& data, const TfLiteTensor* input,
-                               const TfLiteTensor* filter,
-                               const TfLiteTensor* bias, TfLiteTensor* output) {
+                               const OpData& data,
+                               const TfLiteEvalTensor* input,
+                               const TfLiteEvalTensor* filter,
+                               const TfLiteEvalTensor* bias,
+                               TfLiteEvalTensor* output) {
  // The 'if' condition can be removed when null handling of bias is added to
  // arm_fully_connected_s8
-  if (nullptr != GetTensorData<int32_t>(bias)) {
-    RuntimeShape output_shape = GetTensorShape(output);
+  if (nullptr != tflite::micro::GetTensorData<int32_t>(bias)) {
+    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
    const int batches = output_shape.Dims(0);
    const int output_depth = output_shape.Dims(1);
-    const RuntimeShape filter_shape = GetTensorShape(filter);
+    const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
    const int filter_dim_count = filter_shape.DimensionsCount();
    const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-    const RuntimeShape input_shape = GetTensorShape(input);
+    const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);

    cmsis_nn_fc_params fc_params;
-    fc_params.input_offset = -input->params.zero_point;
-    fc_params.filter_offset = -filter->params.zero_point;
-    fc_params.output_offset = output->params.zero_point;
+    fc_params.input_offset = -data.input_zero_point;
+    fc_params.output_offset = data.output_zero_point;
    fc_params.activation.min = data.output_activation_min;
    fc_params.activation.max = data.output_activation_max;

@ -186,17 +196,18 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,

    TF_LITE_ENSURE_EQ(
        context,
-        arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims,
-                               GetTensorData<int8_t>(input), &filter_dims,
-                               GetTensorData<int8_t>(filter), &bias_dims,
-                               GetTensorData<int32_t>(bias), &output_dims,
-                               GetTensorData<int8_t>(output)),
+        arm_fully_connected_s8(
+            &ctx, &fc_params, &quant_params, &input_dims,
+            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+            tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
+            tflite::micro::GetTensorData<int8_t>(output)),
        ARM_MATH_SUCCESS);
  } else {
    tflite::FullyConnectedParams op_params;
-    op_params.input_offset = -input->params.zero_point;
-    op_params.weights_offset = -filter->params.zero_point;
-    op_params.output_offset = output->params.zero_point;
+    op_params.input_offset = -data.input_zero_point;
+    op_params.weights_offset = -data.filter_zero_point;
+    op_params.output_offset = data.output_zero_point;
    op_params.output_multiplier = data.output_multiplier;
    // TODO(b/138810107): Figure out whether output shift should be inverted
    op_params.output_shift = -data.output_shift;
@ -204,21 +215,26 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
    op_params.quantized_activation_max = data.output_activation_max;

    reference_integer_ops::FullyConnected(
-        op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
-        GetTensorShape(filter), GetTensorData<int8_t>(filter),
-        GetTensorShape(bias), GetTensorData<int32_t>(bias),
-        GetTensorShape(output), GetTensorData<int8_t>(output));
+        op_params, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<int8_t>(input),
+        tflite::micro::GetTensorShape(filter),
+        tflite::micro::GetTensorData<int8_t>(filter),
+        tflite::micro::GetTensorShape(bias),
+        tflite::micro::GetTensorData<int32_t>(bias),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<int8_t>(output));
  }
  return kTfLiteOk;
 }

 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           const OpData& data, const TfLiteTensor* input,
-                           const TfLiteTensor* filter, const TfLiteTensor* bias,
-                           TfLiteTensor* output) {
-  const int32_t input_offset = -input->params.zero_point;
-  const int32_t filter_offset = -filter->params.zero_point;
-  const int32_t output_offset = output->params.zero_point;
+                           const OpData& data, const TfLiteEvalTensor* input,
+                           const TfLiteEvalTensor* filter,
+                           const TfLiteEvalTensor* bias,
+                           TfLiteEvalTensor* output) {
+  const int32_t input_offset = -data.input_zero_point;
+  const int32_t filter_offset = -data.filter_zero_point;
+  const int32_t output_offset = data.output_zero_point;

  tflite::FullyConnectedParams op_params;
  op_params.input_offset = input_offset;
@ -230,12 +246,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
  op_params.quantized_activation_min = data.output_activation_min;
  op_params.quantized_activation_max = data.output_activation_max;

-#define TF_LITE_FULLY_CONNECTED(output_data_type)                      \
-  reference_ops::FullyConnected(                                       \
-      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), \
-      GetTensorShape(filter), GetTensorData<uint8_t>(filter),          \
-      GetTensorShape(bias), GetTensorData<int32_t>(bias),              \
-      GetTensorShape(output), GetTensorData<output_data_type>(output))
+#define TF_LITE_FULLY_CONNECTED(output_data_type)      \
+  reference_ops::FullyConnected(                       \
+      op_params, tflite::micro::GetTensorShape(input), \
+      tflite::micro::GetTensorData<uint8_t>(input),    \
+      tflite::micro::GetTensorShape(filter),           \
+      tflite::micro::GetTensorData<uint8_t>(filter),   \
+      tflite::micro::GetTensorShape(bias),             \
+      tflite::micro::GetTensorData<int32_t>(bias),     \
+      tflite::micro::GetTensorShape(output),           \
+      tflite::micro::GetTensorData<output_data_type>(output))
  switch (output->type) {
    case kTfLiteUInt8:
      TF_LITE_FULLY_CONNECTED(uint8_t);
@ -254,8 +274,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,

 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteFusedActivation activation,
-                       const TfLiteTensor* input, const TfLiteTensor* filter,
-                       const TfLiteTensor* bias, TfLiteTensor* output) {
+                       const TfLiteEvalTensor* input,
+                       const TfLiteEvalTensor* filter,
+                       const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(activation, &output_activation_min,
                           &output_activation_max);
@ -263,10 +284,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  tflite::reference_ops::FullyConnected(
-      op_params, GetTensorShape(input), GetTensorData<float>(input),
-      GetTensorShape(filter), GetTensorData<float>(filter),
-      GetTensorShape(bias), GetTensorData<float>(bias), GetTensorShape(output),
-      GetTensorData<float>(output));
+      op_params, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<float>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<float>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<float>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<float>(output));
  return kTfLiteOk;
 }

@ -275,10 +300,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const auto* params =
      static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);

-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      tflite::micro::GetEvalInput(context, node, kBiasTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  TFLITE_DCHECK(node->user_data != nullptr);
  const OpData& data = *(static_cast<const OpData*>(node->user_data));
--- a/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc
@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/memory_helpers.h"

 namespace tflite {
@ -38,6 +39,11 @@ struct OpData {

  int32_t output_multiplier;
  int output_shift;
+
+  // Cached tensor zero point values for quantized operations.
+  int32_t input1_zero_point;
+  int32_t input2_zero_point;
+  int32_t output_zero_point;
 };

 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
@ -65,6 +71,11 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
  return kTfLiteOk;
 }

+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
@ -74,44 +85,59 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
    return AllocateOutputDimensionsFromInput(context, input1, input2, output);
  }

+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  data->input1_zero_point = input1->params.zero_point;
+  data->input2_zero_point = input2->params.zero_point;
+  data->output_zero_point = output->params.zero_point;
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, data));
+
  return kTfLiteOk;
 }

 void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                   TfLiteMulParams* params, OpData* data,
-                   const TfLiteTensor* input1, const TfLiteTensor* input2,
-                   TfLiteTensor* output) {
+                   TfLiteMulParams* params, const OpData& data,
+                   const TfLiteEvalTensor* input1,
+                   const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) {
    tflite::ArithmeticParams op_params;
-    SetActivationParams(data->output_activation_min,
-                        data->output_activation_max, &op_params);
-    op_params.input1_offset = -input1->params.zero_point;
-    op_params.input2_offset = -input2->params.zero_point;
-    op_params.output_offset = output->params.zero_point;
-    op_params.output_multiplier = data->output_multiplier;
-    op_params.output_shift = data->output_shift;
+    SetActivationParams(data.output_activation_min, data.output_activation_max,
+                        &op_params);
+    op_params.input1_offset = -data.input1_zero_point;
+    op_params.input2_offset = -data.input2_zero_point;
+    op_params.output_offset = data.output_zero_point;
+    op_params.output_multiplier = data.output_multiplier;
+    op_params.output_shift = data.output_shift;
    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        GetTensorShape(input1), GetTensorShape(input2), &op_params);
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorShape(input2), &op_params);

-#define TF_LITE_MUL(type, opname, dtype)                             \
-  type::opname(op_params, GetTensorShape(input1),                    \
-               GetTensorData<dtype>(input1), GetTensorShape(input2), \
-               GetTensorData<dtype>(input2), GetTensorShape(output), \
-               GetTensorData<dtype>(output));
+#define TF_LITE_MUL(type, opname, dtype)                         \
+  type::opname(op_params, tflite::micro::GetTensorShape(input1), \
+               tflite::micro::GetTensorData<dtype>(input1),      \
+               tflite::micro::GetTensorShape(input2),            \
+               tflite::micro::GetTensorData<dtype>(input2),      \
+               tflite::micro::GetTensorShape(output),            \
+               tflite::micro::GetTensorData<dtype>(output));

    if (output->type == kTfLiteInt8) {
      if (need_broadcast) {
        TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
      } else {
        arm_elementwise_mul_s8(
-            GetTensorData<int8_t>(input1), GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorData<int8_t>(input2),
            op_params.input1_offset, op_params.input2_offset,
-            GetTensorData<int8_t>(output), op_params.output_offset,
-            op_params.output_multiplier, op_params.output_shift,
-            op_params.quantized_activation_min,
+            tflite::micro::GetTensorData<int8_t>(output),
+            op_params.output_offset, op_params.output_multiplier,
+            op_params.output_shift, op_params.quantized_activation_min,
            op_params.quantized_activation_max,
-            MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2),
-                                 GetTensorShape(output)));
+            MatchingElementsSize(tflite::micro::GetTensorShape(input1),
+                                 tflite::micro::GetTensorShape(input2),
+                                 tflite::micro::GetTensorShape(output)));
      }
    } else if (output->type == kTfLiteUInt8) {
      if (need_broadcast) {
@ -125,9 +151,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 }

 void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, OpData* data,
-               const TfLiteTensor* input1, const TfLiteTensor* input2,
-               TfLiteTensor* output) {
+               TfLiteMulParams* params, const TfLiteEvalTensor* input1,
+               const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
@ -135,12 +160,15 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
  SetActivationParams(output_activation_min, output_activation_max, &op_params);

  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      GetTensorShape(input1), GetTensorShape(input2), &op_params);
-#define TF_LITE_MUL(opname)                                                   \
-  reference_ops::opname(op_params, GetTensorShape(input1),                    \
-                        GetTensorData<float>(input1), GetTensorShape(input2), \
-                        GetTensorData<float>(input2), GetTensorShape(output), \
-                        GetTensorData<float>(output));
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
+#define TF_LITE_MUL(opname)                                               \
+  reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
+                        tflite::micro::GetTensorData<float>(input1),      \
+                        tflite::micro::GetTensorShape(input2),            \
+                        tflite::micro::GetTensorData<float>(input2),      \
+                        tflite::micro::GetTensorShape(output),            \
+                        tflite::micro::GetTensorData<float>(output));

  if (need_broadcast) {
    TF_LITE_MUL(BroadcastMul4DSlow);
@ -152,21 +180,24 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,

 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
-  OpData data;

-  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
-  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInput2Tensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

-  CalculateOpData(context, node, params, &data);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));

  switch (input1->type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
-      EvalQuantized(context, node, params, &data, input1, input2, output);
+      EvalQuantized(context, node, params, data, input1, input2, output);
      break;
    case kTfLiteFloat32:
-      EvalFloat(context, node, params, &data, input1, input2, output);
+      EvalFloat(context, node, params, input1, input2, output);
      break;
    default:
      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
@ -179,8 +210,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace mul

 TfLiteRegistration Register_MUL() {
-  return {nullptr /* Init */, nullptr /* Free */, nullptr /* Prepare */,
-          mul::Eval};
+  return {mul::Init, nullptr /* Free */, mul::Prepare, mul::Eval};
 }

 }  // namespace micro
--- a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc
@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
 namespace ops {
@ -72,7 +73,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,

 void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
                      const TfLitePoolParams* params, const OpData& data,
-                      const TfLiteTensor* input, TfLiteTensor* output) {
+                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);
@ -86,14 +87,16 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
  op_params.padding_values.width = data.padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
-  reference_ops::AveragePool(
-      op_params, GetTensorShape(input), GetTensorData<float>(input),
-      GetTensorShape(output), GetTensorData<float>(output));
+  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
+                             tflite::micro::GetTensorData<float>(input),
+                             tflite::micro::GetTensorShape(output),
+                             tflite::micro::GetTensorData<float>(output));
 }

 void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                          const TfLitePoolParams* params, const OpData& data,
-                          const TfLiteTensor* input, TfLiteTensor* output) {
+                          const TfLiteEvalTensor* input,
+                          TfLiteEvalTensor* output) {
  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);

  PoolParams op_params;
@ -107,14 +110,15 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
  op_params.quantized_activation_max = data.activation_max;

  if (input->type == kTfLiteUInt8) {
-    reference_ops::AveragePool(
-        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
-        GetTensorShape(output), GetTensorData<uint8_t>(output));
+    reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
+                               tflite::micro::GetTensorData<uint8_t>(input),
+                               tflite::micro::GetTensorShape(output),
+                               tflite::micro::GetTensorData<uint8_t>(output));
  } else {
-    RuntimeShape input_shape = GetTensorShape(input);
+    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);

-    RuntimeShape output_shape = GetTensorShape(output);
+    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);

    const int depth = MatchingDim(input_shape, 3, output_shape, 3);
@ -154,15 +158,16 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,

    TFLITE_DCHECK_EQ(
        arm_avgpool_s8(&ctx, &pool_params, &input_dims,
-                       GetTensorData<int8_t>(input), &filter_dims, &output_dims,
-                       GetTensorData<int8_t>(output)),
+                       tflite::micro::GetTensorData<int8_t>(input),
+                       &filter_dims, &output_dims,
+                       tflite::micro::GetTensorData<int8_t>(output)),
        ARM_MATH_SUCCESS);
  }
 }

 void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                  TfLitePoolParams* params, const OpData& data,
-                  TfLiteTensor* input, TfLiteTensor* output) {
+                  const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
  float activation_min, activation_max;
  CalculateActivationRange(params->activation, &activation_min,
                           &activation_max);
@@ -175,14 +180,16 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
  op_params.padding_values.width = data.padding.width;
  op_params.float_activation_min = activation_min;
  op_params.float_activation_max = activation_max;
-  reference_ops::MaxPool(op_params, GetTensorShape(input),
-                         GetTensorData<float>(input), GetTensorShape(output),
-                         GetTensorData<float>(output));
+  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<float>(input),
+                         tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<float>(output));
 }

 void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
                           TfLitePoolParams* params, const OpData& data,
-                           TfLiteTensor* input, TfLiteTensor* output) {
+                           const TfLiteEvalTensor* input,
+                           TfLiteEvalTensor* output) {
  tflite::PoolParams op_params;
  op_params.stride_height = params->stride_height;
  op_params.stride_width = params->stride_width;
@@ -192,16 +199,18 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
  op_params.padding_values.width = data.padding.width;
  op_params.quantized_activation_min = data.activation_min;
  op_params.quantized_activation_max = data.activation_max;
-  reference_ops::MaxPool(op_params, GetTensorShape(input),
-                         GetTensorData<uint8_t>(input), GetTensorShape(output),
-                         GetTensorData<uint8_t>(output));
+  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<uint8_t>(input),
+                         tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<uint8_t>(output));
 }

 TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
                         const TfLitePoolParams* params, const OpData& data,
-                         TfLiteTensor* input, TfLiteTensor* output) {
-  RuntimeShape input_shape = GetTensorShape(input);
-  RuntimeShape output_shape = GetTensorShape(output);
+                         const TfLiteEvalTensor* input,
+                         TfLiteEvalTensor* output) {
+  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
  const int depth = MatchingDim(input_shape, 3, output_shape, 3);

  cmsis_nn_dims input_dims;
@@ -237,10 +246,12 @@ TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
  }

-  TFLITE_DCHECK_EQ(arm_max_pool_s8(&ctx, &pool_params, &input_dims,
-                                   GetTensorData<int8_t>(input), &filter_dims,
-                                   &output_dims, GetTensorData<int8_t>(output)),
-                   ARM_MATH_SUCCESS);
+  TFLITE_DCHECK_EQ(
+      arm_max_pool_s8(&ctx, &pool_params, &input_dims,
+                      tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+                      &output_dims,
+                      tflite::micro::GetTensorData<int8_t>(output)),
+      ARM_MATH_SUCCESS);

  return kTfLiteOk;
 }
@@ -307,8 +318,10 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {

  const OpData& data = *(static_cast<const OpData*>(node->user_data));

-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  // Inputs and outputs share the same type, guaranteed by the converter.
  switch (input->type) {
@@ -332,9 +345,10 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {

  const OpData& data = *(static_cast<const OpData*>(node->user_data));

-  TfLiteTensor* input = &context->tensors[flatbuffers::EndianScalar(
-      node->inputs->data[kInputTensor])];
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);

  switch (input->type) {
    case kTfLiteFloat32:
--- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

 namespace tflite {
 namespace ops {
@@ -47,8 +48,6 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
        TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
      }
    }
-    TF_LITE_ENSURE(context, (output->params.scale == 1.f / 256) ||
-                                (output->params.scale == 1.f / 255));

    static const int kScaledDiffIntegerBits = 5;

@@ -71,37 +70,53 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,

 }  // namespace

+void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
+}
+
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+
  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  const TfLiteTensor* input = GetInput(context, node, 0);
  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);

-  return kTfLiteOk;
+  TfLiteTensor* output = GetOutput(context, node, 0);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
+  return CalculateSoftmaxParams(context, input, output, params, data);
 }

 // Takes a tensor and performs softmax along the last dimension.
-void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output,
+void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                  const SoftmaxParams& op_data) {
-  tflite::reference_ops::Softmax(
-      op_data, GetTensorShape(input), GetTensorData<float>(input),
-      GetTensorShape(output), GetTensorData<float>(output));
+  tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input),
+                                 tflite::micro::GetTensorData<float>(input),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<float>(output));
 }

-void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                      const SoftmaxParams& op_data) {
-  const auto input_shape = GetTensorShape(input);
-  const auto output_shape = GetTensorShape(output);
+  const auto input_shape = tflite::micro::GetTensorShape(input);
+  const auto output_shape = tflite::micro::GetTensorShape(output);

  if (input->type == kTfLiteUInt8) {
-    tflite::reference_ops::Softmax(op_data, input_shape,
-                                   GetTensorData<uint8_t>(input), output_shape,
-                                   GetTensorData<uint8_t>(output));
+    tflite::reference_ops::Softmax(
+        op_data, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<uint8_t>(input),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<uint8_t>(output));
  } else {
    if (output->type == kTfLiteInt16) {
      tflite::reference_ops::Softmax(
-          op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
-          GetTensorShape(output), GetTensorData<int16_t>(output));
+          op_data, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
    } else {
      const int trailing_dim = input_shape.DimensionsCount() - 1;
      const int outer_size =
@@ -109,31 +124,30 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output,
      const int depth =
          MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

-      arm_softmax_s8(GetTensorData<int8_t>(input), outer_size, depth,
-                     op_data.input_multiplier, op_data.input_left_shift,
-                     op_data.diff_min, GetTensorData<int8_t>(output));
+      arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
+                     depth, op_data.input_multiplier, op_data.input_left_shift,
+                     op_data.diff_min,
+                     tflite::micro::GetTensorData<int8_t>(output));
    }
  }
 }

 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
-  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

-  const TfLiteTensor* input = GetInput(context, node, 0);
-  TfLiteTensor* output = GetOutput(context, node, 0);
-
-  SoftmaxParams op_data;
-  TF_LITE_ENSURE_STATUS(
-      CalculateSoftmaxParams(context, input, output, params, &op_data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const SoftmaxParams& data =
+      *(static_cast<const SoftmaxParams*>(node->user_data));

  switch (input->type) {
    case kTfLiteFloat32: {
-      SoftmaxFloat(input, output, op_data);
+      SoftmaxFloat(input, output, data);
      return kTfLiteOk;
    }
    case kTfLiteInt8:
    case kTfLiteUInt8: {
-      SoftmaxQuantized(input, output, op_data);
+      SoftmaxQuantized(input, output, data);
      return kTfLiteOk;
    }
    default:
@@ -142,10 +156,11 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
      return kTfLiteError;
  }
 }
+
 }  // namespace activations

 TfLiteRegistration Register_SOFTMAX() {
-  return {/*init=*/nullptr,
+  return {/*init=*/activations::SoftmaxInit,
          /*free=*/nullptr,
          /*prepare=*/activations::SoftmaxPrepare,
          /*invoke=*/activations::SoftmaxEval,