diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 1ba500fd61b..a1003a84201 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -69,7 +69,6 @@ cc_library( "xtensa_hifimini/quantize.cc", "xtensa_hifimini/softmax.cc", "xtensa_hifimini/svdf.cc", - "xtensa_hifimini/utils.h", ], }), hdrs = ["micro_ops.h"], diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index 7a31eb77491..03eba5082af 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -66,7 +65,7 @@ void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, const int output_width = output_shape.Dims(2); const int output_depth = output_shape.Dims(3); - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(output_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(output_activation_max); @@ -150,9 +149,6 @@ void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, acc_24x2, output_multiplier[out_channel], output_shift[out_channel]); - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); - // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); acc_56 = AE_MINQ56S(acc_56, output_activation_max_56); @@ -178,7 +174,7 @@ inline void Conv1x32Input32x32Filter( const RuntimeShape& 
filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, int8* output_data) { - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(quantized_activation_max); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(quantized_activation_min); @@ -227,13 +223,10 @@ inline void Conv1x32Input32x32Filter( acc_56 = AE_Q56S_SLAI(acc_56, 8); ae_p24x2s acc_24x2 = AE_TRUNCP24Q48(acc_56); - // Apply quantized multiplier and accumulate result at 48bit - // alignment: + // Apply quantized multiplier and accumulate result at 48bit alignment. + // Convert the (unsigned) 32-bit multiplier down to a 24-bit multiplier. acc_56 = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( - acc_24x2, output_multiplier[ch], output_shift[ch]); - - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); + acc_24x2, output_multiplier[ch] >> 8, output_shift[ch]); // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index 4781f70b1fa..75bc29efdfc 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -25,7 +25,6 @@ limitations under the License. 
#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -69,7 +68,7 @@ inline void DepthwiseConvPerChannel( const int output_width = output_shape.Dims(2); const int output_depth = output_shape.Dims(3); - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(output_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(output_activation_max); @@ -114,14 +113,14 @@ inline void DepthwiseConvPerChannel( // shift into 24bit space. Note: value is duplicated in the HH // and LL register - but all calculations are done on the HH // side. - ae_p24x2s input_val_24x2 = AE_CONVERT_INT32_24x2(input_val); + ae_p24x2s input_val_24x2 = AE_MOVPA24(input_val); // Add input offset (24bit aligned): input_val_24x2 = AE_P24S_ADDS_P24X2S(input_val_24x2, input_offset_24x2); // Load filter 8bit value into 24bit alignment: - ae_p24x2s filter_val_24x2 = AE_CONVERT_INT32_24x2(filter_val); + ae_p24x2s filter_val_24x2 = AE_MOVPA24(filter_val); // Multiply and accumulate the HH side of each 24x24 PR // register: @@ -150,9 +149,6 @@ inline void DepthwiseConvPerChannel( acc_24x2, output_multiplier[output_channel], output_shift[output_channel]); - // Shift from 48bit aligned to 32bit: - acc_56 = AE_Q56S_SLAI(acc_56, 16); - // Add output offset, cap activation, and assign to the output: acc_56 = AE_ADDQ56(acc_56, output_offset_56); acc_56 = AE_MINQ56S(acc_56, output_activation_max_56); @@ -181,9 +177,10 @@ inline void DepthwiseConv4x32MatchingInputAndFilter( const RuntimeShape& filter_shape, const int8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& 
output_shape, int8* output_data) { - const int32_t mult = output_multiplier[0]; + // Convert the (unsigned) 32-bit multiplier down to a 24-bit multiplier. + const int32_t mult = output_multiplier[0] >> 8; const int32_t shift = output_shift[0]; - ae_p24x2s input_offset_24x2 = AE_CONVERT_INT32_24x2(input_offset); + ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_min_56 = AE_CVTQ48A32S(quantized_activation_min); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(quantized_activation_max); @@ -270,10 +267,6 @@ inline void DepthwiseConv4x32MatchingInputAndFilter( block_1_acc = micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( acc_24x2_1, mult, shift); - // Shift from 48bit aligned to 32bit: - block_0_acc = AE_Q56S_SLAI(block_0_acc, 16); - block_1_acc = AE_Q56S_SLAI(block_1_acc, 16); - // Add output offset, cap activation, and assign to the output: block_0_acc = AE_ADDQ56(block_0_acc, output_offset_56); block_1_acc = AE_ADDQ56(block_1_acc, output_offset_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h b/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h index 4ffb3653f50..918192c4d8f 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h @@ -23,7 +23,6 @@ limitations under the License. #include #include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -31,80 +30,9 @@ namespace micro { namespace xtensa { namespace hifimini { -// -// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit -// aligned value in the QR register. 
-// -inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, - int32_t quantized_multiplier, - int shift) { - // These boolean factors will carry an additional 2^8 (e.g 256) factor - // throughout the equation to cover the missing 8 bits of precision when a - // 32bit integer is outside the bounds of INT24. The additional scaling factor - // will be adjusted after the final multiplication in this method. - // - // The Q-notation comments in this method describe the calculations that take - // place when both |x| and the shifted value of |1| overflow the INT24 limits. - bool x_exceeds_24bits = (x <= INT24_MIN || x >= INT24_MAX); - bool shift_exceeds_24bits = false; - - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s x_24x2 = AE_CONVERT_INT32_24x2(x); - - if (shift > 0) { - int shifted = 1 << shift; - if (shifted <= INT24_MIN || shifted >= INT24_MAX) { - shift_exceeds_24bits = true; - } - - // Load the shifted value into the PR register: - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s shifted_24x2 = AE_CONVERT_INT32_24x2(shifted); - - // (Q23.0 / 2^8) * (Q23.0 / 2^8) = Q47.0 / 2^16 - ae_q56s sum_56 = AE_MULP24S_HH(x_24x2, shifted_24x2); - - // Shift left into 24bit space: - // ((Q47.0 / 2^16) << 24) = Q23.24 / 2^16 - sum_56 = AE_Q56S_SLAI(sum_56, 24); - - // Truncate and place on the PR register: - // (Q23.24 / 2^16) -> Q23.0 / 2^16 - x_24x2 = AE_TRUNCP24Q48(sum_56); - } - - // Load the quantized multiplier into the PR register. - // NOTE: This method assumes that this param has been calculated for 24bit - // space - not 32bits. - // Q0.31 -> Q0.23 - ae_p24x2s quantized_multiplier_24x2 = - AE_CONVERT_INT32_24x2(quantized_multiplier); - - // Adjust for the additional 8 bits of lost precision throughout this - // function: - int shift_amount = 23; - if (x_exceeds_24bits) { - shift_amount = shift_amount - 8; - } - if (shift_exceeds_24bits) { - shift_amount = shift_amount - 8; - } - - // Find the product of x and the quantized_multiplier and right shift - // to 48bit aligned. 
- // (Q23.0 / 2^16) * Q23.0 = Q47.0 / 2^16 - // (Q47.0 / 2^16) >> 7 = Q47.0 - ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2); - if (shift_amount > 0) { - result_56 = AE_Q56S_SRA(result_56, shift_amount); - } - - if (shift < 0) { - // Handle any negative shift directly on the 48 bit value. - result_56 = AE_Q56S_SRA(result_56, -shift); - } - return result_56; -} +// INT24 MIN/MAX +#define INT24_MIN -8388608 +#define INT24_MAX 8388607 // // Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit @@ -113,62 +41,62 @@ inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2, int32_t quantized_multiplier, int shift) { - // NOTE: x_24x2 = Q23.0 - - // This is an optimized version of a 32 bit MultiplyByQuantizedMultiplier - // operation of TFLite. Sometimes, the shifted value of |x_24x2| can exceed - // the limits of INT24, which requires |AE_CONVERT_INT32_24x2()| to load the - // left-most 24 bits of a 32bit integer. When this occurs, all Q values here - // carry an additional division of 2^8 to account for this loss in precision. - // This division will be applied to the final shift after multiplication. + // A value with 1 sign bit, N integer bits and M fractional bits is + // represented as QN+1.M since the sign bit is included in the integer bits. + // + // The Q notation in this method explains the values represented in each + // variable, along with an implicit division since the quantized_multiplier + // represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision + // of the type). // - // The Q-notation comments in this method describe the calculations that take - // place when both |x| and the shifted value of |1| overflow the INT24 limits. 
- bool shift_exceeds_24bits = false; - - ae_p24x2s x_shifted_24x2 = x_24x2; - if (shift > 0) { - int shifted = 1 << shift; - if (shifted <= INT24_MIN || shifted >= INT24_MAX) { - shift_exceeds_24bits = true; - } - // Load the shifted value into the PR register: - // Q31.0 -> Q23.0 / 2^8 - ae_p24x2s shifted_24x2 = AE_CONVERT_INT32_24x2(shifted); - - // Q23.0 * (Q23.0 / 2^8) = Q47.0 / 2^8 - ae_q56s sum_56 = AE_MULP24S_HH(x_24x2, shifted_24x2); - - // Shift left into 24bit space: - // ((Q47.0 / 2^8) << 24) = Q23.24 / 2^8 - sum_56 = AE_Q56S_SLAI(sum_56, 24); - - // Truncate and place on the PR register: - // (Q23.24 / 2^8) -> Q23.0 / 2^8 - x_shifted_24x2 = AE_ROUNDSP24Q48SYM(sum_56); - } - // Load the quantized multiplier into the PR register. // NOTE: This method assumes that this param has been calculated for 24bit // space - not 32bits. - // Q0.31 -> Q0.23 - ae_p24x2s quantized_multiplier_24x2 = - AE_CONVERT_INT32_24x2(quantized_multiplier); + // Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier. + ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier); + // Shift right by 23 - 16 bits minus the specified shift. This is because we + // keep 16 fractional bits until the end to perform rounding. Subtract shift + // since shift is a left shift, and the 23-16 is a right shift. + int shift_amount = 7 - shift; - // Find the product of x and the quantized_multiplier and right shift - // to 48bit aligned. - // NOTE: Adjust for the additional 8 bits of lost precision throughout this - // function: - // (Q23.0 / 2^8) * Q23.0 = Q47.0 / 2^8 - // (Q47.0 / 2^8) >> 7 = Q47.0 - ae_q56s result = AE_MULP24S_HH(x_shifted_24x2, quantized_multiplier_24x2); - result = AE_Q56S_SRA(result, shift_exceeds_24bits ? 15 : 23); + // Find the product of x and the quantized_multiplier. 
+  // Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23 +  // Q48.0 / 2^23 >> 7 = Q48.0 / 2^16 +  ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2); -  if (shift < 0) { -    // Handle any negative shift directly on the 48 bit value. -    result = AE_Q56S_SRA(result, -shift); +  // Shift right if shift amount is positive, left if shift amount is negative. +  if (shift_amount >= 0) { +    result_56 = AE_Q56S_SRA(result_56, shift_amount); +  } else { +    result_56 = AE_Q56S_SLA(result_56, -shift_amount); } -  return result; + +  // Round off the bottom 16 bits. +  // Q48.0 / 2^16 -> Q32.0 aligned to 48 bits. +  result_56 = AE_ROUNDSQ32SYM(result_56); +  return result_56; +} + +// +// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit +// aligned value in the QR register. +// +inline ae_q56s MultiplyByQuantizedMultiplier(int32_t x, +                                             int32_t quantized_multiplier, +                                             int shift) { +  // Convert x into a 2x24bit PR register file. If x is outside the numerical +  // limits of a 24bit integer, the "fractional" or lower 8bits are discarded. +  // If x is within the range of a 24 bit integer, the "signed" or upper 8bits +  // are discarded. +  ae_p24x2s x_24x2; +  if (x > INT24_MIN && x < INT24_MAX) { +    x_24x2 = AE_MOVPA24(x); +  } else { +    x_24x2 = static_cast<ae_p24x2s>(*reinterpret_cast<ae_p24f*>(&x)); +    shift += 8; +  } + +  return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift); } // @@ -193,6 +121,8 @@ inline void QuantizeMultiplier(float multiplier, int32_t* quantized_multiplier, } TFLITE_CHECK_LE(q_fixed, INT24_MAX); + // Ensure shift does not exceed 24-bit range.
+ TFLITE_CHECK_LE(*shift, 23); if (*shift < -23) { *shift = 0; q_fixed = 0; diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc index 7a535120216..c2c2c86fe81 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -108,9 +107,6 @@ inline void FullyConnected( sum_56 = MultiplyByQuantizedMultiplier(sum_24x2, output_multiplier, output_shift); - // Align from 48bit to 32bit on the QR register: - sum_56 = AE_Q56S_SLAI(sum_56, 16); - // Add output_offset and cap min/max values: sum_56 = AE_ADDQ56(sum_56, output_offset_56); sum_56 = AE_MINQ56S(sum_56, output_activation_max_56); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc index 0708ee7f973..2177bf1c363 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" namespace tflite { namespace ops { @@ -43,7 +42,7 @@ void AffineQuantize(int scale_multiplier, const ae_p16x2s* input_data_ptr = (const ae_p16x2s*)(input_data - 2); - ae_p24x2s scale_multiplier_24x2 = AE_CONVERT_INT32_24x2(scale_multiplier); + ae_p24x2s scale_multiplier_24x2 = AE_MOVPA24(scale_multiplier); int iters = flat_size / 2; for (int i = 0; i < iters; i++) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 1847a4e88e8..2b14bedc01f 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -25,8 +25,6 @@ limitations under the License. #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/kernels/activation_utils.h" #include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h" -#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace ops { @@ -99,7 +97,7 @@ void EvalIntegerSVDF( ae_q56s output_int16_max_56 = AE_CVTQ48A32S(INT16_MAX); ae_q56s output_int16_min_56 = AE_CVTQ48A32S(INT16_MIN); - ae_p24x2s input_zp_24x2 = AE_CONVERT_INT32_24x2(input_zp); + ae_p24x2s input_zp_24x2 = AE_MOVPA24(input_zp); for (int b = 0; b < n_batch; b++) { const int8_t* weight_feature_ptr = weight_feature - 2; @@ -140,8 +138,6 @@ void EvalIntegerSVDF( tflite::ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( dot_prod_24x2, scale_1_a, scale_1_b); - // Align from 48bit to 32bit on the QR register - dot_prod_56 = AE_Q56S_SLAI(dot_prod_56, 16); // Cap min/max and convert to int32: dot_prod_56 = AE_MAXQ56S(dot_prod_56, output_int16_min_56); dot_prod_56 = 
AE_MINQ56S(dot_prod_56, output_int16_max_56); @@ -232,8 +228,6 @@ void EvalIntegerSVDF( ae_q56s x_56 = tflite::ops::micro::xtensa::hifimini::MultiplyByQuantizedMultiplier( scratch_output_tensor[i], scale_2_a, scale_2_b); - // Align from 48bit to 32bit on the QR register: - x_56 = AE_Q56S_SLAI(x_56, 16); // Add output adjustment: x_56 = AE_ADDQ56(x_56, output_zp_56); // Cap min/max and convert to int32 (already aligned to 32bit): diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h b/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h deleted file mode 100644 index 59caf4bbf2f..00000000000 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/utils.h +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_ -#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_ - -#include - -#include - -// INT24 MIN/MAX -#define INT24_MIN -8388608 -#define INT24_MAX 8388607 - -// Converts an int32 value into a 2x24bit PR register file. If the int32 value -// is outside the numerical limits of a 24bit integer, the "fractional" or lower -// 8bits are discarded. If the value is within the range of a 24 bit integer, -// the "signed" or upper 8bits are discarded. 
-inline ae_p24x2s AE_CONVERT_INT32_24x2(int32_t v) { - if (v > INT24_MIN && v < INT24_MAX) { - return *reinterpret_cast<ae_p24x2s*>(&v); - } else { - return static_cast<ae_p24x2s>(*reinterpret_cast<ae_p24f*>(&v)); - } -} - -// Shifts a 48bit accumulator value into 32bit space and returns the value. -#define AE_CONVERT_Q56_INT32(v) AE_TRUNCA32Q48(AE_Q56S_SLAI(v, 16)) - -#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_UTILS_H_