diff --git a/tensorflow/lite/micro/kernels/elu.cc b/tensorflow/lite/micro/kernels/elu.cc
index ec8cc368bd1..12d287d6590 100644
--- a/tensorflow/lite/micro/kernels/elu.cc
+++ b/tensorflow/lite/micro/kernels/elu.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,59 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include
+
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
 
 #include
 #include
-#include
 #include
 #include
 
-#include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/cpu_backend_context.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
-#include "tensorflow/lite/kernels/internal/reference/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/prelu.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-#include "tensorflow/lite/kernels/internal/reference/softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/tanh.h"
-#include "tensorflow/lite/kernels/internal/tensor.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-
-#if __aarch64__ && __clang__
-#include <arm_neon.h>
-#endif
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
 namespace ops {
-namespace builtin {
+namespace micro {
 namespace activations {
+namespace {
 
 // OLD-TODO(b/142762739): We should figure out a multi-threading plan for most
 // of the activation ops below.
 
-enum KernelType {
-  kReference,
-  kGenericOptimized,
-  kFixedPointOptimized,
-};
-
 struct OpData {
-  int32_t input_multiplier = 0;
-  int input_left_shift = 0;
-  int32_t input_range_radius = 0;
-  int diff_min = 0;
   uint8_t table[256] = {0};
 };
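The port keeps only the 256-entry `table` in `OpData`; the code that fills it is not part of this diff. For reference, upstream `activations.cc` bakes the dequantize, ELU, requantize round trip into the table once, so that Eval() is a single byte lookup per element. A minimal sketch of that idea, assuming the `OpData` from the hunk above (the helper name `PopulateEluLookupTable` is illustrative, not from this diff):

```c++
#include <algorithm>
#include <cmath>
#include <cstdint>

// OpData as defined in the hunk above.
struct OpData {
  uint8_t table[256] = {0};
};

// Hypothetical helper, modeled on the upstream lookup-table approach.
void PopulateEluLookupTable(OpData* data, float input_scale,
                            int32_t input_zero_point, float output_scale,
                            int32_t output_zero_point) {
  for (int i = -128; i <= 127; ++i) {
    // Dequantize the int8 value, apply ELU, then requantize back to int8.
    const float dequantized = input_scale * (i - input_zero_point);
    // ELU with alpha == 1: identity for x >= 0, expm1(x) for x < 0.
    const float transformed =
        dequantized < 0.f ? std::expm1(dequantized) : dequantized;
    const float rescaled = std::round(transformed / output_scale);
    const int32_t quantized =
        static_cast<int32_t>(rescaled) + output_zero_point;
    // Clamp to the int8 range and store at the uint8 reinterpretation of the
    // int8 input byte, matching EvalUsingLookupTable's uint8_t indexing.
    data->table[static_cast<uint8_t>(i)] =
        static_cast<uint8_t>(std::min(127, std::max(-128, quantized)));
  }
}
```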
@@ -97,42 +69,19 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
   uint8_t* output_data = GetTensorData<uint8_t>(output);
   const uint8_t* input_data = GetTensorData<uint8_t>(input);
   int i = 0;
-#if __aarch64__ && __clang__
-  // This code uses ARM64-only instructions.
-  // OLD-TODO(b/143709993): Port to ARMv7
-
-  // Load the tables into registers. (4*4 128-bit registers)
-  uint8x16x4_t table[4];
-  table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
-  table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
-  table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
-  table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);
-
-  // Vectorized loop; process uint8x16_t (16 elements) at a time.
-  constexpr int vectorized_16_loop_step = 16;
-  const int vectorized_16_loop_end =
-      size / vectorized_16_loop_step * vectorized_16_loop_step;
-  for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
-    uint8x16_t input = vld1q_u8(input_data + i);
-    uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
-    vst1q_u8(output_data + i, output);
-  }
-  // Postamble and non-ARM64 code: simple for loop.
-#endif
   for (; i < size; ++i) {
     output_data[i] = data->table[input_data[i]];
   }
 }
 
+}  // namespace
+
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   // This is a builtin op, so we don't use the contents in 'buffer', if any.
   // Instead, we allocate a new object to carry information from Prepare() to
   // Eval().
-  return new OpData;
-}
-
-void Free(TfLiteContext* context, void* buffer) {
-  delete reinterpret_cast<OpData*>(buffer);
+  return nullptr;
 }
 
 TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -144,8 +93,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
-  return context->ResizeTensor(context, output,
-                               TfLiteIntArrayCopy(input->dims));
+  return kTfLiteError;
 }
 
 TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -174,12 +122,12 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
       optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
                          GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
-    } break;
+    }
     case kTfLiteInt8: {
       OpData* data = reinterpret_cast<OpData*>(node->user_data);
       EvalUsingLookupTable(data, input, output);
       return kTfLiteOk;
-    } break;
+    }
     default:
       TF_LITE_KERNEL_LOG(
           context, "Only float32 and int8 is supported currently, got %s.",
@@ -190,12 +138,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
 
 }  // namespace activations
 
-TfLiteRegistration* Register_ELU() {
-  static TfLiteRegistration r = {activations::Init, activations::Free,
-                                 activations::EluPrepare, activations::EluEval};
-  return &r;
-}
+TfLiteRegistration* Register_ELU() { return nullptr; }
 
-}  // namespace builtin
+}  // namespace micro
 }  // namespace ops
 }  // namespace tflite
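At this stage of the port, `Register_ELU()` deliberately returns `nullptr` and `GenericPrepare()` returns `kTfLiteError`: the file builds but the kernel is not yet usable. Once the port is completed, a registration matching this signature would plausibly look like the sketch below. The field order follows the `TfLiteRegistration` struct in `tensorflow/lite/c/common.h`; leaving `free` as `nullptr` is an assumption based on TFLM avoiding heap deallocation, not something this diff shows:

```c++
TfLiteRegistration* Register_ELU() {
  static TfLiteRegistration r = {/*init=*/activations::Init,
                                 /*free=*/nullptr,
                                 /*prepare=*/activations::EluPrepare,
                                 /*invoke=*/activations::EluEval,
                                 /*profiling_string=*/nullptr,
                                 /*builtin_code=*/0,
                                 /*custom_name=*/nullptr,
                                 /*version=*/0};
  return &r;
}
```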
diff --git a/tensorflow/lite/micro/kernels/elu_test.cc b/tensorflow/lite/micro/kernels/elu_test.cc
index 5dedc7a4264..5eb893b72f4 100644
--- a/tensorflow/lite/micro/kernels/elu_test.cc
+++ b/tensorflow/lite/micro/kernels/elu_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,150 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include
-#include
-#include
-
-#include
-#include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
 
-#include "absl/memory/memory.h"
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/interpreter.h"
-#include "tensorflow/lite/kernels/test_util.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-#include "tensorflow/lite/string_type.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_runner.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"
 
 namespace tflite {
-
+namespace testing {
 namespace {
 
-using ::testing::ElementsAreArray;
-
-class BaseActivationsOpModel : public SingleOpModel {
- public:
-  // Most activations don't take any options, so this constructor works for
-  // them.
-  BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
-    input_ = AddInput(input);
-    if (input.type == TensorType_UINT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
-    } else if (input.type == TensorType_INT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    BuildInterpreter({GetShape(input_)});
+#ifdef notdef
+BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
+  input_ = AddInput(input);
+  if (input.type == TensorType_UINT8) {
+    output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
+  } else if (input.type == TensorType_INT8) {
+    output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
+  } else {
+    output_ = AddOutput({input.type, {}});
   }
-
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         TensorData input) {
-    input_ = AddInput(input);
-    if (input.type == TensorType_UINT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
-    } else if (input.type == TensorType_INT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
-
-  // A dedicated constructor for SOFTMAX, which does some options.
-  BaseActivationsOpModel(float softmax_beta, TensorData input,
-                         TensorType output_type) {
-    input_ = AddInput(input);
-    if (output_type == TensorType_UINT8) {
-      output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
-    } else if (output_type == TensorType_INT8) {
-      output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
-    } else if (input.type == TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16,
-                           {},
-                           0,
-                           0,
-                           1.0f / (std::numeric_limits<int16_t>::max() + 1),
-                           0});
-    } else if (input.type != TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
-    } else {
-      output_ = AddOutput({output_type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions,
-                 CreateSoftmaxOptions(builder_, softmax_beta).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-
-  // A dedicated constructor for LeakyRelu, which does some options.
-  BaseActivationsOpModel(TensorData input, float alpha) {
-    input_ = AddInput(input);
-    // The output scale and input scale might be different.
-    if (input.type == TensorType_UINT8 || input.type == TensorType_INT8 ||
-        input.type == TensorType_INT16) {
-      auto output_min = (input.min >= 0) ? input.min : input.min * alpha;
-      auto output_max = (input.max >= 0) ? input.max : input.max * alpha;
-      if (input.type == TensorType_INT16) {
-        output_ = AddOutput({TensorType_INT16,
-                             {},
-                             0,
-                             0,
-                             output_max / (std::numeric_limits<int16_t>::max()),
-                             0});
-      } else {
-        output_ = AddOutput({input.type, {}, output_min, output_max});
-      }
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions,
-                 CreateLeakyReluOptions(builder_, alpha).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-
-  BaseActivationsOpModel(BuiltinOperator type, const TensorData& input,
-                         const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    BuildInterpreter({GetShape(input_)});
-  }
-
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         const TensorData& input, const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
-
- protected:
-  int input_;
-  int output_;
-};
-
-class FloatActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
-
-  void SetInput(const std::vector<float>& data) {
-    PopulateTensor(input_, data);
-  }
-  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
-};
+  SetBuiltinOp(type, BuiltinOptions_NONE, 0);
+  BuildInterpreter({GetShape(input_)});
+}
+#endif  // notdef
 
 // Our fixed-point math function implementations have roughly 12 bits of
 // accuracy, when specialized to 16-bit fixed-point arithmetic.
@@ -176,41 +59,25 @@ class FloatActivationsOpModel : public BaseActivationsOpModel {
 const float kQuantizedTolerance = 2 * (1. / 256);
 const float kQuantizedToleranceInt16 = 2 * (1. / 4096);
 
-class QuantizedActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
+TF_LITE_MICRO_TESTS_BEGIN
 
-  template <typename T>
-  void SetInput(const std::vector<float>& data) {
-    QuantizeAndPopulate<T>(input_, data);
-  }
-  template <typename T>
-  std::vector<T> GetOutput() {
-    return ExtractVector<T>(output_);
-  }
-
-  template <typename T>
-  std::vector<float> GetDequantizedOutput() {
-    return Dequantize<T>(ExtractVector<T>(output_), GetScale<T>(output_),
-                         GetZeroPoint<T>(output_));
-  }
-};
-
-TEST(FloatActivationsOpTest, Elu) {
+TF_LITE_MICRO_TEST(FloatActivationsOpTestElu) {
+#ifdef notdef
   FloatActivationsOpModel m(BuiltinOperator_ELU,
                             /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}});
   m.SetInput({
       0, -6, 2, -4,     //
       3, -2, 10, -0.1,  //
   });
-  m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({
                                  0.0, -0.997521, 2.0, -0.981684,    //
                                  3.0, -0.864665, 10.0, -0.0951626,  //
                              })));
+#endif  // notdef
 }
 
-TEST(QuantizedActivationsOpTest, EluInt8) {
+TF_LITE_MICRO_TEST(QuantizedActivationsOpTestEluInt8) {
+#ifdef notdef
   const float kMin = -1;
   const float kMax = 127.f / 128.f;
   QuantizedActivationsOpModel model(
@@ -231,7 +98,11 @@ TEST(QuantizedActivationsOpTest, EluInt8) {
       3.0, -0.875, 6.0, -0.125,  //
   },
   kQuantizedTolerance)));
+#endif  // notdef
 }
 
+TF_LITE_MICRO_TESTS_END
+
 }  // namespace
+}  // namespace testing
 }  // namespace tflite
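Every test body above is fenced behind `#ifdef notdef`, the TFLM porting convention for keeping the reference test visible while compiling none of it. The follow-up step replaces the `SingleOpModel`-based bodies with direct `tflite::micro::KernelRunner` invocations. A sketch of what the float test could become, under stated assumptions: helper names such as `CreateTensor` and `IntArrayFromInts` come from `tensorflow/lite/micro/test_helpers.h` but their exact spellings have shifted across TFLM versions, and the `KernelRunner` constructor shown here matches later TFLM revisions (older ones also took an `ErrorReporter*`). The test would sit between `TF_LITE_MICRO_TESTS_BEGIN` and `TF_LITE_MICRO_TESTS_END`:

```c++
TF_LITE_MICRO_TEST(FloatActivationsOpTestElu) {
  // Same input and golden values as the notdef'd reference test above.
  float input_data[] = {0.0f, -6.0f, 2.0f, -4.0f, 3.0f, -2.0f, 10.0f, -0.1f};
  const float golden[] = {0.0f, -0.997521f, 2.0f,  -0.981684f,
                          3.0f, -0.864665f, 10.0f, -0.0951626f};
  float output_data[8];

  // Shape {1, 2, 4, 1}; the leading 4 is the number of dimensions.
  int dims_data[] = {4, 1, 2, 4, 1};
  TfLiteIntArray* dims = tflite::testing::IntArrayFromInts(dims_data);
  constexpr int tensors_size = 2;
  TfLiteTensor tensors[tensors_size] = {
      tflite::testing::CreateTensor(input_data, dims),
      tflite::testing::CreateTensor(output_data, dims),
  };

  // One input (tensor 0), one output (tensor 1).
  int inputs_array_data[] = {1, 0};
  TfLiteIntArray* inputs_array =
      tflite::testing::IntArrayFromInts(inputs_array_data);
  int outputs_array_data[] = {1, 1};
  TfLiteIntArray* outputs_array =
      tflite::testing::IntArrayFromInts(outputs_array_data);

  const TfLiteRegistration* registration = tflite::ops::micro::Register_ELU();
  tflite::micro::KernelRunner runner(*registration, tensors, tensors_size,
                                     inputs_array, outputs_array,
                                     /*builtin_data=*/nullptr);

  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
  for (int i = 0; i < 8; ++i) {
    TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f);
  }
}
```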