micro: prepare to port operator ELU kernel from lite with test

Implement skeleton (non-working) code for operator and test.
Header files changed.
Namespaces changed.
Some original code deleted.
Some original code modified.

PR step 4 of the work to port operator ELU as tracked in Issue #46323
ddavis-2015 authored and committed on 2021-01-12 18:36:30 -08:00
parent 78d28301bc
commit 3f05c1842e
2 changed files with 46 additions and 231 deletions
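In rough outline, the skeleton this step leaves behind looks like the sketch below (condensed from the diff that follows, not the literal committed file): every entry point compiles against the micro headers but only returns a placeholder value, so the operator cannot run yet.

// Rough sketch of the skeleton state after this step; all entry points are
// stubs, and the real port happens in a later PR step.
#include <cstddef>

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // No per-op data is allocated yet.
  return nullptr;
}

TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  // Deliberately fails until the port is completed.
  return kTfLiteError;
}

}  // namespace activations

// Registration is stubbed out as well.
TfLiteRegistration* Register_ELU() { return nullptr; }

}  // namespace micro
}  // namespace ops
}  // namespace tflite

The test file is prepared the same way: the original lite test bodies are kept but disabled with #ifdef notdef and wrapped in the TF_LITE_MICRO_TESTS_BEGIN / TF_LITE_MICRO_TESTS_END harness, so it builds against the micro test framework without exercising any kernel code.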


@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,59 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include <stddef.h>
+#include "tensorflow/lite/kernels/internal/reference/elu.h"

#include <algorithm>
#include <cmath>
-#include <cstdint>
#include <functional>
#include <limits>

-#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/cpu_backend_context.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
-#include "tensorflow/lite/kernels/internal/reference/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/prelu.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-#include "tensorflow/lite/kernels/internal/reference/softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/tanh.h"
-#include "tensorflow/lite/kernels/internal/tensor.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

-#if __aarch64__ && __clang__
-#include <arm_neon.h>
-#endif
namespace tflite {
namespace ops {
-namespace builtin {
+namespace micro {
namespace activations {
+namespace {

// OLD-TODO(b/142762739): We should figure out a multi-threading plan for most
// of the activation ops below.

-enum KernelType {
-  kReference,
-  kGenericOptimized,
-  kFixedPointOptimized,
-};

struct OpData {
-  int32_t input_multiplier = 0;
-  int input_left_shift = 0;
-  int32_t input_range_radius = 0;
-  int diff_min = 0;
  uint8_t table[256] = {0};
};
@@ -97,42 +69,19 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
  uint8_t* output_data = GetTensorData<uint8_t>(output);
  const uint8_t* input_data = GetTensorData<uint8_t>(input);
  int i = 0;
-#if __aarch64__ && __clang__
-  // This code uses ARM64-only instructions.
-  // OLD-TODO(b/143709993): Port to ARMv7
-  // Load the tables into registers. (4*4 128-bit registers)
-  uint8x16x4_t table[4];
-  table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
-  table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
-  table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
-  table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);
-  // Vectorized loop; process uint8x16_t (16 elements) at a time.
-  constexpr int vectorized_16_loop_step = 16;
-  const int vectorized_16_loop_end =
-      size / vectorized_16_loop_step * vectorized_16_loop_step;
-  for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
-    uint8x16_t input = vld1q_u8(input_data + i);
-    uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
-    vst1q_u8(output_data + i, output);
-  }
-  // Postamble and non-ARM64 code: simple for loop.
-#endif
  for (; i < size; ++i) {
    output_data[i] = data->table[input_data[i]];
  }
}
+}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
-  return new OpData;
+  return nullptr;
}
-void Free(TfLiteContext* context, void* buffer) {
-  delete reinterpret_cast<OpData*>(buffer);
-}
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -144,8 +93,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  return context->ResizeTensor(context, output,
-                               TfLiteIntArrayCopy(input->dims));
+  return kTfLiteError;
}

TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -174,12 +122,12 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
      optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
                         GetTensorShape(output), GetTensorData<float>(output));
      return kTfLiteOk;
-    } break;
+    }
    case kTfLiteInt8: {
      OpData* data = reinterpret_cast<OpData*>(node->user_data);
      EvalUsingLookupTable(data, input, output);
      return kTfLiteOk;
-    } break;
+    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "Only float32 and int8 is supported currently, got %s.",
@@ -190,12 +138,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
}  // namespace activations

-TfLiteRegistration* Register_ELU() {
-  static TfLiteRegistration r = {activations::Init, activations::Free,
-                                 activations::EluPrepare, activations::EluEval};
-  return &r;
-}
+TfLiteRegistration* Register_ELU() { return nullptr; }

-}  // namespace builtin
+}  // namespace micro
}  // namespace ops
}  // namespace tflite


@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,39 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include <math.h>
-#include <stdint.h>
-#include <stdlib.h>

-#include <algorithm>
-#include <initializer_list>
#include <limits>
-#include <map>
-#include <memory>
-#include <random>
-#include <string>
-#include <utility>
-#include <vector>
+#include <type_traits>

-#include "absl/memory/memory.h"
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/interpreter.h"
-#include "tensorflow/lite/kernels/test_util.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-#include "tensorflow/lite/string_type.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_runner.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"

namespace tflite {
+namespace testing {
namespace {
-using ::testing::ElementsAreArray;
-
-class BaseActivationsOpModel : public SingleOpModel {
- public:
-  // Most activations don't take any options, so this constructor works for
-  // them.
-  BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
+#ifdef notdef
+BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
    input_ = AddInput(input);
    if (input.type == TensorType_UINT8) {
      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
@@ -55,107 +37,8 @@ class BaseActivationsOpModel : public SingleOpModel {
    }
    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
    BuildInterpreter({GetShape(input_)});
  }
+#endif  // notdef
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         TensorData input) {
-    input_ = AddInput(input);
-    if (input.type == TensorType_UINT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
-    } else if (input.type == TensorType_INT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
-  // A dedicated constructor for SOFTMAX, which does some options.
-  BaseActivationsOpModel(float softmax_beta, TensorData input,
-                         TensorType output_type) {
-    input_ = AddInput(input);
-    if (output_type == TensorType_UINT8) {
-      output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
-    } else if (output_type == TensorType_INT8) {
-      output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
-    } else if (input.type == TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16,
-                           {},
-                           0,
-                           0,
-                           1.0f / (std::numeric_limits<int16_t>::max() + 1),
-                           0});
-    } else if (input.type != TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
-    } else {
-      output_ = AddOutput({output_type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions,
-                 CreateSoftmaxOptions(builder_, softmax_beta).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-  // A dedicated constructor for LeakyRelu, which does some options.
-  BaseActivationsOpModel(TensorData input, float alpha) {
-    input_ = AddInput(input);
-    // The output scale and input scale might be different.
-    if (input.type == TensorType_UINT8 || input.type == TensorType_INT8 ||
-        input.type == TensorType_INT16) {
-      auto output_min = (input.min >= 0) ? input.min : input.min * alpha;
-      auto output_max = (input.max >= 0) ? input.max : input.max * alpha;
-      if (input.type == TensorType_INT16) {
-        output_ = AddOutput({TensorType_INT16,
-                             {},
-                             0,
-                             0,
-                             output_max / (std::numeric_limits<int16_t>::max()),
-                             0});
-      } else {
-        output_ = AddOutput({input.type, {}, output_min, output_max});
-      }
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions,
-                 CreateLeakyReluOptions(builder_, alpha).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-  BaseActivationsOpModel(BuiltinOperator type, const TensorData& input,
-                         const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    BuildInterpreter({GetShape(input_)});
-  }
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         const TensorData& input, const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
- protected:
-  int input_;
-  int output_;
-};
-class FloatActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
-  void SetInput(const std::vector<float>& data) {
-    PopulateTensor(input_, data);
-  }
-  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
-};
// Our fixed-point math function implementations have roughly 12 bits of
// accuracy, when specialized to 16-bit fixed-point arithmetic.
@@ -176,41 +59,25 @@ class FloatActivationsOpModel : public BaseActivationsOpModel {
const float kQuantizedTolerance = 2 * (1. / 256);
const float kQuantizedToleranceInt16 = 2 * (1. / 4096);

-class QuantizedActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
+TF_LITE_MICRO_TESTS_BEGIN

-  template <typename T>
-  void SetInput(const std::vector<float>& data) {
-    QuantizeAndPopulate<T>(input_, data);
-  }
-  template <typename T>
-  std::vector<T> GetOutput() {
-    return ExtractVector<T>(output_);
-  }
-  template <typename T>
-  std::vector<float> GetDequantizedOutput() {
-    return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
-                         GetZeroPoint(output_));
-  }
-};
-TEST(FloatActivationsOpTest, Elu) {
+TF_LITE_MICRO_TEST(FloatActivationsOpTestElu) {
+#ifdef notdef
  FloatActivationsOpModel m(BuiltinOperator_ELU,
                            /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}});
  m.SetInput({
      0, -6, 2, -4,     //
      3, -2, 10, -0.1,  //
  });
-  m.Invoke();
  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({
                  0.0, -0.997521, 2.0, -0.981684,    //
                  3.0, -0.864665, 10.0, -0.0951626,  //
              })));
+#endif  // notdef
}

-TEST(QuantizedActivationsOpTest, EluInt8) {
+TF_LITE_MICRO_TEST(QuantizedActivationsOpTestEluInt8) {
+#ifdef notdef
  const float kMin = -1;
  const float kMax = 127.f / 128.f;
  QuantizedActivationsOpModel model(
@@ -231,7 +98,11 @@ TEST(QuantizedActivationsOpTest, EluInt8) {
      3.0, -0.875, 6.0, -0.125,  //
  },
  kQuantizedTolerance)));
+#endif  // notdef
}

+TF_LITE_MICRO_TESTS_END

}  // namespace
+}  // namespace testing
}  // namespace tflite