micro: prepare to port operator ELU kernel from lite with test

Implement skeleton (non-working) code for operator and test.
Header files changed.
Namespaces changed.
Some original code deleted.
Some original code modified.

PR step 4 of the work to port operator ELU as tracked in Issue #46323
ddavis-2015 authored and committed on 2021-01-12 18:36:30 -08:00
parent 78d28301bc
commit 3f05c1842e
2 changed files with 46 additions and 231 deletions
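In rough outline, the skeleton this step leaves behind looks like the sketch below (condensed from the diff that follows, not the literal committed file): every entry point compiles against the micro headers but only returns a placeholder value, so the operator cannot run yet.

// Rough sketch of the skeleton state after this step; all entry points are
// stubs, and the real port happens in a later PR step.
#include <cstddef>

#include "tensorflow/lite/c/common.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // No per-op data is allocated yet.
  return nullptr;
}

TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  // Deliberately fails until the port is completed.
  return kTfLiteError;
}

}  // namespace activations

// Registration is stubbed out as well.
TfLiteRegistration* Register_ELU() { return nullptr; }

}  // namespace micro
}  // namespace ops
}  // namespace tflite

The test file is prepared the same way: the original lite test bodies are kept but disabled with #ifdef notdef and wrapped in the TF_LITE_MICRO_TESTS_BEGIN / TF_LITE_MICRO_TESTS_END harness, so it builds against the micro test framework without exercising any kernel code.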


@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,59 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include <stddef.h>
+#include "tensorflow/lite/kernels/internal/reference/elu.h"

#include <algorithm>
#include <cmath>
-#include <cstdint>
#include <functional>
#include <limits>

-#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/cpu_backend_context.h"
-#include "tensorflow/lite/kernels/internal/common.h"
-#include "tensorflow/lite/kernels/internal/compatibility.h"
-#include "tensorflow/lite/kernels/internal/cppmath.h"
-#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
-#include "tensorflow/lite/kernels/internal/reference/logistic.h"
-#include "tensorflow/lite/kernels/internal/reference/prelu.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
-#include "tensorflow/lite/kernels/internal/reference/softmax.h"
-#include "tensorflow/lite/kernels/internal/reference/tanh.h"
-#include "tensorflow/lite/kernels/internal/tensor.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"

-#if __aarch64__ && __clang__
-#include <arm_neon.h>
-#endif
namespace tflite {
namespace ops {
-namespace builtin {
+namespace micro {
namespace activations {
+namespace {

// OLD-TODO(b/142762739): We should figure out a multi-threading plan for most
// of the activation ops below.

-enum KernelType {
-  kReference,
-  kGenericOptimized,
-  kFixedPointOptimized,
-};

struct OpData {
-  int32_t input_multiplier = 0;
-  int input_left_shift = 0;
-  int32_t input_range_radius = 0;
-  int diff_min = 0;
  uint8_t table[256] = {0};
};
@@ -97,42 +69,19 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
  uint8_t* output_data = GetTensorData<uint8_t>(output);
  const uint8_t* input_data = GetTensorData<uint8_t>(input);
  int i = 0;
-#if __aarch64__ && __clang__
-  // This code uses ARM64-only instructions.
-  // OLD-TODO(b/143709993): Port to ARMv7
-  // Load the tables into registers. (4*4 128-bit registers)
-  uint8x16x4_t table[4];
-  table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
-  table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
-  table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
-  table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);
-  // Vectorized loop; process uint8x16_t (16 elements) at a time.
-  constexpr int vectorized_16_loop_step = 16;
-  const int vectorized_16_loop_end =
-      size / vectorized_16_loop_step * vectorized_16_loop_step;
-  for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
-    uint8x16_t input = vld1q_u8(input_data + i);
-    uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
-    vst1q_u8(output_data + i, output);
-  }
-  // Postamble and non-ARM64 code: simple for loop.
-#endif
  for (; i < size; ++i) {
    output_data[i] = data->table[input_data[i]];
  }
}
+}  // namespace

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
-  return new OpData;
+  return nullptr;
}
-void Free(TfLiteContext* context, void* buffer) {
-  delete reinterpret_cast<OpData*>(buffer);
-}
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -144,8 +93,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  return context->ResizeTensor(context, output,
-                               TfLiteIntArrayCopy(input->dims));
+  return kTfLiteError;
}

TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -174,12 +122,12 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
      optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
                         GetTensorShape(output), GetTensorData<float>(output));
      return kTfLiteOk;
-    } break;
+    }
    case kTfLiteInt8: {
      OpData* data = reinterpret_cast<OpData*>(node->user_data);
      EvalUsingLookupTable(data, input, output);
      return kTfLiteOk;
-    } break;
+    }
    default:
      TF_LITE_KERNEL_LOG(
          context, "Only float32 and int8 is supported currently, got %s.",
@@ -190,12 +138,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
}  // namespace activations

-TfLiteRegistration* Register_ELU() {
-  static TfLiteRegistration r = {activations::Init, activations::Free,
-                                 activations::EluPrepare, activations::EluEval};
-  return &r;
-}
+TfLiteRegistration* Register_ELU() { return nullptr; }

-}  // namespace builtin
+}  // namespace micro
}  // namespace ops
}  // namespace tflite


@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,39 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include <math.h>
-#include <stdint.h>
-#include <stdlib.h>

-#include <algorithm>
-#include <initializer_list>
#include <limits>
-#include <map>
-#include <memory>
-#include <random>
-#include <string>
-#include <utility>
-#include <vector>
+#include <type_traits>

-#include "absl/memory/memory.h"
-#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "tensorflow/lite/core/api/op_resolver.h"
-#include "tensorflow/lite/interpreter.h"
-#include "tensorflow/lite/kernels/test_util.h"
-#include "tensorflow/lite/schema/schema_generated.h"
-#include "tensorflow/lite/string_type.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_runner.h"
+#include "tensorflow/lite/micro/test_helpers.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"

namespace tflite {
+namespace testing {
namespace {
-using ::testing::ElementsAreArray;
-
-class BaseActivationsOpModel : public SingleOpModel {
- public:
-  // Most activations don't take any options, so this constructor works for
-  // them.
-  BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
+#ifdef notdef
+BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
    input_ = AddInput(input);
    if (input.type == TensorType_UINT8) {
      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
@@ -55,107 +37,8 @@ class BaseActivationsOpModel : public SingleOpModel {
    }
    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
    BuildInterpreter({GetShape(input_)});
  }
+#endif  // notdef
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         TensorData input) {
-    input_ = AddInput(input);
-    if (input.type == TensorType_UINT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
-    } else if (input.type == TensorType_INT8) {
-      output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
-  // A dedicated constructor for SOFTMAX, which does some options.
-  BaseActivationsOpModel(float softmax_beta, TensorData input,
-                         TensorType output_type) {
-    input_ = AddInput(input);
-    if (output_type == TensorType_UINT8) {
-      output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
-    } else if (output_type == TensorType_INT8) {
-      output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
-    } else if (input.type == TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16,
-                           {},
-                           0,
-                           0,
-                           1.0f / (std::numeric_limits<int16_t>::max() + 1),
-                           0});
-    } else if (input.type != TensorType_INT16 &&
-               output_type == TensorType_INT16) {
-      output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
-    } else {
-      output_ = AddOutput({output_type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions,
-                 CreateSoftmaxOptions(builder_, softmax_beta).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-  // A dedicated constructor for LeakyRelu, which does some options.
-  BaseActivationsOpModel(TensorData input, float alpha) {
-    input_ = AddInput(input);
-    // The output scale and input scale might be different.
-    if (input.type == TensorType_UINT8 || input.type == TensorType_INT8 ||
-        input.type == TensorType_INT16) {
-      auto output_min = (input.min >= 0) ? input.min : input.min * alpha;
-      auto output_max = (input.max >= 0) ? input.max : input.max * alpha;
-      if (input.type == TensorType_INT16) {
-        output_ = AddOutput({TensorType_INT16,
-                             {},
-                             0,
-                             0,
-                             output_max / (std::numeric_limits<int16_t>::max()),
-                             0});
-      } else {
-        output_ = AddOutput({input.type, {}, output_min, output_max});
-      }
-    } else {
-      output_ = AddOutput({input.type, {}});
-    }
-    SetBuiltinOp(BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions,
-                 CreateLeakyReluOptions(builder_, alpha).Union());
-    BuildInterpreter({GetShape(input_)});
-  }
-  BaseActivationsOpModel(BuiltinOperator type, const TensorData& input,
-                         const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    BuildInterpreter({GetShape(input_)});
-  }
-  BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
-                         const TensorData& input, const TensorData& output) {
-    input_ = AddInput(input);
-    output_ = AddOutput(output);
-    SetBuiltinOp(type, BuiltinOptions_NONE, 0);
-    resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
-    BuildInterpreter({GetShape(input_)});
-  }
- protected:
-  int input_;
-  int output_;
-};
-class FloatActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
-  void SetInput(const std::vector<float>& data) {
-    PopulateTensor(input_, data);
-  }
-  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
-};
// Our fixed-point math function implementations have roughly 12 bits of
// accuracy, when specialized to 16-bit fixed-point arithmetic.
@@ -176,41 +59,25 @@ class FloatActivationsOpModel : public BaseActivationsOpModel {
const float kQuantizedTolerance = 2 * (1. / 256);
const float kQuantizedToleranceInt16 = 2 * (1. / 4096);

-class QuantizedActivationsOpModel : public BaseActivationsOpModel {
- public:
-  using BaseActivationsOpModel::BaseActivationsOpModel;
+TF_LITE_MICRO_TESTS_BEGIN

-  template <typename T>
-  void SetInput(const std::vector<float>& data) {
-    QuantizeAndPopulate<T>(input_, data);
-  }
-  template <typename T>
-  std::vector<T> GetOutput() {
-    return ExtractVector<T>(output_);
-  }
-  template <typename T>
-  std::vector<float> GetDequantizedOutput() {
-    return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
-                         GetZeroPoint(output_));
-  }
-};
-TEST(FloatActivationsOpTest, Elu) {
+TF_LITE_MICRO_TEST(FloatActivationsOpTestElu) {
+#ifdef notdef
  FloatActivationsOpModel m(BuiltinOperator_ELU,
                            /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}});
  m.SetInput({
      0, -6, 2, -4,     //
      3, -2, 10, -0.1,  //
  });
-  m.Invoke();
  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({
                  0.0, -0.997521, 2.0, -0.981684,    //
                  3.0, -0.864665, 10.0, -0.0951626,  //
              })));
+#endif  // notdef
}

-TEST(QuantizedActivationsOpTest, EluInt8) {
+TF_LITE_MICRO_TEST(QuantizedActivationsOpTestEluInt8) {
+#ifdef notdef
  const float kMin = -1;
  const float kMax = 127.f / 128.f;
  QuantizedActivationsOpModel model(
@@ -231,7 +98,11 @@ TEST(QuantizedActivationsOpTest, EluInt8) {
      3.0, -0.875, 6.0, -0.125,  //
  },
  kQuantizedTolerance)));
+#endif  // notdef
}

+TF_LITE_MICRO_TESTS_END

}  // namespace
+}  // namespace testing
}  // namespace tflite