micro: prepare to port operator ELU kernel from lite with test

Implement skeleton (non-working) code for the operator and its test.
Header files changed.
Namespaces changed.
Some original code deleted.
Some original code modified.

PR step 4 of the work to port operator ELU as tracked in Issue #46323
ddavis-2015 2021-01-12 18:36:30 -08:00 committed by ddavis-2015
parent 78d28301bc
commit 3f05c1842e
2 changed files with 46 additions and 231 deletions
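
For orientation, here is a minimal sketch of the non-working skeleton this commit leaves in place, condensed from the diff below: Init() apparently reduced to returning nullptr, GenericPrepare() returning kTfLiteError, and Register_ELU() stubbed out until a later PR step. The include set and file layout are simplified; this is an assumption-laden summary of the change, not the file itself.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite {
namespace ops {
namespace micro {
namespace activations {

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // Skeleton: no per-op data is allocated yet (assumption based on the diff).
  return nullptr;
}

TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
  // Skeleton: always fails until the port becomes functional.
  return kTfLiteError;
}

}  // namespace activations

// Registration is stubbed out; a later PR step wires up the real kernel.
TfLiteRegistration* Register_ELU() { return nullptr; }

}  // namespace micro
}  // namespace ops
}  // namespace tflite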

View File

@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,59 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stddef.h>
#include "tensorflow/lite/kernels/internal/reference/elu.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/log_softmax.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
#include "tensorflow/lite/kernels/internal/reference/logistic.h"
#include "tensorflow/lite/kernels/internal/reference/prelu.h"
#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
#include "tensorflow/lite/kernels/internal/reference/softmax.h"
#include "tensorflow/lite/kernels/internal/reference/tanh.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#if __aarch64__ && __clang__
#include <arm_neon.h>
#endif
#include "tensorflow/lite/micro/kernels/kernel_util.h"
namespace tflite {
namespace ops {
namespace builtin {
namespace micro {
namespace activations {
namespace {
// OLD-TODO(b/142762739): We should figure out a multi-threading plan for most
// of the activation ops below.
enum KernelType {
kReference,
kGenericOptimized,
kFixedPointOptimized,
};
struct OpData {
int32_t input_multiplier = 0;
int input_left_shift = 0;
int32_t input_range_radius = 0;
int diff_min = 0;
uint8_t table[256] = {0};
};
@@ -97,42 +69,19 @@ void EvalUsingLookupTable(struct OpData* data, const TfLiteTensor* input,
uint8_t* output_data = GetTensorData<uint8_t>(output);
const uint8_t* input_data = GetTensorData<uint8_t>(input);
int i = 0;
#if __aarch64__ && __clang__
// This code uses ARM64-only instructions.
// OLD-TODO(b/143709993): Port to ARMv7
// Load the tables into registers. (4*4 128-bit registers)
uint8x16x4_t table[4];
table[0] = vld1q_u8_x4(data->table + 16 * 4 * 0);
table[1] = vld1q_u8_x4(data->table + 16 * 4 * 1);
table[2] = vld1q_u8_x4(data->table + 16 * 4 * 2);
table[3] = vld1q_u8_x4(data->table + 16 * 4 * 3);
// Vectorized loop; process uint8x16_t (16 elements) at a time.
constexpr int vectorized_16_loop_step = 16;
const int vectorized_16_loop_end =
size / vectorized_16_loop_step * vectorized_16_loop_step;
for (; i < vectorized_16_loop_end; i += vectorized_16_loop_step) {
uint8x16_t input = vld1q_u8(input_data + i);
uint8x16_t output = optimized_ops::aarch64_lookup_vector(table, input);
vst1q_u8(output_data + i, output);
}
// Postamble and non-ARM64 code: simple for loop.
#endif
for (; i < size; ++i) {
output_data[i] = data->table[input_data[i]];
}
}
} // namespace
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
// This is a builtin op, so we don't use the contents in 'buffer', if any.
// Instead, we allocate a new object to carry information from Prepare() to
// Eval().
return new OpData;
}
void Free(TfLiteContext* context, void* buffer) {
delete reinterpret_cast<OpData*>(buffer);
return nullptr;
}
TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -144,8 +93,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
return context->ResizeTensor(context, output,
TfLiteIntArrayCopy(input->dims));
return kTfLiteError;
}
TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) {
@@ -174,12 +122,12 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
optimized_ops::Elu(GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
} break;
}
case kTfLiteInt8: {
OpData* data = reinterpret_cast<OpData*>(node->user_data);
EvalUsingLookupTable(data, input, output);
return kTfLiteOk;
} break;
}
default:
TF_LITE_KERNEL_LOG(
context, "Only float32 and int8 is supported currently, got %s.",
@@ -190,12 +138,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
} // namespace activations
TfLiteRegistration* Register_ELU() {
static TfLiteRegistration r = {activations::Init, activations::Free,
activations::EluPrepare, activations::EluEval};
return &r;
}
TfLiteRegistration* Register_ELU() { return nullptr; }
} // namespace builtin
} // namespace micro
} // namespace ops
} // namespace tflite
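
The deleted EvalUsingLookupTable() path above evaluates quantized ELU by indexing a 256-entry table with each raw input byte (with a NEON fast path on AArch64). Building that table is not part of this diff; the sketch below shows one way it could be filled from the tensors' quantization parameters. PopulateEluLookupTable is a hypothetical name, and the exact rounding and clamping policy is an assumption.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical helper (not in this diff): fill the 256-entry table so that
// quantized evaluation reduces to one lookup per raw input byte, matching
// the loop in EvalUsingLookupTable above.
void PopulateEluLookupTable(float input_scale, int input_zero_point,
                            float output_scale, int output_zero_point,
                            uint8_t table[256]) {
  for (int i = 0; i < 256; ++i) {
    // Reinterpret the raw byte as the stored int8 value, then dequantize.
    const int8_t stored = static_cast<int8_t>(i);
    const float x = input_scale * (stored - input_zero_point);
    // ELU: identity for non-negative inputs, exp(x) - 1 for negative inputs.
    const float y = x >= 0.0f ? x : std::expm1(x);
    // Requantize, clamp to the int8 range, and store as a raw byte.
    const int32_t q =
        static_cast<int32_t>(std::round(y / output_scale)) + output_zero_point;
    table[i] = static_cast<uint8_t>(static_cast<int8_t>(
        std::min<int32_t>(127, std::max<int32_t>(-128, q))));
  }
}

With such a table in place, evaluation is the single lookup per element shown in the diff: output_data[i] = data->table[input_data[i]];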

View File

@@ -1,4 +1,4 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,150 +12,33 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include <type_traits>
#include "absl/memory/memory.h"
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/string_type.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
namespace tflite {
namespace testing {
namespace {
using ::testing::ElementsAreArray;
class BaseActivationsOpModel : public SingleOpModel {
public:
// Most activations don't take any options, so this constructor works for
// them.
BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
input_ = AddInput(input);
if (input.type == TensorType_UINT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
} else if (input.type == TensorType_INT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
} else {
output_ = AddOutput({input.type, {}});
}
SetBuiltinOp(type, BuiltinOptions_NONE, 0);
BuildInterpreter({GetShape(input_)});
#ifdef notdef
BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
input_ = AddInput(input);
if (input.type == TensorType_UINT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
} else if (input.type == TensorType_INT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
} else {
output_ = AddOutput({input.type, {}});
}
BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
TensorData input) {
input_ = AddInput(input);
if (input.type == TensorType_UINT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
} else if (input.type == TensorType_INT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256, -128});
} else {
output_ = AddOutput({input.type, {}});
}
SetBuiltinOp(type, BuiltinOptions_NONE, 0);
resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
BuildInterpreter({GetShape(input_)});
}
// A dedicated constructor for SOFTMAX, which does some options.
BaseActivationsOpModel(float softmax_beta, TensorData input,
TensorType output_type) {
input_ = AddInput(input);
if (output_type == TensorType_UINT8) {
output_ = AddOutput({TensorType_UINT8, {}, 0, 0, 1. / 256});
} else if (output_type == TensorType_INT8) {
output_ = AddOutput({TensorType_INT8, {}, 0, 0, 1. / 256, -128});
} else if (input.type == TensorType_INT16 &&
output_type == TensorType_INT16) {
output_ = AddOutput({TensorType_INT16,
{},
0,
0,
1.0f / (std::numeric_limits<int16_t>::max() + 1),
0});
} else if (input.type != TensorType_INT16 &&
output_type == TensorType_INT16) {
output_ = AddOutput({TensorType_INT16, {}, 0, 0, 1. / 32768, -16384});
} else {
output_ = AddOutput({output_type, {}});
}
SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions,
CreateSoftmaxOptions(builder_, softmax_beta).Union());
BuildInterpreter({GetShape(input_)});
}
// A dedicated constructor for LeakyRelu, which does some options.
BaseActivationsOpModel(TensorData input, float alpha) {
input_ = AddInput(input);
// The output scale and input scale might be different.
if (input.type == TensorType_UINT8 || input.type == TensorType_INT8 ||
input.type == TensorType_INT16) {
auto output_min = (input.min >= 0) ? input.min : input.min * alpha;
auto output_max = (input.max >= 0) ? input.max : input.max * alpha;
if (input.type == TensorType_INT16) {
output_ = AddOutput({TensorType_INT16,
{},
0,
0,
output_max / (std::numeric_limits<int16_t>::max()),
0});
} else {
output_ = AddOutput({input.type, {}, output_min, output_max});
}
} else {
output_ = AddOutput({input.type, {}});
}
SetBuiltinOp(BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions,
CreateLeakyReluOptions(builder_, alpha).Union());
BuildInterpreter({GetShape(input_)});
}
BaseActivationsOpModel(BuiltinOperator type, const TensorData& input,
const TensorData& output) {
input_ = AddInput(input);
output_ = AddOutput(output);
SetBuiltinOp(type, BuiltinOptions_NONE, 0);
BuildInterpreter({GetShape(input_)});
}
BaseActivationsOpModel(TfLiteRegistration* registration, BuiltinOperator type,
const TensorData& input, const TensorData& output) {
input_ = AddInput(input);
output_ = AddOutput(output);
SetBuiltinOp(type, BuiltinOptions_NONE, 0);
resolver_ = absl::make_unique<SingleOpResolver>(type, registration);
BuildInterpreter({GetShape(input_)});
}
protected:
int input_;
int output_;
};
class FloatActivationsOpModel : public BaseActivationsOpModel {
public:
using BaseActivationsOpModel::BaseActivationsOpModel;
void SetInput(const std::vector<float>& data) {
PopulateTensor(input_, data);
}
std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
};
SetBuiltinOp(type, BuiltinOptions_NONE, 0);
BuildInterpreter({GetShape(input_)});
}
#endif // notdef
// Our fixed-point math function implementations have roughly 12 bits of
// accuracy, when specialized to 16-bit fixed-point arithmetic.
@@ -176,41 +59,25 @@ class FloatActivationsOpModel : public BaseActivationsOpModel {
const float kQuantizedTolerance = 2 * (1. / 256);
const float kQuantizedToleranceInt16 = 2 * (1. / 4096);
class QuantizedActivationsOpModel : public BaseActivationsOpModel {
public:
using BaseActivationsOpModel::BaseActivationsOpModel;
TF_LITE_MICRO_TESTS_BEGIN
template <typename T>
void SetInput(const std::vector<float>& data) {
QuantizeAndPopulate<T>(input_, data);
}
template <typename T>
std::vector<T> GetOutput() {
return ExtractVector<T>(output_);
}
template <typename T>
std::vector<float> GetDequantizedOutput() {
return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
GetZeroPoint(output_));
}
};
TEST(FloatActivationsOpTest, Elu) {
TF_LITE_MICRO_TEST(FloatActivationsOpTestElu) {
#ifdef notdef
FloatActivationsOpModel m(BuiltinOperator_ELU,
/*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}});
m.SetInput({
0, -6, 2, -4, //
3, -2, 10, -0.1, //
});
m.Invoke();
EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({
0.0, -0.997521, 2.0, -0.981684, //
3.0, -0.864665, 10.0, -0.0951626, //
})));
#endif // notdef
}
TEST(QuantizedActivationsOpTest, EluInt8) {
TF_LITE_MICRO_TEST(QuantizedActivationsOpTestEluInt8) {
#ifdef notdef
const float kMin = -1;
const float kMax = 127.f / 128.f;
QuantizedActivationsOpModel model(
@@ -231,7 +98,11 @@ TEST(QuantizedActivationsOpTest, EluInt8) {
3.0, -0.875, 6.0, -0.125, //
},
kQuantizedTolerance)));
#endif // notdef
}
TF_LITE_MICRO_TESTS_END
} // namespace
} // namespace testing
} // namespace tflite
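
The expected outputs in the disabled float test above follow directly from the ELU definition: elu(x) = x for x >= 0 and exp(x) - 1 otherwise. A standalone check of those constants, independent of any TFLite test harness (plain standard C++, nothing from this change assumed), might look like this:

#include <cmath>
#include <cstdio>

// ELU as computed by the reference kernel: identity for non-negative inputs,
// exp(x) - 1 for negative inputs.
float Elu(float x) { return x >= 0.0f ? x : std::expm1(x); }

int main() {
  // Inputs and expected outputs taken from the disabled test body above.
  const float inputs[8]   = {0, -6, 2, -4, 3, -2, 10, -0.1f};
  const float expected[8] = {0.0f, -0.997521f, 2.0f, -0.981684f,
                             3.0f, -0.864665f, 10.0f, -0.0951626f};
  for (int i = 0; i < 8; ++i) {
    const float got = Elu(inputs[i]);
    std::printf("elu(%g) = %f (expected %f, diff %g)\n", inputs[i], got,
                expected[i], std::fabs(got - expected[i]));
  }
  return 0;
}

Each difference should be negligible; the original test compares with ArrayFloatNear at its default tolerance.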