diff --git a/tensorflow/lite/micro/kernels/div.cc b/tensorflow/lite/micro/kernels/div.cc
new file mode 100644
index 00000000000..f87f87da45c
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/div.cc
@@ -0,0 +1,266 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stddef.h>
+#include <stdint.h>
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
+#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
+#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace div {
+
+// This file has three implementations of Div.
+enum KernelType {
+  kReference,
+  kGenericOptimized,  // Neon-free
+  kNeonOptimized,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  bool requires_broadcast;
+
+  // Parameters used in the quantized paths where the output is 8bit
+  int32 output_activation_min;
+  int32 output_activation_max;
+
+  // Parameters used in all quantized paths
+  int32_t output_multiplier;
+  int output_shift;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* data = new OpData;
+  data->requires_broadcast = false;
+  return data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const TfLiteTensor* input1;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor1, &input1));
+  const TfLiteTensor* input2;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor2, &input2));
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+  output->type = input2->type;
+
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  TfLiteIntArray* output_size = nullptr;
+  if (data->requires_broadcast) {
+    TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
+                                   context, input1, input2, &output_size));
+  } else {
+    output_size = TfLiteIntArrayCopy(input1->dims);
+  }
+
+  if (output->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+    const double real_multiplier =
+        input1->params.scale / (input2->params.scale * output->params.scale);
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+  }
+
+  return context->ResizeTensor(context, output, output_size);
+}
+
+template <KernelType kernel_type>
+void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
+             const OpData* data, const TfLiteTensor* input1,
+             const TfLiteTensor* input2, TfLiteTensor* output) {
+#define TF_LITE_DIV(type, opname, data_type)                              \
+  tflite::ArithmeticParams op_params;                                     \
+  data_type output_activation_min, output_activation_max;                 \
+  CalculateActivationRange(params->activation, &output_activation_min,    \
+                           &output_activation_max);                       \
+  SetActivationParams(output_activation_min, output_activation_max,       \
+                      &op_params);                                        \
+  type::opname(op_params, GetTensorShape(input1),                         \
+               GetTensorData<data_type>(input1), GetTensorShape(input2),  \
+               GetTensorData<data_type>(input2), GetTensorShape(output),  \
+               GetTensorData<data_type>(output))
+  if (output->type == kTfLiteInt32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, int32_t);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, int32_t);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, int32_t);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, int32_t);
+      }
+    }
+  } else if (output->type == kTfLiteFloat32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, float);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, float);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, float);
+      }
+    }
+  }
+#undef TF_LITE_DIV
+}
+
+template <KernelType kernel_type>
+TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteDivParams* params, const OpData* data,
+                           const TfLiteTensor* input1,
+                           const TfLiteTensor* input2, TfLiteTensor* output) {
+  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
+      output->type == kTfLiteUInt8) {
+    tflite::ArithmeticParams op_params;
+    SetActivationParams(data->output_activation_min,
+                        data->output_activation_max, &op_params);
+    op_params.input1_offset = -input1->params.zero_point;
+    op_params.input2_offset = -input2->params.zero_point;
+    op_params.output_offset = output->params.zero_point;
+    op_params.output_multiplier = data->output_multiplier;
+    op_params.output_shift = data->output_shift;
+    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
+        GetTensorShape(input1), GetTensorShape(input2), &op_params);
+#define TF_LITE_DIV(type, opname, dtype)                              \
+  type::opname(op_params, GetTensorShape(input1),                     \
+               GetTensorData<dtype>(input1), GetTensorShape(input2),  \
+               GetTensorData<dtype>(input2), GetTensorShape(output),  \
+               GetTensorData<dtype>(output))
+    if (kernel_type == kReference) {
+      if (need_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, uint8_t);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, uint8_t);
+      }
+    } else {
+      if (need_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, uint8_t);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, uint8_t);
+      }
+    }
+#undef TF_LITE_DIV
+  } else {
+    TF_LITE_KERNEL_LOG(
+        context, "Unsupported combination of input and output types in Div.");
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  const TfLiteTensor* input1;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor1, &input1));
+  const TfLiteTensor* input2;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor2, &input2));
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_OK(
+        context, EvalQuantized<kernel_type>(context, node, params, data,
+                                            input1, input2, output));
+  } else {
+    TF_LITE_KERNEL_LOG(
+        context,
+        "Div only supports FLOAT32, INT32 and quantized UINT8 now, got %d.",
+        output->type);
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace div
+
+TfLiteRegistration* Register_DIV_REF() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_GENERIC_OPT() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_NEON_OPT() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kNeonOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV() {
+#ifdef USE_NEON
+  return Register_DIV_NEON_OPT();
+#else
+  return Register_DIV_GENERIC_OPT();
+#endif
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/div_test.cc b/tensorflow/lite/micro/kernels/div_test.cc
new file mode 100644
index 00000000000..57836f9b180
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/div_test.cc
@@ -0,0 +1,310 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdint.h>
+#include <initializer_list>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseDivOpModel : public SingleOpModel {
+ public:
+  BaseDivOpModel(const TensorData& input1, const TensorData& input2,
+                 const TensorData& output,
+                 ActivationFunctionType activation_type) {
+    input1_ = AddInput(input1);
+    input2_ = AddInput(input2);
+    output_ = AddOutput(output);
+    SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions,
+                 CreateDivOptions(builder_, activation_type).Union());
+    BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+  }
+
+  int input1() { return input1_; }
+  int input2() { return input2_; }
+
+ protected:
+  int input1_;
+  int input2_;
+  int output_;
+};
+
+class FloatDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+};
+
+class IntegerDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
+class QuantizedDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  template <typename integer_dtype>
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
+                                     GetScale(output_), GetZeroPoint(output_));
+  }
+};
+
+// For quantized Div, the error shouldn't exceed (2*step + step^2).
+inline float GetTolerance(int min, int max) {
+  const float kQuantizedStep = (max - min) / 255.0f;
+  const float kQuantizedTolerance =
+      2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+  return kQuantizedTolerance;
+}
+
+TEST(FloatDivOpTest, NoActivation) {
+  FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.5, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6})));
+}
+
+TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) {
+  FloatDivOpModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0})));
+}
+
+TEST(FloatDivOpTest, VariousInputShapes) {
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0});
+    m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetOutput(),
+        ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(FloatDivOpTest, WithBroadcast) {
+  std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 2, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}},  // always a scalar
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(
+        m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123, -0.32, 0.54});
+    m.PopulateTensor<float>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23, -3.2, 5.4})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(FloatDivOpTest, WithBroadcast5D) {
+  std::vector<std::vector<int>> test_shapes = {{1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}},  // always a scalar
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(
+        m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123, -0.32, 0.54});
+    m.PopulateTensor<float>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23, -3.2, 5.4})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerDivOpTest, NoActivation) {
+  IntegerDivOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_NONE);
+  m.PopulateTensor<int32_t>(m.input1(), {-2, 2, -15, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {5, -2, -3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, -1, 5, 1}));
+}
+
+TEST(IntegerDivOpTest, ActivationRELU_N1_TO_1) {
+  IntegerDivOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<int32_t>(m.input1(), {-2, 2, -12, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, -15, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 0, 1}));
+}
+
+TEST(IntegerDivOpTest, VariousInputShapes) {
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerDivOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 3, 8, 11, -20});
+    m.PopulateTensor<int32_t>(m.input2(), {1, 2, 6, 5, -11, -1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 1, 0, 1, -1, 20}))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerDivOpTest, WithBroadcast) {
+  std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 4, 1, 2}, {1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerDivOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}},  // always a scalar
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 21, 7, 8, 11, -123, -42, -48});
+    m.PopulateTensor<int32_t>(m.input2(), {3});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray({-6, 7, 2, 2, 3, -41, -14, -16}))
+        << "With shape number " << i;
+  }
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedNoActivation() {
+  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {}, -1.0, 1.0},
+                        ActivationFunctionType_NONE);
+  m.QuantizeAndPopulate<integer_dtype>(m.input1(), {-0.8, -0.2, 0.3, 0.7});
+  m.QuantizeAndPopulate<integer_dtype>(m.input2(), {-0.8, 0.4, 0.8, 1.0});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
+              ElementsAreArray(ArrayFloatNear({1.0, -0.5, 0.375, 0.7},
+                                              kQuantizedTolerance)));
+}
+
+TEST(QuantizedDivOpTest, QuantizedNoActivationUInt8) {
+  QuantizedNoActivation<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedActivationRELU_N1_TO_1() {
+  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  const std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
+                                                   {-0.5, 0.2, 0.6, 0.3}};
+  const std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
+                                                   {0.6, 0.5, -0.8, 0.5}};
+  const std::vector<std::vector<float>> results = {{-1.0, 0.5, 1.0, -0.875},
+                                                   {-0.833, 0.4, -0.75, 0.6}};
+  for (int i = 0; i < inputs1.size(); ++i) {
+    QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
+                          ActivationFunctionType_RELU_N1_TO_1);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
+        << "With test number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedActivationRELU_N1_TO_1UInt8) {
+  QuantizedActivationRELU_N1_TO_1<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedVariousInputShapes() {
+  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  const std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedDivOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, {}, -3.0, 3.0},
+                          ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 1.7, 0.9, 0.4, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(),
+                                         {1.3, 0.3, 1.1, 0.4, -1.1, 1.9});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(
+            {-1.538, 0.667, 1.545, 2.25, -0.364, 1.053}, kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedVariousInputShapesUInt8) {
+  QuantizedVariousInputShapes<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedWithBroadcast() {
+  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  const std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 4, 1, 2}, {1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedDivOpModel m(
+        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
+        {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<integer_dtype>(
+        m.input1(), {-2.0, 0.2, 0.7, 0.8, -0.5, 1.1, -1.3, 1.2});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.7});
+    m.Invoke();
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.857, 0.286, 1.0, 1.143, -0.714, 1.571, -1.857, 1.714},
+                    kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedWithBroadcastUInt8) {
+  QuantizedWithBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+}  // namespace
+}  // namespace tflite