From 00c62a3f6e6afeb744214af31de5bc6fd4a6ecb6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Aug 2019 23:08:17 -0700 Subject: [PATCH] Add maximum and minimum ops for TF Micro PiperOrigin-RevId: 261267068 --- .../lite/experimental/micro/kernels/BUILD | 15 + .../micro/kernels/all_ops_resolver.cc | 5 +- .../micro/kernels/maximum_minimum.cc | 141 ++++++++ .../micro/kernels/maximum_minimum_test.cc | 314 ++++++++++++++++++ .../experimental/micro/tools/make/Makefile | 1 + tensorflow/lite/kernels/internal/BUILD | 2 + .../internal/reference/maximum_minimum.h | 61 ++++ .../internal/reference/reference_ops.h | 36 +- 8 files changed, 539 insertions(+), 36 deletions(-) create mode 100644 tensorflow/lite/experimental/micro/kernels/maximum_minimum.cc create mode 100644 tensorflow/lite/experimental/micro/kernels/maximum_minimum_test.cc create mode 100644 tensorflow/lite/kernels/internal/reference/maximum_minimum.h diff --git a/tensorflow/lite/experimental/micro/kernels/BUILD b/tensorflow/lite/experimental/micro/kernels/BUILD index 85cbaf986b6..ca013a304e4 100644 --- a/tensorflow/lite/experimental/micro/kernels/BUILD +++ b/tensorflow/lite/experimental/micro/kernels/BUILD @@ -20,6 +20,7 @@ cc_library( "elementwise.cc", "floor.cc", "fully_connected.cc", + "maximum_minimum.cc", "pooling.cc", "prelu.cc", "softmax.cc", @@ -63,6 +64,7 @@ cc_library( "elementwise.cc", "floor.cc", "fully_connected.cc", + "maximum_minimum.cc", "pooling.cc", "portable_optimized/depthwise_conv.cc", "prelu.cc", @@ -214,6 +216,19 @@ tflite_micro_cc_test( ], ) +tflite_micro_cc_test( + name = "maximum_minimum_test", + srcs = [ + "maximum_minimum_test.cc", + ], + deps = [ + ":all_ops_resolver", + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/experimental/micro:micro_framework", + "//tensorflow/lite/experimental/micro/testing:micro_test", + ], +) + tflite_micro_cc_test( name = "arg_min_max_test", srcs = [ diff --git a/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc 
b/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc index dc86d034349..42f9b108832 100644 --- a/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc +++ b/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc @@ -25,9 +25,10 @@ TfLiteRegistration* Register_MAX_POOL_2D(); TfLiteRegistration* Register_ABS(); TfLiteRegistration* Register_PRELU(); TfLiteRegistration* Register_FLOOR(); +TfLiteRegistration* Register_MAXIMUM(); +TfLiteRegistration* Register_MINIMUM(); TfLiteRegistration* Register_ARG_MAX(); TfLiteRegistration* Register_ARG_MIN(); - AllOpsResolver::AllOpsResolver() { AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D()); AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED(), @@ -40,6 +41,8 @@ AllOpsResolver::AllOpsResolver() { AddBuiltin(BuiltinOperator_ABS, Register_ABS()); AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR()); + AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM()); + AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM()); AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX()); AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN()); } diff --git a/tensorflow/lite/experimental/micro/kernels/maximum_minimum.cc b/tensorflow/lite/experimental/micro/kernels/maximum_minimum.cc new file mode 100644 index 00000000000..bbbfb03f182 --- /dev/null +++ b/tensorflow/lite/experimental/micro/kernels/maximum_minimum.cc @@ -0,0 +1,141 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace maximum_minimum { +namespace { + +// This file has a reference implementation of TFMaximum/TFMinimum. +enum KernelType { + kReference, +}; + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + input1 = GetInput(context, node, kInputTensor1); + input2 = GetInput(context, node, kInputTensor2); + output = GetOutput(context, node, kOutputTensor); + } + const TfLiteTensor* input1; + const TfLiteTensor* input2; + TfLiteTensor* output; +}; + +struct MaximumOp { + template <typename data_type> + static data_type op(data_type el1, data_type el2) { + return el1 > el2 ? el1 : el2; + } +}; + +struct MinimumOp { + template <typename data_type> + static data_type op(data_type el1, data_type el2) { + return el1 < el2 ? 
el1 : el2; + } +}; + +} // namespace + +template <KernelType kernel_type, typename data_type, typename op_type> +void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, + const OpContext& op_context) { + reference_ops::MaximumMinimumBroadcast4DSlow( + GetTensorShape(op_context.input1), + GetTensorData<data_type>(op_context.input1), + GetTensorShape(op_context.input2), + GetTensorData<data_type>(op_context.input2), + GetTensorShape(op_context.output), + GetTensorData<data_type>(op_context.output), + op_type::template op<data_type>); +} + +template <KernelType kernel_type, typename OpType> +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + + if (kernel_type == kReference) { + switch (op_context.output->type) { + case kTfLiteFloat32: + TFLiteOperation<kernel_type, float, OpType>(context, node, op_context); + break; + case kTfLiteUInt8: + TFLiteOperation<kernel_type, uint8_t, OpType>(context, node, op_context); + break; + case kTfLiteInt8: + TFLiteOperation<kernel_type, int8_t, OpType>(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation<kernel_type, int32_t, OpType>(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation<kernel_type, int64_t, OpType>(context, node, op_context); + break; + default: + context->ReportError(context, + "Type %d is not supported by Maximum/Minimum.", + op_context.output->type); + return kTfLiteError; + } + } else { + context->ReportError(context, + "Kernel type not supported by Maximum/Minimum.", + op_context.output->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace maximum_minimum + +TfLiteRegistration* Register_MAXIMUM() { + static TfLiteRegistration r = { + /* init */ nullptr, + /* free */ nullptr, + /* prepare */ nullptr, + maximum_minimum::Eval<maximum_minimum::kReference, maximum_minimum::MaximumOp>}; + return &r; +} + +TfLiteRegistration* Register_MINIMUM() { + static TfLiteRegistration r = { + /* init */ nullptr, + /* free */ nullptr, + /* prepare */ nullptr, + maximum_minimum::Eval<maximum_minimum::kReference, maximum_minimum::MinimumOp>}; + return &r; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/experimental/micro/kernels/maximum_minimum_test.cc b/tensorflow/lite/experimental/micro/kernels/maximum_minimum_test.cc new file mode 100644 index 
00000000000..b944b4bd841 --- /dev/null +++ b/tensorflow/lite/experimental/micro/kernels/maximum_minimum_test.cc @@ -0,0 +1,314 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h" +#include "tensorflow/lite/experimental/micro/simple_tensor_allocator.h" +#include "tensorflow/lite/experimental/micro/testing/micro_test.h" +#include "tensorflow/lite/experimental/micro/testing/test_utils.h" + +namespace tflite { +namespace testing { +namespace { + +void TestMaxMinFloat(tflite::BuiltinOperator op, + std::initializer_list<int> input1_dims_data, + std::initializer_list<float> input1_data, + std::initializer_list<int> input2_dims_data, + std::initializer_list<float> input2_data, + std::initializer_list<float> expected_output_data, + std::initializer_list<int> output_dims_data, + float* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor 
tensors[tensors_size] = { + CreateFloatTensor(input1_data, input1_dims, "input1_tensor"), + CreateFloatTensor(input2_data, input2_dims, "input2_tensor"), + CreateFloatTensor(output_data, output_dims, "output_tensor"), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = resolver.FindOp(op, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1}); + TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2}); + TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0}); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = nullptr; + node.builtin_data = nullptr; + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], + 1e-5); + } +} + +void TestMaxMinQuantized( + tflite::BuiltinOperator op, std::initializer_list<int> input1_dims_data, + std::initializer_list<uint8_t> input1_data, float input1_min, + float input1_max, std::initializer_list<int> input2_dims_data, + std::initializer_list<uint8_t> input2_data, float input2_min, + float input2_max, std::initializer_list<uint8_t> expected_output_data, + float output_min, float output_max, + std::initializer_list<int> output_dims_data, uint8_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); + TfLiteIntArray* output_dims = 
IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input1_data, input1_dims, "input1_tensor", + input1_min, input1_max), + CreateQuantizedTensor(input2_data, input2_dims, "input2_tensor", + input2_min, input2_max), + CreateQuantizedTensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = resolver.FindOp(op, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1}); + TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2}); + TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0}); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = nullptr; + node.builtin_data = nullptr; + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +void TestMaxMinQuantizedInt32( + tflite::BuiltinOperator op, std::initializer_list<int> input1_dims_data, + std::initializer_list<int32_t> input1_data, float input1_min, + float input1_max, std::initializer_list<int> input2_dims_data, + std::initializer_list<int32_t> input2_data, float input2_min, + float input2_max, std::initializer_list<int32_t> 
expected_output_data, + float output_min, float output_max, + std::initializer_list<int> output_dims_data, int32_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateQuantized32Tensor(input1_data, input1_dims, "input1_tensor", + input1_min, input1_max), + CreateQuantized32Tensor(input2_data, input2_dims, "input2_tensor", + input2_min, input2_max), + CreateQuantized32Tensor(output_data, output_dims, "output_tensor", + output_min, output_max), + }; + + TfLiteContext context; + PopulateContext(tensors, tensors_size, &context); + + ::tflite::ops::micro::AllOpsResolver resolver; + const TfLiteRegistration* registration = resolver.FindOp(op, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + + TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1}); + TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2}); + TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0}); + + TfLiteNode node; + node.inputs = inputs_array; + node.outputs = outputs_array; + node.temporaries = temporaries_array; + node.user_data = nullptr; + node.builtin_data = nullptr; + node.custom_initial_data = nullptr; + node.custom_initial_data_size = 0; + node.delegate = nullptr; + + if (registration->prepare) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); + } + + TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + for (int i = 0; i < output_dims_count; ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + } +} + +} 
// namespace +} // namespace testing +} // namespace tflite + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(FloatTest) { + std::initializer_list<float> data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::initializer_list<float> data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + float output_data[6]; + + tflite::testing::TestMaxMinFloat( + tflite::BuiltinOperator_MAXIMUM, {3, 3, 1, 2}, + data1, // input1 shape and data + {3, 3, 1, 2}, data2, // input2 shape and data + {1.0, 0.0, 1.0, 12.0, -2.0, -1.43}, // expected output + {3, 3, 1, 2}, output_data); // output shape and data buffer + + tflite::testing::TestMaxMinFloat( + tflite::BuiltinOperator_MINIMUM, {3, 3, 1, 2}, + data1, // input1 shape and data + {3, 3, 1, 2}, data2, // input2 shape and data + {-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}, // expected output + {3, 3, 1, 2}, output_data); // output shape and data buffer +} + +TF_LITE_MICRO_TEST(Uint8Test) { + std::initializer_list<uint8_t> data1 = {1, 0, 2, 11, 2, 23}; + std::initializer_list<uint8_t> data2 = {0, 0, 1, 12, 255, 1}; + const float input1_min = -63.5; + const float input1_max = 64; + const float input2_min = -63.5; + const float input2_max = 64; + const float output_min = -63.5; + const float output_max = 64; + + uint8_t output_data[6]; + + tflite::testing::TestMaxMinQuantized( + tflite::BuiltinOperator_MAXIMUM, + // input1 shape, data and bounds + {3, 3, 1, 2}, data1, input1_min, input1_max, + // input2 shape, data and bounds + {3, 3, 1, 2}, data2, input2_min, input2_max, + // expected output + {1, 0, 2, 12, 255, 23}, + // output bounds, shape and data buffer + output_min, output_max, {3, 3, 1, 2}, output_data); + + tflite::testing::TestMaxMinQuantized( + tflite::BuiltinOperator_MINIMUM, + // input1 shape, data and bounds + {3, 3, 1, 2}, data1, input1_min, input1_max, + // input2 shape, data and bounds + {3, 3, 1, 2}, data2, input2_min, input2_max, + // expected output + {0, 0, 1, 11, 2, 1}, + // output bounds, shape and data buffer + output_min, output_max, {3, 3, 1, 2}, output_data); 
+} + +TF_LITE_MICRO_TEST(FloatWithBroadcastTest) { + std::initializer_list<float> data1 = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0}; + std::initializer_list<float> data2 = {0.5, 2.0}; + float output_data[6]; + + tflite::testing::TestMaxMinFloat( + tflite::BuiltinOperator_MAXIMUM, {3, 3, 1, 2}, + data1, // input1 shape and data + {1, 2}, data2, // input2 shape and data + {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}, // expected output + {3, 3, 1, 2}, output_data); // output shape and data buffer + + tflite::testing::TestMaxMinFloat( + tflite::BuiltinOperator_MINIMUM, {3, 3, 1, 2}, + data1, // input1 shape and data + {1, 2}, data2, // input2 shape and data + {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}, // expected output + {3, 3, 1, 2}, output_data); // output shape and data buffer +} + +TF_LITE_MICRO_TEST(Int32WithBroadcastTest) { + const float input1_min = -63.5; + const float input1_max = 64; + const float input2_min = -63.5; + const float input2_max = 64; + const float output_min = -63.5; + const float output_max = 64; + std::initializer_list<int32_t> data1 = {1, 0, -1, -2, 3, 11}; + std::initializer_list<int32_t> data2 = {2}; + int32_t output_data[6]; + + tflite::testing::TestMaxMinQuantizedInt32( + tflite::BuiltinOperator_MAXIMUM, + // input1 shape, data and bounds + {3, 3, 1, 2}, data1, input1_min, input1_max, + // input2 shape, data and bounds + {1, 1}, data2, input2_min, input2_max, + // expected output + {2, 2, 2, 2, 3, 11}, + // output bounds, shape and data buffer + output_min, output_max, {3, 3, 1, 2}, output_data); + + tflite::testing::TestMaxMinQuantizedInt32( + tflite::BuiltinOperator_MINIMUM, + // input1 shape, data and bounds + {3, 3, 1, 2}, data1, input1_min, input1_max, + // input2 shape, data and bounds + {1, 1}, data2, input2_min, input2_max, + // expected output + {1, 0, -1, -2, 2, 2}, + // output bounds, shape and data buffer + output_min, output_max, {3, 3, 1, 2}, output_data); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/experimental/micro/tools/make/Makefile 
b/tensorflow/lite/experimental/micro/tools/make/Makefile index 36366128f60..f51be430df3 100644 --- a/tensorflow/lite/experimental/micro/tools/make/Makefile +++ b/tensorflow/lite/experimental/micro/tools/make/Makefile @@ -114,6 +114,7 @@ tensorflow/lite/kernels/internal/reference/floor.h \ tensorflow/lite/kernels/internal/reference/fully_connected.h \ tensorflow/lite/kernels/internal/reference/pooling.h \ tensorflow/lite/kernels/internal/reference/prelu.h \ +tensorflow/lite/kernels/internal/reference/maximum_minimum.h \ tensorflow/lite/kernels/internal/reference/softmax.h \ tensorflow/lite/kernels/internal/reference/arg_min_max.h \ tensorflow/lite/kernels/internal/round.h \ diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 7c5889f82e1..f1e91450fe1 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -366,6 +366,7 @@ cc_library( "reference/integer_ops/pooling.h", "reference/integer_ops/softmax.h", "reference/integer_ops/tanh.h", + "reference/maximum_minimum.h", "reference/pooling.h", "reference/prelu.h", "reference/reference_ops.h", @@ -409,6 +410,7 @@ cc_library( "reference/floor.h", "reference/fully_connected.h", "reference/legacy_reference_ops.h", + "reference/maximum_minimum.h", "reference/pooling.h", "reference/prelu.h", "reference/reference_ops.h", diff --git a/tensorflow/lite/kernels/internal/reference/maximum_minimum.h b/tensorflow/lite/kernels/internal/reference/maximum_minimum.h new file mode 100644 index 00000000000..480069aa13e --- /dev/null +++ b/tensorflow/lite/kernels/internal/reference/maximum_minimum.h @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ + +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace reference_ops { + +template <typename T, typename Op> +void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data, Op op) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + const RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); + + for (int b = 0; b < output_shape.Dims(0); ++b) { + for (int y = 0; y < output_shape.Dims(1); ++y) { + for (int x = 0; x < output_shape.Dims(2); ++x) { + for (int c = 0; c < output_shape.Dims(3); ++c) { + auto out_idx = Offset(output_shape, b, y, x, c); + auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); + auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); + auto in1_val = input1_data[in1_idx]; + auto in2_val = input2_data[in2_idx]; + output_data[out_idx] = op(in1_val, in2_val); + } + } + } + 
} +} + +} // namespace reference_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_ diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index db22827dc79..932df39fe33 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/conv.h" #include "tensorflow/lite/kernels/internal/reference/floor.h" #include "tensorflow/lite/kernels/internal/reference/fully_connected.h" +#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h" #include "tensorflow/lite/kernels/internal/reference/pooling.h" #include "tensorflow/lite/kernels/internal/reference/prelu.h" #include "tensorflow/lite/kernels/internal/reference/softmax.h" @@ -3538,41 +3539,6 @@ inline void Maximum(const RuntimeShape& input1_shape, const T* input1_data, Maximum(input1_shape, input1_data, input2_data, output_shape, output_data); } -template <typename T, typename Op> -void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const T* input2_data, - const RuntimeShape& unextended_output_shape, - T* output_data, Op op) { - gemmlowp::ScopedProfilingLabel label("MaximumMinimumBroadcast4DSlow"); - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - - for (int b = 0; b < output_shape.Dims(0); ++b) { - for (int y = 0; y < output_shape.Dims(1); 
++y) { - for (int x = 0; x < output_shape.Dims(2); ++x) { - for (int c = 0; c < output_shape.Dims(3); ++c) { - auto out_idx = Offset(output_shape, b, y, x, c); - auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); - auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); - auto in1_val = input1_data[in1_idx]; - auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = op(in1_val, in2_val); - } - } - } - } -} - template void ArgMax(const RuntimeShape& input1_shape, const T1* input1_data, const T3* input2_data, const RuntimeShape& output_shape,