From ee940c2bea31724bce87b2dd064e2a3a6e15339a Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 24 Mar 2020 03:34:58 -0700 Subject: [PATCH] Add 5D support for TFLite Maximum Minimum PiperOrigin-RevId: 302632680 Change-Id: I1d14cc1afe01888d731e3e68a398f2907e18f174 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 6 +- .../lite/delegates/nnapi/nnapi_delegate.cc | 2 +- .../internal/reference/legacy_reference_ops.h | 6 +- .../internal/reference/maximum_minimum.h | 63 ++++++++++--------- tensorflow/lite/kernels/maximum_minimum.cc | 2 +- .../lite/kernels/maximum_minimum_test.cc | 41 ++++++++++++ tensorflow/lite/kernels/register.cc | 4 +- .../lite/micro/kernels/maximum_minimum.cc | 2 +- tensorflow/lite/testing/op_tests/maximum.py | 8 ++- tensorflow/lite/testing/op_tests/minimum.py | 8 ++- tensorflow/lite/toco/tflite/operator.cc | 4 +- .../lite/tools/versioning/op_version.cc | 24 ++++--- tensorflow/lite/tools/versioning/op_version.h | 2 +- .../lite/tools/versioning/op_version_test.cc | 46 +++++++++++++- 14 files changed, 156 insertions(+), 62 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 4f560913593..963eea3a746 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -1637,8 +1637,7 @@ def TFL_MaxUnpooling2DOp : } def TFL_MaximumOp : TFL_Op<"maximum", [ - ResultsBroadcastableShape, NoSideEffect, Commutative, SameOperandsAndResultsScale, - TFL_OperandHasRankLessThan<0, 4>, TFL_OperandHasRankLessThan<1, 4>]> { + ResultsBroadcastableShape, NoSideEffect, Commutative, SameOperandsAndResultsScale]> { let summary = "Max operator"; let description = [{ Element-wise max operation. @@ -1836,8 +1835,7 @@ def TFL_ReduceProdOp: TFL_Op<"reduce_prod", [NoSideEffect]> { } def TFL_MinimumOp : TFL_Op<"minimum", [ - ResultsBroadcastableShape, NoSideEffect, Commutative, SameOperandsAndResultsScale, - TFL_OperandHasRankLessThan<0, 4>, TFL_OperandHasRankLessThan<1, 4>]> { + ResultsBroadcastableShape, NoSideEffect, Commutative, SameOperandsAndResultsScale]> { let summary = "Min operator"; let description = [{ Element-wise min operation. diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index c26620f9ef3..25a09943394 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -2061,7 +2061,7 @@ bool NNAPIDelegateKernel::Validate( } break; case kTfLiteBuiltinMaximum: case kTfLiteBuiltinMinimum: { - ExpectMaxOpVersion(version, 2, &val_ctx); + ExpectMaxOpVersion(version, 3, &val_ctx); ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, &val_ctx); const auto input_type = context->tensors[node->inputs->data[0]].type; diff --git a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h index 61006bce47e..5e0cf7224ce 100644 --- a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h @@ -2147,9 +2147,9 @@ void TensorFlowMaximumMinimum(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, const Dims<4>& input2_dims, T* output_data, const Dims<4>& output_dims, Op op) { - MaximumMinimumBroadcast4DSlow(DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, - DimsToShape(output_dims), output_data, op); + MaximumMinimumBroadcastSlow(DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data, op); } template diff --git a/tensorflow/lite/kernels/internal/reference/maximum_minimum.h b/tensorflow/lite/kernels/internal/reference/maximum_minimum.h index 480069aa13e..cd11b4191ac 100644 --- a/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +++ b/tensorflow/lite/kernels/internal/reference/maximum_minimum.h @@ -21,37 +21,40 @@ limitations under the License. namespace tflite { namespace reference_ops { -template -void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape, - const T* input1_data, - const RuntimeShape& unextended_input2_shape, - const T* input2_data, - const RuntimeShape& unextended_output_shape, - T* output_data, Op op) { - TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(4, unextended_output_shape); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, - unextended_input2_shape, &desc1, &desc2); - - for (int b = 0; b < output_shape.Dims(0); ++b) { - for (int y = 0; y < output_shape.Dims(1); ++y) { - for (int x = 0; x < output_shape.Dims(2); ++x) { - for (int c = 0; c < output_shape.Dims(3); ++c) { - auto out_idx = Offset(output_shape, b, y, x, c); - auto in1_idx = SubscriptToIndex(desc1, b, y, x, c); - auto in2_idx = SubscriptToIndex(desc2, b, y, x, c); - auto in1_val = input1_data[in1_idx]; - auto in2_val = input2_data[in2_idx]; - output_data[out_idx] = op(in1_val, in2_val); - } - } +template +void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape, + const T* input1_data, + const RuntimeShape& unextended_input2_shape, + const T* input2_data, + const RuntimeShape& unextended_output_shape, + T* output_data, Op op) { + // Uses element-wise calculation if broadcast is not required. + if (unextended_input1_shape == unextended_input2_shape) { + const int flat_size = + MatchingElementsSize(unextended_input1_shape, unextended_input2_shape, + unextended_output_shape); + for (int i = 0; i < flat_size; ++i) { + output_data[i] = op(input1_data[i], input2_data[i]); } + } else { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N); + + NdArrayDesc desc1; + NdArrayDesc desc2; + NdArrayDesc output_desc; + NdArrayDescsForElementwiseBroadcast( + unextended_input1_shape, unextended_input2_shape, &desc1, &desc2); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + auto maxmin_func = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + op(input1_data[SubscriptToIndex(desc1, indexes)], + input2_data[SubscriptToIndex(desc2, indexes)]); + }; + NDOpsHelper(output_desc, maxmin_func); } } diff --git a/tensorflow/lite/kernels/maximum_minimum.cc b/tensorflow/lite/kernels/maximum_minimum.cc index 29ac306311b..3c6c524c13d 100644 --- a/tensorflow/lite/kernels/maximum_minimum.cc +++ b/tensorflow/lite/kernels/maximum_minimum.cc @@ -88,7 +88,7 @@ struct MinimumOp { template void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, const OpContext& op_context) { - reference_ops::MaximumMinimumBroadcast4DSlow( + reference_ops::MaximumMinimumBroadcastSlow( GetTensorShape(op_context.input1), GetTensorData(op_context.input1), GetTensorShape(op_context.input2), diff --git a/tensorflow/lite/kernels/maximum_minimum_test.cc b/tensorflow/lite/kernels/maximum_minimum_test.cc index 8ec93a542b8..b22435d3e97 100644 --- a/tensorflow/lite/kernels/maximum_minimum_test.cc +++ b/tensorflow/lite/kernels/maximum_minimum_test.cc @@ -186,5 +186,46 @@ TEST(MaximumOpTest, Int32WithBroadcastTest_ScalarY) { {TensorType_INT32, {}}, {TensorType_INT32, {3, 1, 2}}, data1, data2, {1, 0, -1, -2, 2, 2}, /*is_constant=*/true); } + +TEST(MaxMinOpTest, Int8Test8D) { + std::initializer_list data1 = {1, 0, 2, 11, 2, 23}; + std::initializer_list data2 = {0, 0, 1, 12, 123, 1}; + TestModel(BuiltinOperator_MAXIMUM, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, data1, data2, + {1, 0, 2, 12, 123, 23}); + TestModel(BuiltinOperator_MINIMUM, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, + {TensorType_INT8, {3, 1, 2, 1, 1, 1, 1, 1}}, data1, data2, + {0, 0, 1, 11, 2, 1}); +} + +TEST(MaximumOpTest, FloatWithBroadcastTest5D) { + std::initializer_list data1 = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0}; + std::initializer_list data2 = {0.5, 2.0}; + TestModel( + BuiltinOperator_MAXIMUM, {TensorType_FLOAT32, {3, 1, 1, 1, 2}}, + {TensorType_FLOAT32, {2}}, {TensorType_FLOAT32, {3, 1, 1, 1, 2}}, data1, + data2, {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}); + TestModel( + BuiltinOperator_MINIMUM, {TensorType_FLOAT32, {3, 1, 1, 1, 2}}, + {TensorType_FLOAT32, {2}}, {TensorType_FLOAT32, {3, 1, 1, 1, 2}}, data1, + data2, {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}); +} + +TEST(MaximumOpTest, Int32WithBroadcastTest5D) { + std::initializer_list data1 = {1, 0, -1, -2, 3, 11}; + std::initializer_list data2 = {2}; + TestModel( + BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2, 1, 1}}, + {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2, 1, 1}}, data1, + data2, {2, 2, 2, 2, 3, 11}); + TestModel( + BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2, 1, 1}}, + {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2, 1, 1}}, data1, + data2, {1, 0, -1, -2, 2, 2}); +} } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 1e148a0c1f5..626c092a2ab 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -164,10 +164,10 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_PRELU, Register_PRELU()); AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(), /* min_version */ 1, - /* max_version */ 3); + /* max_version */ 4); AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(), /* min_version */ 1, - /* max_version */ 3); + /* max_version */ 4); AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX(), /* min_version */ 1, /* max_version */ 2); diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.cc b/tensorflow/lite/micro/kernels/maximum_minimum.cc index 12e62168127..4617767d2a0 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum.cc @@ -68,7 +68,7 @@ struct MinimumOp { template void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, const OpContext& op_context) { - reference_ops::MaximumMinimumBroadcast4DSlow( + reference_ops::MaximumMinimumBroadcastSlow( GetTensorShape(op_context.input1), GetTensorData(op_context.input1), GetTensorShape(op_context.input2), diff --git a/tensorflow/lite/testing/op_tests/maximum.py b/tensorflow/lite/testing/op_tests/maximum.py index a78ac6eb8c7..e648f634955 100644 --- a/tensorflow/lite/testing/op_tests/maximum.py +++ b/tensorflow/lite/testing/op_tests/maximum.py @@ -29,8 +29,10 @@ def make_maximum_tests(options): test_parameters = [{ "input_dtype": [tf.float32], - "input_shape_1": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], - "input_shape_2": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + "input_shape_1": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3], + [5, 32, 32, 3, 1], [5, 32, 32, 3, 1]], + "input_shape_2": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3], + [5, 32, 32, 3, 3], [5, 32, 32, 3, 1]], "fully_quantize": [False, True], }] @@ -69,4 +71,4 @@ def make_maximum_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=16) + expected_tf_failures=44) diff --git a/tensorflow/lite/testing/op_tests/minimum.py b/tensorflow/lite/testing/op_tests/minimum.py index 1591acc291f..b45cc3543c1 100644 --- a/tensorflow/lite/testing/op_tests/minimum.py +++ b/tensorflow/lite/testing/op_tests/minimum.py @@ -29,8 +29,10 @@ def make_minimum_tests(options): test_parameters = [{ "input_dtype": [tf.float32], - "input_shape_1": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], - "input_shape_2": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + "input_shape_1": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3], + [5, 32, 32, 1, 1], [5, 32, 32, 1, 1]], + "input_shape_2": [[], [3], [1, 100], [4, 2, 3], [5, 224, 224, 3], + [5, 32, 32, 1, 1], [5, 32, 32, 1, 3]], "fully_quantize": [False, True], }] @@ -69,4 +71,4 @@ def make_minimum_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=16) + expected_tf_failures=44) diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 76a5889948a..f72dcc27235 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -274,10 +274,10 @@ class Sub : public BuiltinOperator 4) { return 3; } + if (op_sig.input_types.at(0) == TensorType_INT8) { + return 2; + } return 1; case BuiltinOperator_PACK: @@ -357,8 +360,8 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 1; case BuiltinOperator_SUB: - if (op_sig.options.sub.need_broadcast && - op_sig.options.sub.num_dims > 4) { + if (op_sig.options.broadcast.need_broadcast && + op_sig.options.broadcast.num_dims > 4) { return 3; } if (op_sig.input_types.at(0) == TensorType_INT8) { @@ -509,9 +512,12 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, op_sig.options.space_batch.num_dims = GetNumDims(subgraph, op, 0); } break; - case BuiltinOperator_SUB: { - op_sig.options.sub.need_broadcast = !HaveSameShapes(subgraph, op, 0, 1); - op_sig.options.sub.num_dims = + case BuiltinOperator_SUB: + case BuiltinOperator_MAXIMUM: + case BuiltinOperator_MINIMUM: { + op_sig.options.broadcast.need_broadcast = + !HaveSameShapes(subgraph, op, 0, 1); + op_sig.options.broadcast.num_dims = std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1)); } break; diff --git a/tensorflow/lite/tools/versioning/op_version.h b/tensorflow/lite/tools/versioning/op_version.h index e22e5403a8a..c1931bc4709 100644 --- a/tensorflow/lite/tools/versioning/op_version.h +++ b/tensorflow/lite/tools/versioning/op_version.h @@ -58,7 +58,7 @@ typedef struct { struct { int32_t num_dims; bool need_broadcast; - } sub; + } broadcast; } options; } OpSignature; diff --git a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc index 528fca4337c..95dccbf71bb 100644 --- a/tensorflow/lite/tools/versioning/op_version_test.cc +++ b/tensorflow/lite/tools/versioning/op_version_test.cc @@ -221,11 +221,53 @@ TEST(OpVersionTest, VersioningL2NormTest) { } TEST(OpVersionTest, VersioningMaxTest) { - SimpleVersioningTest(BuiltinOperator_MAXIMUM); + OpSignature fake_op_sig = { + .op = BuiltinOperator_MAXIMUM, + .input_types = std::vector{TensorType_INT8}, + }; + + fake_op_sig.options.broadcast.need_broadcast = true; + fake_op_sig.options.broadcast.num_dims = 5; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + fake_op_sig.options.broadcast.need_broadcast = false; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + fake_op_sig.options.broadcast.num_dims = 4; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + + fake_op_sig = { + .op = BuiltinOperator_MAXIMUM, + .input_types = std::vector{TensorType_UINT8}, + }; + fake_op_sig.options.broadcast.need_broadcast = true; + fake_op_sig.options.broadcast.num_dims = 5; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + fake_op_sig.options.broadcast.num_dims = 4; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); } TEST(OpVersionTest, VersioningMinTest) { - SimpleVersioningTest(BuiltinOperator_MINIMUM); + OpSignature fake_op_sig = { + .op = BuiltinOperator_MINIMUM, + .input_types = std::vector{TensorType_INT8}, + }; + + fake_op_sig.options.broadcast.need_broadcast = true; + fake_op_sig.options.broadcast.num_dims = 5; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + fake_op_sig.options.broadcast.need_broadcast = false; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + fake_op_sig.options.broadcast.num_dims = 4; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2); + + fake_op_sig = { + .op = BuiltinOperator_MINIMUM, + .input_types = std::vector{TensorType_UINT8}, + }; + fake_op_sig.options.broadcast.need_broadcast = true; + fake_op_sig.options.broadcast.num_dims = 5; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + fake_op_sig.options.broadcast.num_dims = 4; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); } TEST(OpVersionTest, VersioningMeanTest) {