diff --git a/tensorflow/lite/delegates/gpu/README.md b/tensorflow/lite/delegates/gpu/README.md index 2b216773c18..42d8e4b2caa 100644 --- a/tensorflow/lite/delegates/gpu/README.md +++ b/tensorflow/lite/delegates/gpu/README.md @@ -34,6 +34,8 @@ TFLite on GPU supports the following ops in 16-bit and 32-bit float precision: * `LOGISTIC v1` * `LSTM v2 (Basic LSTM only)` * `MAX_POOL_2D v1` +* `MAXIMUM v1` +* `MINIMUM v1` * `MUL v1` * `PAD v1` * `PRELU v1` diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index b6c6b1409f8..9fb3e45fe81 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -106,7 +106,9 @@ ElementwiseTwoInput::ElementwiseTwoInput(ElementwiseTwoInput&& operation) : ElementwiseOperation(std::move(operation)), link_index_(operation.link_index_), op_type_(operation.op_type_), - broadcast_(operation.broadcast_) {} + broadcast_(operation.broadcast_), + scalar_para_(operation.scalar_para_), + use_scalar_para_(operation.use_scalar_para_) {} ElementwiseTwoInput& ElementwiseTwoInput::operator=( ElementwiseTwoInput&& operation) { @@ -114,30 +116,43 @@ ElementwiseTwoInput& ElementwiseTwoInput::operator=( link_index_ = operation.link_index_; op_type_ = operation.op_type_; broadcast_ = operation.broadcast_; + scalar_para_ = operation.scalar_para_; + use_scalar_para_ = operation.use_scalar_para_; ElementwiseOperation::operator=(std::move(operation)); } return *this; } -void ElementwiseTwoInput::SetLinkIndex(int index) { link_index_ = index; } +void ElementwiseTwoInput::SetLinkIndex(int index) { + link_index_ = index; + if (use_scalar_para_) { + scalar_para_.SetName(absl::StrCat("scalar_para_", index)); + } +} std::string ElementwiseTwoInput::GetCoreCode( const LinkingContext& context) const { - const std::string size_name = "src_size_" + std::to_string(link_index_); - TensorCodeGenerator src_tensor( - 
absl::StrCat("src_data_", link_index_), - WHSPoint{size_name + ".x", size_name + ".y", size_name + ".z"}, - definition_.src_tensors[1]); - const std::string x_coord = broadcast_.width ? "0" : context.x_coord; - const std::string y_coord = broadcast_.height ? "0" : context.y_coord; - const std::string s_coord = broadcast_.channels ? "0" : context.s_coord; - const std::string second_var = "second_var_" + std::to_string(link_index_); - std::string result = " FLT4 " + second_var + " = " + - src_tensor.ReadWHS(x_coord, y_coord, s_coord) + ";\n"; - if (broadcast_.channels) { - result += " " + second_var + ".y = " + second_var + ".x;\n"; - result += " " + second_var + ".z = " + second_var + ".x;\n"; - result += " " + second_var + ".w = " + second_var + ".x;\n"; + std::string result; + std::string second_var; + if (use_scalar_para_) { + second_var = absl::StrCat("(FLT)(", scalar_para_.GetName(), ")"); + } else { + const std::string size_name = "src_size_" + std::to_string(link_index_); + TensorCodeGenerator src_tensor( + absl::StrCat("src_data_", link_index_), + WHSPoint{size_name + ".x", size_name + ".y", size_name + ".z"}, + definition_.src_tensors[1]); + const std::string x_coord = broadcast_.width ? "0" : context.x_coord; + const std::string y_coord = broadcast_.height ? "0" : context.y_coord; + const std::string s_coord = broadcast_.channels ? 
"0" : context.s_coord; + second_var = "second_var_" + std::to_string(link_index_); + result = " FLT4 " + second_var + " = " + + src_tensor.ReadWHS(x_coord, y_coord, s_coord) + ";\n"; + if (broadcast_.channels) { + result += " " + second_var + ".y = " + second_var + ".x;\n"; + result += " " + second_var + ".z = " + second_var + ".x;\n"; + result += " " + second_var + ".w = " + second_var + ".x;\n"; + } } switch (op_type_) { case OperationType::ADD: @@ -146,6 +161,12 @@ std::string ElementwiseTwoInput::GetCoreCode( case OperationType::DIV: result += "$0 /= $1;\n"; break; + case OperationType::MAXIMUM: + result += "$0 = max($0, $1);\n"; + break; + case OperationType::MINIMUM: + result += "$0 = min($0, $1);\n"; + break; case OperationType::MUL: result += "$0 *= $1;\n"; break; @@ -167,20 +188,44 @@ std::string ElementwiseTwoInput::GetCoreCode( std::string ElementwiseTwoInput::GetArgsDeclaration() const { std::string args; - absl::StrAppend(&args, ",\n", - GetTensorDeclaration(AccessType::READ, - absl::StrCat("src_data_", link_index_), - definition_.src_tensors[1])); - absl::StrAppend(&args, ",\n int4 src_size_", link_index_); + if (use_scalar_para_) { + absl::StrAppend(&args, ",\n ", scalar_para_.GetDeclaration()); + } else { + absl::StrAppend(&args, ",\n", + GetTensorDeclaration(AccessType::READ, + absl::StrCat("src_data_", link_index_), + definition_.src_tensors[1])); + absl::StrAppend(&args, ",\n int4 src_size_", link_index_); + } return args; } Status ElementwiseTwoInput::BindArguments(CLKernel* kernel) { - RETURN_IF_ERROR(kernel->SetMemoryAuto(src_[1]->GetMemoryPtr())); - RETURN_IF_ERROR(kernel->SetBytesAuto(src_[1]->GetWBatchedHSB())); + if (use_scalar_para_) { + RETURN_IF_ERROR(kernel->SetBytesAuto(scalar_para_)); + } else { + RETURN_IF_ERROR(kernel->SetMemoryAuto(src_[1]->GetMemoryPtr())); + RETURN_IF_ERROR(kernel->SetBytesAuto(src_[1]->GetWBatchedHSB())); + } return OkStatus(); } +ElementwiseTwoInput CreateElementwiseTwoInput( + const CreationContext& 
creation_context, const OperationDef& definition, + const OperationType& op_type, const BroadcastSettings& broadcast, + const ElementwiseAttributes& attr) { + ElementwiseTwoInput operation(definition, op_type, broadcast); + auto scalar = absl::get_if<float>(&attr.param); + if (scalar) { + const auto scalar_precision = creation_context.device->IsPowerVR() + ? CalculationsPrecision::F32 + : definition.precision; + operation.SetScalarPara(FLT(scalar_precision, *scalar)); + } + operation.SetLinkIndex(0); + return operation; +} + ElementwiseTwoInput CreateElementwiseTwoInput( const OperationDef& definition, const OperationType& op_type, const BroadcastSettings& broadcast) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h index a09ddd1b7db..a70114d1081 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h @@ -63,7 +63,8 @@ class ElementwiseTwoInput : public ElementwiseOperation { const BroadcastSettings& broadcast) : ElementwiseOperation(definition), op_type_(op_type), - broadcast_(broadcast) {} + broadcast_(broadcast), + use_scalar_para_(false) {} // Move only ElementwiseTwoInput(ElementwiseTwoInput&& operation); @@ -75,13 +76,24 @@ class ElementwiseTwoInput : public ElementwiseOperation { std::string GetCoreCode(const LinkingContext& context) const override; std::string GetArgsDeclaration() const override; Status BindArguments(CLKernel* kernel) override; + inline void SetScalarPara(FLT scalar) { + scalar_para_ = scalar; + use_scalar_para_ = true; + } private: int link_index_; OperationType op_type_; BroadcastSettings broadcast_; + FLT scalar_para_; + bool use_scalar_para_; }; +ElementwiseTwoInput CreateElementwiseTwoInput( + const CreationContext& creation_context, const OperationDef& definition, + const OperationType& op_type, const BroadcastSettings& broadcast, + const ElementwiseAttributes& attr); + ElementwiseTwoInput 
CreateElementwiseTwoInput( const OperationDef& definition, const OperationType& op_type, const BroadcastSettings& broadcast); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index 24d30eecf25..aa1f83cc495 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -425,6 +425,118 @@ TEST_F(OpenCLOperationTest, Add) { } } +TEST_F(OpenCLOperationTest, Maximum) { + TensorFloat32 src_tensor_0, src_tensor_1; + src_tensor_0.shape = BHWC(1, 2, 1, 2); + src_tensor_1.shape = BHWC(1, 2, 1, 2); + src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f}; + src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f}; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + ElementwiseTwoInput operation = + CreateElementwiseTwoInput(op_def, OperationType::MAXIMUM); + ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, + creation_context_, &operation, + BHWC(1, 2, 1, 2), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {1.0f, 2.0f, 3.0f, -2.0f})); + } + } +} + +TEST_F(OpenCLOperationTest, MaximumWithScalar) { + TensorFloat32 src_tensor_0; + src_tensor_0.shape = BHWC(1, 4, 1, 1); + src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f}; + + ElementwiseAttributes attr; + attr.param = -1.0f; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == 
CalculationsPrecision::F32 ? 1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + BroadcastSettings broadcast; + ElementwiseTwoInput operation = CreateElementwiseTwoInput( + creation_context_, op_def, OperationType::MAXIMUM, broadcast, attr); + ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, + BHWC(1, 4, 1, 1), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {0.0f, -1.0f, 2.0f, -1.0f})); + } + } +} + +TEST_F(OpenCLOperationTest, Minimum) { + TensorFloat32 src_tensor_0, src_tensor_1; + src_tensor_0.shape = BHWC(1, 2, 1, 2); + src_tensor_1.shape = BHWC(1, 2, 1, 2); + src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f}; + src_tensor_1.data = {1.0f, 2.0f, 3.0f, -2.0f}; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + ElementwiseTwoInput operation = + CreateElementwiseTwoInput(op_def, OperationType::MINIMUM); + ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, + creation_context_, &operation, + BHWC(1, 2, 1, 2), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {0.0f, -6.2f, 2.0f, -3.0f})); + } + } +} + +TEST_F(OpenCLOperationTest, MinimumWithScalar) { + TensorFloat32 src_tensor_0; + src_tensor_0.shape = BHWC(1, 4, 1, 1); + src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f}; + + ElementwiseAttributes attr; + attr.param = -1.0f; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + BroadcastSettings broadcast; + ElementwiseTwoInput operation = CreateElementwiseTwoInput( + creation_context_, op_def, OperationType::MINIMUM, broadcast, attr); + ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, + BHWC(1, 4, 1, 1), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {-1.0f, -6.2f, -1.0f, -3.0f})); + } + } +} + TEST_F(OpenCLOperationTest, Mul) { TensorFloat32 src_tensor_0, src_tensor_1; src_tensor_0.shape = BHWC(1, 2, 1, 2); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index e45a750b2fd..3153d7ddfd8 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -231,6 +231,8 @@ Status GPUOperationFromNode(const CreationContext& creation_context, return OkStatus(); } case OperationType::DIV: + case OperationType::MAXIMUM: + case OperationType::MINIMUM: case OperationType::POW: case OperationType::SQUARED_DIFF: case OperationType::SUB: { @@ -238,8 +240,10 @@ Status GPUOperationFromNode(const CreationContext& creation_context, broadcast.width = IsWidthBroadcastedForSecondInput(inputs); broadcast.height = IsHeightBroadcastedForSecondInput(inputs); broadcast.channels = IsChannelsBroadcastedForSecondInput(inputs); - ElementwiseTwoInput operation = - CreateElementwiseTwoInput(op_def, op_type, broadcast); + const auto attr = + absl::any_cast(node.operation.attributes); + ElementwiseTwoInput operation = CreateElementwiseTwoInput( + creation_context, op_def, op_type, broadcast, attr); *gpu_op = absl::make_unique(std::move(operation)); 
return OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index fc912f383ec..73d7e8821e8 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -389,6 +389,39 @@ Status CheckInputsOutputs(const TfLiteContext* context, return OkStatus(); } +// The function checks input tensors including 1 constant tensor. +Status CheckInputsOutputsAllowingOneConstInput(const TfLiteContext* context, + const TfLiteNode* tflite_node, + int inputs, int outputs) { + int number_of_const_inputs = 0; + int number_of_runtime_inputs = 0; + for (int i = 0; i < tflite_node->inputs->size; i++) { + if (IsConstantTensor(&context->tensors[tflite_node->inputs->data[i]])) { + number_of_const_inputs++; + } else { + number_of_runtime_inputs++; + } + } + if (tflite_node->inputs->size != inputs) { + return InternalError(absl::StrFormat( + "Expected %d input tensor(s), but node has %d input(s).", inputs, + tflite_node->inputs->size)); + } + if (number_of_const_inputs > 1) { + return InternalError(absl::StrFormat( + "Expected 1 const input tensor, but node has %d const input(s).", + number_of_const_inputs)); + } + int runtime_outputs = GetNumberOfRuntimeOutputsForNode(context, tflite_node); + if (runtime_outputs != outputs) { + return InternalError( + absl::StrFormat("Expected %d output tensor(s), but node has %d runtime " + "output(s).", + outputs, runtime_outputs)); + } + return OkStatus(); +} + // A parser responsible for parsing TFLite operation and adding it to a graph. class TFLiteOperationParser { public: @@ -642,6 +675,55 @@ Status ExtractTensorShape(const TfLiteTensor& tflite_tensor, BHWC* bhwc) { } } +Status ParseInputsWithConstTensor(Node* node, ObjectReader* reader, + TensorOrScalar* tensor_or_scalar) { + const std::string& opname = node->operation.type; + + // Determine runtime/constant tensors. 
+ const TfLiteTensor* input0 = reader->GetInputTensor(0); + if (!input0) { + return InvalidArgumentError("Couldn't get the 1st input tensor for " + + opname); + } + const TfLiteTensor* input1 = reader->GetInputTensor(1); + if (!input1) { + return InvalidArgumentError("Couldn't get the 2nd input tensor for " + + opname); + } + const bool constant_tensor0 = IsConstantTensor(input0); + const bool constant_tensor1 = IsConstantTensor(input1); + if (constant_tensor0 && constant_tensor1) { + return InvalidArgumentError("No runtime input tensors for " + opname); + } + const bool runtime_tensor0 = !constant_tensor0; + const bool runtime_tensor1 = !constant_tensor1; + + if (runtime_tensor0 && runtime_tensor1) { + RETURN_IF_ERROR(reader->AddInput(node, 0)); + RETURN_IF_ERROR(reader->AddInput(node, 1)); + } else { + int runtime_tensor = 0; + int constant_tensor = 1; + TfLiteIntArray* constant_dims = input1->dims; + if (constant_tensor0 && runtime_tensor1) { + runtime_tensor = 1; + constant_tensor = 0; + constant_dims = input0->dims; + } + RETURN_IF_ERROR(reader->AddInput(node, runtime_tensor)); + if (constant_dims->size <= 0) { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + *tensor_or_scalar = tensor.data[0]; + } else { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + *tensor_or_scalar = std::move(tensor); + } + } + return OkStatus(); +} + class AddOperationParser : public TFLiteOperationParser { public: Status IsSupported(const TfLiteContext* context, @@ -663,51 +745,11 @@ class AddOperationParser : public TFLiteOperationParser { // considers 2 input cases. The underlying GPU shader programs can accept // more inputs, but the logic below would have to be expanded. - // Determine runtime/constant tensors. 
- const TfLiteTensor* input0 = reader->GetInputTensor(0); - if (!input0) { - return InvalidArgumentError("Couldn't get the 1st input tensor for ADD."); - } - const TfLiteTensor* input1 = reader->GetInputTensor(1); - if (!input1) { - return InvalidArgumentError("Couldn't get the 2nd input tensor for ADD."); - } - const bool constant_tensor0 = IsConstantTensor(input0); - const bool constant_tensor1 = IsConstantTensor(input1); - if (constant_tensor0 && constant_tensor1) { - return InvalidArgumentError("No runtime input tensors for ADD."); - } - const bool runtime_tensor0 = !constant_tensor0; - const bool runtime_tensor1 = !constant_tensor1; - Node* node = graph->NewNode(); node->operation.type = ToString(OperationType::ADD); RETURN_IF_ERROR(reader->AddOutputs(node)); - AddAttributes attr; - if (runtime_tensor0 && runtime_tensor1) { - RETURN_IF_ERROR(reader->AddInput(node, 0)); - RETURN_IF_ERROR(reader->AddInput(node, 1)); - } else { - int runtime_tensor = 0; - int constant_tensor = 1; - TfLiteIntArray* constant_dims = input1->dims; - if (constant_tensor0 && runtime_tensor1) { - runtime_tensor = 1; - constant_tensor = 0; - constant_dims = input0->dims; - } - RETURN_IF_ERROR(reader->AddInput(node, runtime_tensor)); - if (constant_dims->size <= 0) { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); - attr.param = tensor.data[0]; - } else { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); - attr.param = std::move(tensor); - } - } + RETURN_IF_ERROR(ParseInputsWithConstTensor(node, reader, &attr.param)); node->operation.attributes = std::move(attr); const auto* tf_options = reinterpret_cast(tflite_node->builtin_data); @@ -1053,6 +1095,11 @@ class ElementwiseOperationParser : public TFLiteOperationParser { } else if (IsTwoArgumentOperation()) { RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, /*inputs=*/2, /*outputs=*/1)); + } else if (IsTwoArgumentOperationWithConst()) { + 
RETURN_IF_ERROR(CheckInputsOutputsAllowingOneConstInput(context, + tflite_node, + /*inputs=*/2, + /*outputs=*/1)); } else { return InvalidArgumentError("Op can only handle 1 or 2 operand(s)."); } @@ -1103,6 +1150,16 @@ class ElementwiseOperationParser : public TFLiteOperationParser { RETURN_IF_ERROR( MaybeFuseActivationToTheSingleOutput(activation, graph, node)); } + } else if (IsTwoArgumentOperationWithConst()) { + ElementwiseAttributes attr; + RETURN_IF_ERROR(ParseInputsWithConstTensor(node, reader, &attr.param)); + auto const_vector = + absl::get_if<::tflite::gpu::Tensor<Linear, DataType::FLOAT32>>( + &attr.param); + if (const_vector) { + return InvalidArgumentError("Constant vector is not supported"); + } + node->operation.attributes = std::move(attr); } else { return InvalidArgumentError("Incorrect operation type passed"); } @@ -1161,6 +1218,16 @@ class ElementwiseOperationParser : public TFLiteOperationParser { } } + bool IsTwoArgumentOperationWithConst() const { + switch (operation_type_) { + case OperationType::MINIMUM: + case OperationType::MAXIMUM: + return true; + default: + return false; + } + } + OperationType operation_type_; }; @@ -2547,10 +2614,16 @@ std::unique_ptr<TFLiteOperationParser> NewOperationParser( return absl::make_unique<ElementwiseOperationParser>(OperationType::LOG); case kTfLiteBuiltinLstm: return absl::make_unique<LSTMOperationParser>(); + case kTfLiteBuiltinMaximum: + return absl::make_unique<ElementwiseOperationParser>( + OperationType::MAXIMUM); case kTfLiteBuiltinMaxPool2d: return absl::make_unique<Pooling2DOperationParser>(PoolingType::MAX); case kTfLiteBuiltinMean: return absl::make_unique<MeanOperationParser>(); + case kTfLiteBuiltinMinimum: + return absl::make_unique<ElementwiseOperationParser>( + OperationType::MINIMUM); case kTfLiteBuiltinMirrorPad: return absl::make_unique<PadOperationParser>(/*mirror_pad=*/true); case kTfLiteBuiltinMul: diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index a4b3e2669a0..0d5c3429a49 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -98,10 +98,14 @@ std::string 
ToString(enum OperationType op) { return "log"; case OperationType::LSTM: return "lstm"; + case OperationType::MAXIMUM: + return "maximum"; case OperationType::MAX_UNPOOLING_2D: return "max_unpooling"; case OperationType::MEAN: return "mean"; + case OperationType::MINIMUM: + return "minimum"; case OperationType::MUL: return "mul"; case OperationType::PAD: @@ -165,8 +169,10 @@ OperationType OperationTypeFromString(const std::string& name) { {"hard_swish", OperationType::HARD_SWISH}, {"log", OperationType::LOG}, {"lstm", OperationType::LSTM}, + {"maximum", OperationType::MAXIMUM}, {"max_unpooling", OperationType::MAX_UNPOOLING_2D}, {"mean", OperationType::MEAN}, + {"minimum", OperationType::MINIMUM}, {"mul", OperationType::MUL}, {"pad", OperationType::PAD}, {"pooling_2d", OperationType::POOLING_2D}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index d58c82d4a26..87bb3ec383f 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -47,8 +47,10 @@ enum class OperationType { HARD_SWISH, LOG, LSTM, + MAXIMUM, MAX_UNPOOLING_2D, MEAN, + MINIMUM, MUL, PAD, POOLING_2D, @@ -75,6 +77,9 @@ std::string ToString(enum OperationType op); OperationType OperationTypeFromString(const std::string& name); +typedef absl::variant<Tensor<Linear, DataType::FLOAT32>, float> + TensorOrScalar; + struct Padding2D { Padding2D() = default; Padding2D& operator=(const Padding2D& value); @@ -352,8 +357,7 @@ struct LstmAttributes { }; struct MultiplyAttributes { - absl::variant<Tensor<Linear, DataType::FLOAT32>, float> - param; + TensorOrScalar param; }; enum class SamplingType { @@ -435,8 +439,7 @@ struct SliceAttributes { BHWC CalculateOutputShape(const BHWC& input, const SliceAttributes& attr); struct AddAttributes { - absl::variant<Tensor<Linear, DataType::FLOAT32>, float> - param; + TensorOrScalar param; }; struct FullyConnectedAttributes { @@ -452,6 +455,10 @@ BHWC CalculateOutputShape(const BHWC& input, // @return shape of a tensor after Mean operation is 
applied to the given input. BHWC CalculateOutputShape(const BHWC& input, const MeanAttributes& attr); +struct ElementwiseAttributes { + TensorOrScalar param; +}; + struct ReshapeAttributes { BHWC new_shape; }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc index 9215eac7602..7ba2dd871e7 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc @@ -139,6 +139,14 @@ class ElementwiseTwoArguments : public NodeShader { source = "value_0 /= value_1;"; break; } + case OperationType::MAXIMUM: { + source = "value_0 = max(value_0, value_1);"; + break; + } + case OperationType::MINIMUM: { + source = "value_0 = min(value_0, value_1);"; + break; + } case OperationType::POW: { // From documentation : // The result is undefined if x<0 or if x=0 and y≤0. @@ -167,6 +175,37 @@ class ElementwiseTwoArguments : public NodeShader { return OkStatus(); } + Status ImplementElementwiseWithScalar(const GenerationContext& ctx, + const float scalar, + GeneratedCode* generated_code) const { + std::string source; + switch (operation_type_) { + case OperationType::MAXIMUM: { + source = "value_0 = max(value_0, $scalar$);"; + break; + } + case OperationType::MINIMUM: { + source = "value_0 = min(value_0, $scalar$);"; + break; + } + + default: + return InvalidArgumentError( + "Incorrect elementwise with scalar operation type."); + } + *generated_code = { + /*parameters=*/{{"scalar", scalar}}, + /*objects=*/{}, + /*shared_variables=*/{}, + /*workload=*/uint3(), + /*workgroup=*/uint3(), + /*source_code=*/source, + /*input=*/IOStructure::AUTO, + /*output=*/IOStructure::AUTO, + }; + return OkStatus(); + } + bool IsSupportedBroadcast(const GenerationContext& ctx) const { auto inputs = ctx.graph->FindInputs(ctx.node->id); auto outputs = ctx.graph->FindOutputs(ctx.node->id); @@ -219,8 +258,15 @@ class ElementwiseTwoArguments : public NodeShader { if 
(IsSupportedBroadcast(ctx)) { return ImplementElementwiseBroadcast(ctx, generated_code); } + auto attr = + absl::any_cast(ctx.node->operation.attributes); + auto scalar = absl::get_if(&attr.param); + if (scalar) { + return ImplementElementwiseWithScalar(ctx, *scalar, generated_code); + } return InvalidArgumentError( - "This case is not supported by subtract operation"); + "This case is not supported by elementwise with two arguments " + "operation"); } private: @@ -244,6 +290,8 @@ std::unique_ptr NewElementwiseNodeShader( case OperationType::TANH: return absl::make_unique(operation_type); case OperationType::DIV: + case OperationType::MAXIMUM: + case OperationType::MINIMUM: case OperationType::POW: case OperationType::SQUARED_DIFF: case OperationType::SUB: diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc index 6743664f7e2..e597cc898e9 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc @@ -100,6 +100,64 @@ TEST(ElementwiseTest, Log) { Pointwise(FloatNear(1e-6), {0.0, 1.14473, 0.0, 0.0})); } +TEST(ElementwiseTest, Maximum) { + OperationType op_type = OperationType::MAXIMUM; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, -3.0})); + ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, -2.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {1.0, 2.0, 3.0, -2.0})); +} + +TEST(ElementwiseTest, MaximumWithScalar) { + OperationType op_type = OperationType::MAXIMUM; + const BHWC shape(1, 2, 2, 1); + ElementwiseAttributes attr; + attr.param = -1.0f; + SingleOpModel model( + {/*type=*/ToString(op_type), 
/*attributes=*/std::move(attr)}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, -3.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, -1.0, 2.0, -1.0})); +} + +TEST(ElementwiseTest, Minimum) { + OperationType op_type = OperationType::MINIMUM; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape), GetTensorRef(1, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, -3.0})); + ASSERT_TRUE(model.PopulateTensor(1, {1.0, 2.0, 3.0, -2.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0, -6.2, 2.0, -3.0})); +} + +TEST(ElementwiseTest, MinimumWithScalar) { + OperationType op_type = OperationType::MINIMUM; + const BHWC shape(1, 2, 2, 1); + ElementwiseAttributes attr; + attr.param = -1.0f; + SingleOpModel model( + {/*type=*/ToString(op_type), /*attributes=*/std::move(attr)}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(2, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, -3.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {-1.0, -6.2, -1.0, -3.0})); +} + TEST(ElementwiseTest, Pow) { OperationType op_type = OperationType::POW; const BHWC shape(1, 2, 2, 1); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index 005aa7dfd38..924f7dbf1ec 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -96,6 +96,8 @@ class Registry : public NodeShader { insert_elementwise_op(Type::DIV); insert_elementwise_op(Type::HARD_SWISH); 
insert_elementwise_op(Type::LOG); + insert_elementwise_op(Type::MAXIMUM); + insert_elementwise_op(Type::MINIMUM); insert_elementwise_op(Type::POW); insert_elementwise_op(Type::RSQRT); insert_elementwise_op(Type::SIGMOID); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 8cf7e34a523..b7179cb98f5 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -266,10 +266,12 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, case OperationType::TANH: *tasks = ElementwiseWithOneInput(node_id, inputs[0], outputs[0], op_type); break; - case OperationType::SUB: case OperationType::DIV: + case OperationType::MAXIMUM: + case OperationType::MINIMUM: case OperationType::POW: case OperationType::SQUARED_DIFF: + case OperationType::SUB: *tasks = ElementwiseWithTwoInputs(node_id, inputs, outputs[0], op_type); break; case OperationType::BATCH_NORMALIZATION: