From 107a35a5519d183b04dce7ca3877a1c29e62c42f Mon Sep 17 00:00:00 2001 From: Juhyun Lee Date: Wed, 3 Jul 2019 13:53:48 -0700 Subject: [PATCH] TFLite GPU: Make GPU delegate recognize MobileNet v3. PiperOrigin-RevId: 256432626 --- .../delegates/gpu/common/model_builder.cc | 239 ++++++++++++------ .../lite/delegates/gpu/gl/kernels/BUILD | 2 +- .../lite/delegates/gpu/gl/kernels/mul.cc | 59 ++--- 3 files changed, 189 insertions(+), 111 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 17cf8febdc0..2bbbef73b5e 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -263,24 +263,22 @@ class ObjectReader { tflite_node_(tflite_node), tensor_to_value_(tensor_to_value) {} - Status ReadValue(uint32_t idx, Value>** value) { + Status ReadValue(uint32_t idx, Value>** value) const { if (idx >= tflite_node_->inputs->size) { return OutOfRangeError(StrCat("ReadValue: input tensor index: ", idx)); } - RETURN_IF_ERROR( - ReadValueByTensorIdx(tflite_node_->inputs->data[idx], value)); - return OkStatus(); + return ReadValueByTensorIdx(tflite_node_->inputs->data[idx], value); } - int GetNumberOfRuntimeInputs() { + int GetNumberOfRuntimeInputs() const { return GetNumberOfRuntimeInputsForNode(context_, tflite_node_); } - Status GetTensorDims(uint32_t idx, TfLiteIntArray* dimensions) { + Status GetTensorDims(uint32_t idx, TfLiteIntArray* dimensions) const { if (idx >= tflite_node_->inputs->size) { return OutOfRangeError(StrCat("Input tensor index: ", idx)); } - int32_t tensor_idx = tflite_node_->inputs->data[idx]; + const int tensor_idx = tflite_node_->inputs->data[idx]; if (tensor_idx < 0 || tensor_idx > context_->tensors_size) { return OutOfRangeError(StrCat("Tensor index: ", tensor_idx)); } @@ -330,7 +328,7 @@ class ObjectReader { } Status ReadValueByTensorIdx(uint32_t tensor_idx, - Value>** value) { + Value>** value) const { if (tensor_idx >= tensor_to_value_->size()) { return OutOfRangeError( StrCat("ReadValue: input tensor index: ", tensor_idx)); @@ -351,6 +349,12 @@ class ObjectReader { return OkStatus(); } + TfLiteTensor* GetInputTensor(int index) const { + return index >= 0 && index < tflite_node_->inputs->size + ? context_->tensors + tflite_node_->inputs->data[index] + : nullptr; + } + private: GraphFloat32* graph_ = nullptr; const TfLiteContext* context_ = nullptr; @@ -1019,37 +1023,64 @@ class AddOperationParser : public TFLiteOperationParser { const TfLiteNode* tflite_node, const TfLiteRegistration* registration) final { RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 1)); - // TODO(eignasheva): add shapes check. + if (tflite_node->inputs->size != 2) { + return UnimplementedError("ADD requires two input tensors."); + } + // TODO(eignasheva): Add shapes check. TfLiteAddParams* tf_options = nullptr; - RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - return OkStatus(); + return RetrieveBuiltinData(tflite_node, &tf_options); } + Status Parse(const TfLiteNode* tflite_node, const TfLiteRegistration* registration, GraphFloat32* graph, ObjectReader* reader) final { + // TFLite currently only supports 2 input ADDs. Thus, the logic below only + // considers 2 input cases. The underlying GPU shader programs can accept + // more inputs, but the logic below would have to be expanded. + + // Determine runtime/constant tensors. + const TfLiteTensor* input0 = reader->GetInputTensor(0); + if (!input0) { + return InvalidArgumentError("Couldn't get the 1st input tensor for ADD."); + } + const TfLiteTensor* input1 = reader->GetInputTensor(1); + if (!input1) { + return InvalidArgumentError("Couldn't get the 2nd input tensor for ADD."); + } + const bool constant_tensor0 = IsConstantTensor(input0); + const bool constant_tensor1 = IsConstantTensor(input1); + if (constant_tensor0 && constant_tensor1) { + return InvalidArgumentError("No runtime input tensors for ADD."); + } + const bool runtime_tensor0 = !constant_tensor0; + const bool runtime_tensor1 = !constant_tensor1; + Node* node = graph->NewNode(); node->operation.type = ToString(OperationType::ADD); RETURN_IF_ERROR(reader->AddOutputs(node)); AddAttributes attr; - for (int idx = 0; idx < tflite_node->inputs->size; ++idx) { - if (!reader->AddInput(node, idx).ok()) { - if (tflite_node->inputs->size != 2) { - return InvalidArgumentError( - "Broadcast Add should accept 2 inputs, one input tensor and " - "broadcasted tensor"); - } - TfLiteIntArray dims; - RETURN_IF_ERROR(reader->GetTensorDims(1, &dims)); - if (dims.size <= 0) { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(1, &tensor)); - attr.param = tensor.data[0]; - } else { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(1, &tensor)); - attr.param = std::move(tensor); - } + if (runtime_tensor0 && runtime_tensor1) { + RETURN_IF_ERROR(reader->AddInput(node, 0)); + RETURN_IF_ERROR(reader->AddInput(node, 1)); + } else { + int runtime_tensor = 0; + int constant_tensor = 1; + TfLiteIntArray* constant_dims = input1->dims; + if (constant_tensor0 && runtime_tensor1) { + runtime_tensor = 1; + constant_tensor = 0; + constant_dims = input0->dims; + } + RETURN_IF_ERROR(reader->AddInput(node, runtime_tensor)); + if (constant_dims->size <= 0) { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + attr.param = tensor.data[0]; + } else { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + attr.param = std::move(tensor); } } node->operation.attributes = std::move(attr); @@ -1059,9 +1090,8 @@ class AddOperationParser : public TFLiteOperationParser { if (!tf_options) { return InternalError("Missing tflite params"); } - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); - return OkStatus(); + return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph, + node); } }; @@ -1427,41 +1457,119 @@ class ReLuOperationParser : public TFLiteOperationParser { int clip_; }; +Status ExtractTensorShape(const TfLiteTensor& tflite_tensor, BHWC* bhwc) { + const TfLiteIntArray* dims = tflite_tensor.dims; + switch (dims->size) { + case 1: + *bhwc = BHWC(dims->data[0], 1, 1, 1); + return OkStatus(); + case 2: + *bhwc = BHWC(dims->data[0], 1, 1, dims->data[1]); + return OkStatus(); + case 3: + *bhwc = BHWC(dims->data[0], 1, dims->data[1], dims->data[2]); + return OkStatus(); + case 4: + *bhwc = BHWC(dims->data[0], dims->data[1], dims->data[2], dims->data[3]); + return OkStatus(); + default: + return InvalidArgumentError( + absl::StrCat("Tensor \"", tflite_tensor.name, + "\" has bad input dims size: ", dims->size, ".")); + } +} + class MulOperationParser : public TFLiteOperationParser { public: Status IsSupported(const TfLiteContext* context, const TfLiteNode* tflite_node, const TfLiteRegistration* registration) final { RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 1)); - // TODO(eignasheva): add params check + if (tflite_node->inputs->size != 2) { + return UnimplementedError("MUL requires two input tensors."); + } + // TODO(eignasheva): Add params check. return OkStatus(); } + Status Parse(const TfLiteNode* tflite_node, const TfLiteRegistration* registration, GraphFloat32* graph, ObjectReader* reader) final { - Node* node = graph->NewNode(); - if (reader->GetNumberOfRuntimeInputs() == 2) { - // ApplyMask operation - node->operation.type = ToString(OperationType::APPLY_MASK); - RETURN_IF_ERROR(reader->AddInput(node, 0)); - RETURN_IF_ERROR(reader->AddInput(node, 1)); - } else { - node->operation.type = ToString(OperationType::MULTIPLY_SCALAR); - RETURN_IF_ERROR(reader->AddInput(node, 0)); - MultiplyScalarAttributes attr; - TfLiteIntArray dims; - RETURN_IF_ERROR(reader->GetTensorDims(1, &dims)); - if (dims.size <= 0) { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(1, &tensor)); - attr.param = tensor.data[0]; - } else { - Tensor tensor; - RETURN_IF_ERROR(reader->ReadTensor(1, &tensor)); - attr.param = std::move(tensor); - } - node->operation.attributes = std::move(attr); + // Determine runtime/constant tensors. + const TfLiteTensor* input0 = reader->GetInputTensor(0); + if (!input0) { + return InvalidArgumentError("Couldn't get the 1st input tensor for MUL."); } + const TfLiteTensor* input1 = reader->GetInputTensor(1); + if (!input1) { + return InvalidArgumentError("Couldn't get the 2nd input tensor for MUL."); + } + const bool constant_tensor0 = IsConstantTensor(input0); + const bool constant_tensor1 = IsConstantTensor(input1); + if (constant_tensor0 && constant_tensor1) { + return InvalidArgumentError("No runtime input tensors for MUL."); + } + const bool runtime_tensor0 = !constant_tensor0; + const bool runtime_tensor1 = !constant_tensor1; + + // Parse for APPLY_MASK. The "larger" input tensor must be bound to 1st + // input and the "smaller" input tensor ("mask") must be bound to 2nd input. + if (runtime_tensor0 && runtime_tensor1) { + BHWC shape0; + RETURN_IF_ERROR(ExtractTensorShape(*input0, &shape0)); + BHWC shape1; + RETURN_IF_ERROR(ExtractTensorShape(*input1, &shape1)); + int input_tensor0 = 0; + int input_tensor1 = 1; + if (shape0.h <= shape1.h && shape0.w <= shape1.w && + shape0.c == shape1.c) { + input_tensor0 = 1; + input_tensor1 = 0; + } + return ParseApplyMask(input_tensor0, input_tensor1, graph, reader); + } + + // Parse for MULTIPLY_SCALAR. The runtime input tensor must be bound to 1st + // input and the constant input tensor must be bound to 2nd input. + int runtime_tensor = 0; + int constant_tensor = 1; + TfLiteIntArray* constant_dims = input1->dims; + if (constant_tensor0 && runtime_tensor1) { + runtime_tensor = 1; + constant_tensor = 0; + constant_dims = input0->dims; + } + return ParseMultiplyScalar(runtime_tensor, constant_tensor, constant_dims, + graph, reader); + } + + private: + Status ParseApplyMask(int input_tensor0, int input_tensor1, + GraphFloat32* graph, ObjectReader* reader) { + Node* node = graph->NewNode(); + node->operation.type = ToString(OperationType::APPLY_MASK); + RETURN_IF_ERROR(reader->AddInput(node, input_tensor0)); + RETURN_IF_ERROR(reader->AddInput(node, input_tensor1)); + return reader->AddOutputs(node); + } + + Status ParseMultiplyScalar(int runtime_tensor, int constant_tensor, + const TfLiteIntArray* constant_dims, + GraphFloat32* graph, ObjectReader* reader) { + Node* node = graph->NewNode(); + node->operation.type = ToString(OperationType::MULTIPLY_SCALAR); + RETURN_IF_ERROR(reader->AddInput(node, runtime_tensor)); + MultiplyScalarAttributes attr; + if (constant_dims->size <= 0) { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + attr.param = tensor.data[0]; + } else { + Tensor tensor; + RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); + attr.param = std::move(tensor); + } + node->operation.attributes = std::move(attr); return reader->AddOutputs(node); } }; @@ -1963,26 +2071,7 @@ std::unique_ptr NewOperationParser( Status ConvertTfLiteTensorToTensorRef(const TfLiteTensor& tflite_tensor, TensorRef* tensor_ref) { tensor_ref->type = ToDataType(tflite_tensor.type); - const TfLiteIntArray* dims = tflite_tensor.dims; - switch (dims->size) { - case 1: - tensor_ref->shape = BHWC(dims->data[0], 1, 1, 1); - break; - case 2: - tensor_ref->shape = BHWC(dims->data[0], 1, 1, dims->data[1]); - break; - case 3: - tensor_ref->shape = BHWC(dims->data[0], 1, dims->data[1], dims->data[2]); - break; - case 4: - tensor_ref->shape = - BHWC(dims->data[0], dims->data[1], dims->data[2], dims->data[3]); - break; - default: - return InvalidArgumentError(StrCat( - "Tensor ref has unsupported number of dimensions: ", dims->size)); - } - return OkStatus(); + return ExtractTensorShape(tflite_tensor, &tensor_ref->shape); } Status IsSupported(const TfLiteContext* context, TfLiteNode* node, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index e07c3126284..97b96129a15 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -286,12 +286,12 @@ cc_library( srcs = ["mul.cc"], hdrs = ["mul.h"], deps = [ - "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc index eb94013937b..f57eaa70578 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc @@ -22,6 +22,7 @@ limitations under the License. #include #include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" @@ -33,34 +34,21 @@ namespace { class ApplyMask : public NodeShader { public: static bool IsSupported(const GenerationContext& ctx) { - auto inputs = ctx.graph->FindInputs(ctx.node->id); + const auto inputs = ctx.graph->FindInputs(ctx.node->id); + if (inputs.size() != 2) return false; + const auto& shape0 = inputs[0]->tensor.shape; + const auto& shape1 = inputs[1]->tensor.shape; - // Implementation requires 2 input tensors: source and mask. - if (inputs.size() != 2) { - return false; - } + // [H, W, C] x [H, W, 0][0] + if (shape1.c == 1) return true; - auto src_shape = inputs[0]->tensor.shape; - auto mask_shape = inputs[1]->tensor.shape; + if (shape0.c != shape1.c) return false; - // Height and width dimensions of the two input tensors must be the same. - if (src_shape.h != mask_shape.h || src_shape.w != mask_shape.w) { - return false; - } + // [H, W, C] x [H, W, C] + if (shape0.h == shape1.h && shape0.w == shape1.w) return true; - // Broadcast will be done if mask tensor has 1 channel. - if (mask_shape.c == 1) { - return true; - } - - // Bitwise multiplication will be done if mask tensor has the same amount of - // channels as source tensor. - if (src_shape.c == mask_shape.c) { - return true; - } - - // Other cases are not supported. - return false; + // [H, W, C] x [0, 0, C] + return shape1.h == 1 && shape1.w == 1; } Status GenerateCode(const GenerationContext& ctx, @@ -69,19 +57,20 @@ class ApplyMask : public NodeShader { return InvalidArgumentError( "This case is not supported by apply mask operation"); } - auto inputs = ctx.graph->FindInputs(ctx.node->id); + const auto inputs = ctx.graph->FindInputs(ctx.node->id); + const auto& shape0 = inputs[0]->tensor.shape; + const auto& shape1 = inputs[1]->tensor.shape; - std::string source; - if (inputs[1]->tensor.shape.c == 1) { - // Broadcast case, mask channels size == 1. - source = - "value_0 = $input_data_0[gid.x, gid.y, gid.z]$ * " - "$input_data_1[gid.x, gid.y, 0]$.x;"; + std::string source = "value_0 = $input_data_0[gid.x, gid.y, gid.z]$ * "; + if (shape1.c == 1) { + // [H, W, C] x [H, W, 0][0] + absl::StrAppend(&source, "$input_data_1[gid.x, gid.y, 0]$.x;"); + } else if (shape0.h == shape1.h && shape0.w == shape1.w) { + // [H, W, C] x [H, W, C] + absl::StrAppend(&source, "$input_data_1[gid.x, gid.y, gid.z]$;"); } else { - // Bitwise multiplication case, src channels size == mask channels size. - source = - "value_0 = $input_data_0[gid.x, gid.y, gid.z]$ * " - "$input_data_1[gid.x, gid.y, 0]$;"; + // [H, W, C] x [0, 0, C] + absl::StrAppend(&source, "$input_data_1[0, 0, gid.z]$;"); } *generated_code = {