diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD
index fe7f69f24db..8a010660e48 100644
--- a/tensorflow/lite/delegates/gpu/metal/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/BUILD
@@ -35,7 +35,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common:util",
         "//tensorflow/lite/delegates/gpu/common:winograd_util",
         "//tensorflow/lite/delegates/gpu/metal/kernels",
-        "//tensorflow/lite/delegates/gpu/metal/kernels:custom_registry",
+        "//tensorflow/lite/delegates/gpu/metal/selectors:operation_selector",
         "//tensorflow/lite/delegates/gpu/metal/selectors:subgraph",
     ],
 )
diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc
index 9134509e051..02ee339a8a8 100644
--- a/tensorflow/lite/delegates/gpu/metal/api.cc
+++ b/tensorflow/lite/delegates/gpu/metal/api.cc
@@ -28,500 +28,12 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
 #include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
 #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/add.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/concat.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/prelu.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/relu.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/reshape.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/resize.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/slice.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/softmax.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
-#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
+#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"
 #include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"
 
 namespace tflite {
 namespace gpu {
 namespace metal {
-namespace {
-
-std::unique_ptr<ComputeTaskDescriptor> SelectDepthWiseConv(
-    const OperationDef& op_def, const DepthwiseConvolution2DAttributes& attr) {
-  if (CheckDepthWiseConv3x3Stride1x1Support(attr)) {
-    auto gpu_op = DepthWiseConv3x3Stride1x1(op_def, attr);
-    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
-  } else if (CheckDepthWiseConv3x3Stride2Support(attr)) {
-    auto gpu_op = DepthWiseConv3x3Stride2(op_def, attr);
-    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
-  } else {
-    auto gpu_op = DepthWiseConvolution(op_def, attr);
-    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
-  }
-}
-
-std::unique_ptr<ComputeTaskDescriptor> SelectConvolutionTransposed(
-    const OperationDef& op_def, const ConvolutionTransposedAttributes& attr,
-    const GpuInfo& gpu_info) {
-  if (CheckConvolutionTransposed4x4Support(attr)) {
-    auto gpu_op =
ConvolutionTransposed4x4(op_def, attr, gpu_info); - return absl::make_unique(std::move(gpu_op)); - } else { - auto gpu_op = ConvolutionTransposed(op_def, attr, gpu_info); - return absl::make_unique(std::move(gpu_op)); - } -} - -std::unique_ptr SelectQuantizeAndDequantize( - const OperationDef& op_def, const QuantizeAndDequantizeAttributes& attr) { - auto gpu_op = QuantizeAndDequantize(op_def, attr); - return absl::make_unique(std::move(gpu_op)); -} - -std::unique_ptr SelectPReLU( - const OperationDef& op_def, const BHWC& src_shape, - const PReLUAttributes& attr) { - auto alpha = absl::get_if>(&attr.alpha); - if (alpha) { - auto gpu_op = PReLU(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } - auto alpha3d = absl::get_if>(&attr.alpha); - if (!alpha3d) { - return {}; - } - if (alpha3d->shape.h != src_shape.h || alpha3d->shape.w != src_shape.w || - alpha3d->shape.c != src_shape.c) { - return {}; - } - auto gpu_op = PReLUFull(op_def, attr); - return absl::make_unique(std::move(gpu_op)); -} - -std::unique_ptr SelectReshape( - const OperationDef& op_def, const BHWC& src_shape, - const ReshapeAttributes& attr) { - if (src_shape.c % 4 == 0 && attr.new_shape.c % 4 == 0) { - auto gpu_op = Reshapex4(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } else { - auto gpu_op = Reshape(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } -} - -std::unique_ptr SelectSoftmax(const OperationDef& op_def, - const BHWC& src_shape, - const GpuInfo& gpu_info) { - if (src_shape.w == 1 && src_shape.h == 1) { - auto gpu_op = Softmax1x1(op_def, gpu_info); - return absl::make_unique(std::move(gpu_op)); - } else { - auto gpu_op = Softmax(op_def); - return absl::make_unique(std::move(gpu_op)); - } -} - -std::unique_ptr SelectSpaceToDepth( - const OperationDef& op_def, const SpaceToDepthAttributes& attr) { - auto gpu_op = SpaceToDepth(op_def, attr); - return absl::make_unique(std::move(gpu_op)); -} - -std::unique_ptr SelectWinograd4x4To36( - const OperationDef& op_def, const Winograd4x4To36Attributes& attr, - const GpuInfo& gpu_info) { - if (gpu_info.IsApple()) { - auto gpu_op = Winograd4x4To36(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } else { - auto gpu_op = Winograd4x4To36TileX6(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } -} - -std::unique_ptr SelectWinograd36To4x4( - const OperationDef& op_def, const Winograd36To4x4Attributes& attr, - const GpuInfo& gpu_info) { - if (gpu_info.IsApple()) { - auto gpu_op = Winograd36To4x4(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } else { - auto gpu_op = Winograd36To4x4Tile4x1(op_def, attr); - return absl::make_unique(std::move(gpu_op)); - } -} - -bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr, - const GpuInfo& gpu_info, - const BHWC& dst_shape) { - const int tiles_x = DivideRoundUp(dst_shape.w, 4); - const int tiles_y = DivideRoundUp(dst_shape.h, 4); - const int total_tiles = tiles_x * tiles_y; - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - int min_depth = 16; - const int min_tiles = 32; - if (total_tiles >= min_tiles * 8) { - min_depth /= 4; - min_depth = std::max(min_depth, 8); - } else if (total_tiles >= min_tiles * 4) { - min_depth /= 2; - min_depth = std::max(min_depth, 8); - } - const bool recommended_channels = - src_depth >= min_depth && dst_depth >= min_depth; - const bool recommended_hw = total_tiles >= min_tiles; - return recommended_channels && 
recommended_hw; -} - -absl::Status WinogradFromNode(const GpuInfo& gpu_info, - const std::vector& inputs, - const std::vector& outputs, - const OperationDef& op_def, - const BHWC& input_shape, const BHWC& output_shape, - const Convolution2DAttributes& attr, - GPUOperationsSubgraph* gpu_subgraph) { - if (!IsSuitableForWinograd4x4To6x6(attr)) { - return absl::UnimplementedError("No implementation for this case."); - } - if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) { - return absl::UnimplementedError("Not recommended for this case."); - } - - const int tiles_x = DivideRoundUp(output_shape.w, 4); - const int tiles_y = DivideRoundUp(output_shape.h, 4); - const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c}; - const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c}; - TensorDescriptor tensor_desc = op_def.src_tensors[0]; - gpu_subgraph->new_tensors = {{shape_0, tensor_desc}, {shape_1, tensor_desc}}; - gpu_subgraph->operations.clear(); - gpu_subgraph->operations.resize(3); - - OperationDef winograd_up_def; - winograd_up_def.precision = op_def.precision; - winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]); - winograd_up_def.dst_tensors.push_back(op_def.src_tensors[0]); - auto& winograd_up = gpu_subgraph->operations[0]; - Winograd4x4To36Attributes wino_up_attr; - wino_up_attr.padding = attr.padding; - winograd_up.operation = - SelectWinograd4x4To36(winograd_up_def, wino_up_attr, gpu_info); - winograd_up.input_ids = {static_cast(inputs[0]->id)}; - winograd_up.output_ids = {-1}; - - OperationDef conv_def; - conv_def.precision = op_def.precision; - conv_def.src_tensors.push_back(op_def.src_tensors[0]); - conv_def.dst_tensors.push_back(op_def.src_tensors[0]); - auto& conv = gpu_subgraph->operations[1]; - conv.input_ids = {-1}; - conv.output_ids = {-2}; - auto gpu_op = ConvolutionWino4x4To6x6(conv_def, shape_1, attr, gpu_info); - conv.operation = absl::make_unique(std::move(gpu_op)); - OperationDef winograd_down_def; - winograd_down_def.precision = op_def.precision; - winograd_down_def.src_tensors.push_back(op_def.src_tensors[0]); - winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]); - auto& winograd_down = gpu_subgraph->operations[2]; - winograd_down.input_ids = {-2}; - winograd_down.output_ids = {static_cast(outputs[0]->id)}; - Winograd36To4x4Attributes wino_down_attr; - wino_down_attr.output_shape = outputs[0]->tensor.shape; - wino_down_attr.biases = attr.bias; - winograd_down.operation = - SelectWinograd36To4x4(winograd_down_def, wino_down_attr, gpu_info); - return absl::OkStatus(); -} - -absl::Status GPUOperationFromNode(const GpuInfo& gpu_info, - const OperationDef& op_def, - const std::vector& inputs, - const std::vector& outputs, - const Node& node, - GPUOperationsSubgraph* gpu_subgraph) { - std::unique_ptr* task = - InitSingleOpSubgraph(inputs, outputs, gpu_subgraph); - auto op_type = OperationTypeFromString(node.operation.type); - switch (op_type) { - case OperationType::ADD: { - if (inputs.size() == 1) { - if (node.operation.attributes.has_value()) { - auto attr = - absl::any_cast(node.operation.attributes); - auto gpu_op = ElementwiseWithOneInputAndConstantArguent( - op_def, op_type, attr.param); - *task = absl::make_unique(std::move(gpu_op)); - } else { - return absl::UnimplementedError( - "Missing attributes for single input op: " + node.operation.type); - } - } else if (inputs.size() == 2) { - auto gpu_op = - ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type); - *task = 
absl::make_unique(std::move(gpu_op)); - } else { // more than 2 inputs - auto gpu_op = Add(op_def); - *task = absl::make_unique(std::move(gpu_op)); - } - break; - } - case OperationType::CONCAT: { - std::vector input_shapes; - for (auto& input : inputs) { - input_shapes.push_back(input->tensor.shape); - } - auto gpu_op = Concat( - op_def, absl::any_cast(node.operation.attributes), - input_shapes); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::CONVOLUTION_2D: { - if (inputs.size() != 1) { - return absl::UnimplementedError( - "Convolution does not support more than 1 runtime tensor"); - } - auto attr = - absl::any_cast(node.operation.attributes); - auto input_shape = inputs[0]->tensor.shape; - auto output_shape = outputs[0]->tensor.shape; - if (WinogradFromNode(gpu_info, inputs, outputs, op_def, input_shape, - output_shape, attr, gpu_subgraph) - .ok()) { - return absl::OkStatus(); - } else { - auto gpu_op = ConvolutionGeneric(op_def, output_shape, attr, gpu_info); - *task = absl::make_unique(std::move(gpu_op)); - } - break; - } - case OperationType::CONVOLUTION_TRANSPOSED: - if (inputs.size() != 1) { - return absl::UnimplementedError( - "Convolution Transposed does not support more than 1 runtime " - "tensor"); - } - *task = SelectConvolutionTransposed( - op_def, - absl::any_cast( - node.operation.attributes), - gpu_info); - break; - case OperationType::DEPTHWISE_CONVOLUTION: - if (inputs.size() != 1) { - return absl::UnimplementedError( - "DepthWise Convolution does not support more than 1 runtime " - "tensor"); - } - *task = SelectDepthWiseConv( - op_def, absl::any_cast( - node.operation.attributes)); - break; - case OperationType::FULLY_CONNECTED: { - auto gpu_op = FullyConnected( - op_def, - absl::any_cast(node.operation.attributes), - gpu_info); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::MAX_UNPOOLING_2D: { - auto gpu_op = MaxUnpooling( - op_def, - absl::any_cast(node.operation.attributes)); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::MEAN: { - auto attr = absl::any_cast(node.operation.attributes); - if (attr.dims != std::set({Axis::HEIGHT, Axis::WIDTH})) { - return absl::UnimplementedError("Mean supports HW axis only in Metal"); - } - auto gpu_op = Mean(op_def, attr); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::MUL: - if (inputs.size() == 1) { - if (node.operation.attributes.has_value()) { - auto attr = - absl::any_cast(node.operation.attributes); - auto gpu_op = ElementwiseWithOneInputAndConstantArguent( - op_def, op_type, attr.param); - *task = absl::make_unique(std::move(gpu_op)); - } else { - return absl::UnimplementedError( - "Missing attributes for single input op: " + node.operation.type); - } - } else if (inputs.size() == 2) { - auto gpu_op = - ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type); - *task = absl::make_unique(std::move(gpu_op)); - } - break; - case OperationType::PAD: { - auto attr = absl::any_cast(node.operation.attributes); - if (attr.appended.b != 0 || attr.prepended.b != 0) { - return absl::UnimplementedError("Padding for BATCH is not supported."); - } - auto gpu_op = Padding(op_def, attr); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::POOLING_2D: { - auto attr = - absl::any_cast(node.operation.attributes); - auto pooling_op_def = op_def; - pooling_op_def.dst_tensors = {op_def.dst_tensors[0]}; - auto gpu_op = Pooling(op_def, attr, false); - 
gpu_subgraph->operations[0].operation = - absl::make_unique(std::move(gpu_op)); - gpu_subgraph->operations[0].input_ids = {static_cast(inputs[0]->id)}; - gpu_subgraph->operations[0].output_ids = { - static_cast(outputs[0]->id)}; - if (attr.type == PoolingType::MAX && attr.output_indices) { - gpu_subgraph->operations.push_back({}); - auto gpu_ind_op = Pooling(op_def, attr, true); - gpu_subgraph->operations[1].operation = - absl::make_unique(std::move(gpu_ind_op)); - gpu_subgraph->operations[1].input_ids = { - static_cast(inputs[0]->id)}; - gpu_subgraph->operations[1].output_ids = { - static_cast(outputs[1]->id)}; - } - break; - } - case OperationType::PRELU: { - const auto src_shape = inputs[0]->tensor.shape; - *task = SelectPReLU( - op_def, src_shape, - absl::any_cast(node.operation.attributes)); - break; - } - case OperationType::RELU: { - auto gpu_op = ReLU( - op_def, absl::any_cast(node.operation.attributes)); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::QUANTIZE_AND_DEQUANTIZE: - *task = SelectQuantizeAndDequantize( - op_def, absl::any_cast( - node.operation.attributes)); - break; - case OperationType::RESHAPE: { - const auto src_shape = inputs[0]->tensor.shape; - *task = SelectReshape( - op_def, src_shape, - absl::any_cast(node.operation.attributes)); - break; - } - case OperationType::RESIZE: { - auto gpu_op = - Resize(op_def, - absl::any_cast(node.operation.attributes)); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::SLICE: { - auto gpu_op = Slice( - op_def, absl::any_cast(node.operation.attributes)); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::SOFTMAX: { - auto attr = absl::any_cast(node.operation.attributes); - if (attr.axis != Axis::CHANNELS) { - return absl::UnimplementedError( - "Softmax supports only CHANNELS dimension"); - } - const auto src_shape = inputs[0]->tensor.shape; - *task = SelectSoftmax(op_def, src_shape, gpu_info); - break; - } - case OperationType::SPACE_TO_DEPTH: - *task = SelectSpaceToDepth(op_def, absl::any_cast( - node.operation.attributes)); - break; - case OperationType::ABS: - case OperationType::COPY: - case OperationType::COS: - case OperationType::ELU: - case OperationType::EXP: - case OperationType::HARD_SWISH: - case OperationType::LOG: - case OperationType::NEG: - case OperationType::RSQRT: - case OperationType::SIGMOID: - case OperationType::SIN: - case OperationType::SQRT: - case OperationType::SQUARE: - case OperationType::TANH: { - auto gpu_op = ElementwiseWithOneInput(op_def, op_type); - *task = absl::make_unique(std::move(gpu_op)); - break; - } - case OperationType::DIV: - case OperationType::MAXIMUM: - case OperationType::MINIMUM: - case OperationType::POW: - case OperationType::SQUARED_DIFF: - case OperationType::SUB: { - if (inputs.size() == 1) { - if (node.operation.attributes.has_value()) { - auto attr = - absl::any_cast(node.operation.attributes); - auto gpu_op = ElementwiseWithOneInputAndConstantArguent( - op_def, op_type, attr.param); - *task = absl::make_unique(std::move(gpu_op)); - } else { - return absl::UnimplementedError( - "Missing attributes for single input op: " + node.operation.type); - } - } else if (inputs.size() == 2) { - auto gpu_op = - ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type); - *task = absl::make_unique(std::move(gpu_op)); - } - } break; - case OperationType::BATCH_NORMALIZATION: - case OperationType::BATCH_TO_SPACE: - case OperationType::BATCHED_MATMUL: - case OperationType::CONST: - 
case OperationType::LSTM: - // TODO(b/162763635): implement MeanStddevNormalization for Metal. - case OperationType::MEAN_STDDEV_NORMALIZATION: - case OperationType::REDUCE_MAXIMUM: - case OperationType::REDUCE_MINIMUM: - case OperationType::REDUCE_PRODUCT: - case OperationType::REDUCE_SUM: - // comparison operations - case OperationType::LESS: - case OperationType::LESS_EQUAL: - case OperationType::EQUAL: - case OperationType::NOT_EQUAL: - case OperationType::GREATER: - case OperationType::GREATER_EQUAL: - case OperationType::SPACE_TO_BATCH: - case OperationType::TRANSPOSE: - case OperationType::UNKNOWN: - return absl::UnimplementedError("Unsupported op: " + node.operation.type); - } - return absl::OkStatus(); -} - -} // namespace absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info, CalculationsPrecision precision, @@ -537,81 +49,54 @@ absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info, } int node_linear_id = 0; for (const auto& node : graph.nodes()) { - std::vector inputs; - for (auto& input : graph.FindInputs(node->id)) { - inputs.push_back(static_cast(input->id)); + auto inputs = graph.FindInputs(node->id); + auto outputs = graph.FindOutputs(node->id); + DataType data_type = DeduceDataTypeFromPrecision(precision); + TensorDescriptor tensor_descriptor = + TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC}; + OperationDef op_def; + op_def.precision = precision; + for (int j = 0; j < inputs.size(); ++j) { + op_def.src_tensors.push_back(tensor_descriptor); } - std::vector outputs; - for (auto& output : graph.FindOutputs(node->id)) { - outputs.push_back(static_cast(output->id)); + for (int j = 0; j < outputs.size(); ++j) { + op_def.dst_tensors.push_back(tensor_descriptor); } - std::vector node_descs; - std::vector custom_tasks; - auto custom_status = RegisterCustomOps(graph, node, inputs, outputs, - precision, &custom_tasks); - if (!custom_status.ok()) { - auto inputs = graph.FindInputs(node->id); - auto outputs = graph.FindOutputs(node->id); - DataType data_type = DeduceDataTypeFromPrecision(precision); - TensorDescriptor tensor_descriptor = - TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC}; - OperationDef op_def; - op_def.precision = precision; - for (int j = 0; j < inputs.size(); ++j) { - op_def.src_tensors.push_back(tensor_descriptor); - } - for (int j = 0; j < outputs.size(); ++j) { - op_def.dst_tensors.push_back(tensor_descriptor); - } - GPUOperationsSubgraph gpu_subgraph; - RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, inputs, outputs, - *node, &gpu_subgraph)); - std::map mapping_to_global_ids; - for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) { - const auto& t = gpu_subgraph.new_tensors[j]; - last_value_id++; - compiled_model->tensor_shapes[last_value_id] = t.first; - mapping_to_global_ids[j] = last_value_id; - } - for (auto& gpu_op : gpu_subgraph.operations) { - NodeDescriptor metal_node; - metal_node.task = std::move(gpu_op.operation); - metal_node.src_tensors_ids.resize(gpu_op.input_ids.size()); - for (int j = 0; j < gpu_op.input_ids.size(); ++j) { - int id = gpu_op.input_ids[j]; - if (id >= 0) { - metal_node.src_tensors_ids[j] = id; - } else { - metal_node.src_tensors_ids[j] = mapping_to_global_ids[-(id + 1)]; - } + GPUOperationsSubgraph gpu_subgraph; + RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, inputs, outputs, + *node, &gpu_subgraph)); + std::map mapping_to_global_ids; + for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) { + const auto& t = 
gpu_subgraph.new_tensors[j]; + last_value_id++; + compiled_model->tensor_shapes[last_value_id] = t.first; + mapping_to_global_ids[j] = last_value_id; + } + for (auto& gpu_op : gpu_subgraph.operations) { + NodeDescriptor metal_node; + metal_node.task = std::move(gpu_op.operation); + metal_node.src_tensors_ids.resize(gpu_op.input_ids.size()); + for (int j = 0; j < gpu_op.input_ids.size(); ++j) { + int id = gpu_op.input_ids[j]; + if (id >= 0) { + metal_node.src_tensors_ids[j] = id; + } else { + metal_node.src_tensors_ids[j] = mapping_to_global_ids[-(id + 1)]; } - metal_node.dst_tensors_ids.resize(gpu_op.output_ids.size()); - for (int j = 0; j < gpu_op.output_ids.size(); ++j) { - int id = gpu_op.output_ids[j]; - if (id >= 0) { - metal_node.dst_tensors_ids[j] = id; - } else { - metal_node.dst_tensors_ids[j] = mapping_to_global_ids[-(id + 1)]; - } + } + metal_node.dst_tensors_ids.resize(gpu_op.output_ids.size()); + for (int j = 0; j < gpu_op.output_ids.size(); ++j) { + int id = gpu_op.output_ids[j]; + if (id >= 0) { + metal_node.dst_tensors_ids[j] = id; + } else { + metal_node.dst_tensors_ids[j] = mapping_to_global_ids[-(id + 1)]; } - metal_node.description = - node->operation.type + " " + std::to_string(node->id); - node_descs.push_back(std::move(metal_node)); } - } else { - for (auto& custom_task : custom_tasks) { - NodeDescriptor node_desc; - node_desc.task = custom_task; - node_desc.description = - node->operation.type + "_" + std::to_string(node->id); - node_desc.src_tensors_ids = inputs; - node_desc.dst_tensors_ids = outputs; - node_descs.push_back(node_desc); - } - } - for (auto& node_desc : node_descs) { - node_desc.id = node_linear_id++; - compiled_model->nodes.push_back(node_desc); + metal_node.description = + node->operation.type + " " + std::to_string(node->id); + metal_node.id = node_linear_id++; + compiled_model->nodes.push_back(std::move(metal_node)); } } return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 31b1258332d..ee7f0e8ad73 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -168,18 +168,6 @@ macos_unit_test( deps = [":conv_test_lib"], ) -cc_library( - name = "custom_registry", - srcs = ["custom_registry.cc"], - hdrs = ["custom_registry.h"], - deps = [ - "//tensorflow/lite/delegates/gpu/common:model", - "//tensorflow/lite/delegates/gpu/common:precision", - "//tensorflow/lite/delegates/gpu/common:status", - "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - ], -) - cc_library( name = "depthwise_conv", srcs = ["depthwise_conv.cc"], diff --git a/tensorflow/lite/delegates/gpu/metal/selectors/BUILD b/tensorflow/lite/delegates/gpu/metal/selectors/BUILD index e31e79913bb..9f5b8852677 100644 --- a/tensorflow/lite/delegates/gpu/metal/selectors/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/selectors/BUILD @@ -3,6 +3,37 @@ package( licenses = ["notice"], # Apache 2.0 ) +cc_library( + name = "default_selector", + hdrs = ["default_selector.h"], + deps = [ + ":subgraph", + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/metal/selectors/default:default_selector", # buildcleaner: keep + ], +) + +cc_library( + name = "operation_selector", + srcs = ["operation_selector.cc"], + hdrs = ["operation_selector.h"], + deps = [ + ":default_selector", + ":subgraph", + "//tensorflow/lite/delegates/gpu/common:gpu_info", + 
"//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:precision", + "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common:util", + "//tensorflow/lite/delegates/gpu/common:winograd_util", + "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", + "//tensorflow/lite/delegates/gpu/metal/kernels", + ], +) + cc_library( name = "subgraph", srcs = ["subgraph.cc"], diff --git a/tensorflow/lite/delegates/gpu/metal/selectors/default/BUILD b/tensorflow/lite/delegates/gpu/metal/selectors/default/BUILD new file mode 100644 index 00000000000..a9f1dde7133 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/metal/selectors/default/BUILD @@ -0,0 +1,16 @@ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "default_selector", + srcs = ["default_selector.cc"], + deps = [ + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/metal/selectors:subgraph", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc b/tensorflow/lite/delegates/gpu/metal/selectors/default/default_selector.cc similarity index 53% rename from tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc rename to tensorflow/lite/delegates/gpu/metal/selectors/default/default_selector.cc index fa97160b3a2..eb8a8f7afe0 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.cc +++ b/tensorflow/lite/delegates/gpu/metal/selectors/default/default_selector.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,25 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. 
 ==============================================================================*/
-#include "tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h"
-
-#include
+#include "absl/strings/str_cat.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "tensorflow/lite/delegates/gpu/common/precision.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"
 
 namespace tflite {
 namespace gpu {
 namespace metal {
 
-absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
-                               const std::vector<ValueId>& inputs,
-                               const std::vector<ValueId>& outputs,
-                               CalculationsPrecision precision,
-                               std::vector<ComputeTaskDescriptorPtr>* tasks) {
-  return absl::UnimplementedError("Unsupported op: " + node->operation.type);
+absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def,
+                           const std::vector<Value*>& inputs,
+                           const std::vector<Value*>& outputs, const Node& node,
+                           GPUOperationsSubgraph* gpu_subgraph) {
+  return absl::UnimplementedError(
+      absl::StrCat("No selector for ", node.operation.type));
 }
 
 }  // namespace metal
diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h b/tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h
similarity index 50%
rename from tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h
rename to tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h
index 2f08b74051c..75033c3ba5f 100644
--- a/tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h
+++ b/tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,29 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
-#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_
 
-#include
+#include
 
 #include "tensorflow/lite/delegates/gpu/common/model.h"
-#include "tensorflow/lite/delegates/gpu/common/precision.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"
 
 namespace tflite {
 namespace gpu {
 namespace metal {
 
-// Registers custom operations.
-absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
-                               const std::vector<ValueId>& inputs,
-                               const std::vector<ValueId>& outputs,
-                               CalculationsPrecision precision,
-                               std::vector<ComputeTaskDescriptorPtr>* tasks);
+absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def,
+                           const std::vector<Value*>& inputs,
+                           const std::vector<Value*>& outputs, const Node& node,
+                           GPUOperationsSubgraph* gpu_subgraph);
 
 }  // namespace metal
 }  // namespace gpu
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_
diff --git a/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.cc
new file mode 100644
index 00000000000..2556a265f8f
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.cc
@@ -0,0 +1,529 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"
+
+#include
+
+#include "absl/strings/substitute.h"
+#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
+#include "tensorflow/lite/delegates/gpu/common/util.h"
+#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
+#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/add.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/concat.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/prelu.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/relu.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/reshape.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/resize.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/slice.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/softmax.h"
+#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
+#include
"tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h" +#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h" +#include "tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h" +#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h" + +namespace tflite { +namespace gpu { +namespace metal { +namespace { + +std::unique_ptr SelectDepthWiseConv( + const OperationDef& op_def, const DepthwiseConvolution2DAttributes& attr) { + if (CheckDepthWiseConv3x3Stride1x1Support(attr)) { + auto gpu_op = DepthWiseConv3x3Stride1x1(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } else if (CheckDepthWiseConv3x3Stride2Support(attr)) { + auto gpu_op = DepthWiseConv3x3Stride2(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = DepthWiseConvolution(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } +} + +std::unique_ptr SelectConvolutionTransposed( + const OperationDef& op_def, const ConvolutionTransposedAttributes& attr, + const GpuInfo& gpu_info) { + if (CheckConvolutionTransposed4x4Support(attr)) { + auto gpu_op = ConvolutionTransposed4x4(op_def, attr, gpu_info); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = ConvolutionTransposed(op_def, attr, gpu_info); + return absl::make_unique(std::move(gpu_op)); + } +} + +std::unique_ptr SelectQuantizeAndDequantize( + const OperationDef& op_def, const QuantizeAndDequantizeAttributes& attr) { + auto gpu_op = QuantizeAndDequantize(op_def, attr); + return absl::make_unique(std::move(gpu_op)); +} + +std::unique_ptr SelectPReLU( + const OperationDef& op_def, const BHWC& src_shape, + const PReLUAttributes& attr) { + auto alpha = absl::get_if>(&attr.alpha); + if (alpha) { + auto gpu_op = PReLU(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } + auto alpha3d = absl::get_if>(&attr.alpha); + if (!alpha3d) { + return {}; + } + if (alpha3d->shape.h != src_shape.h || alpha3d->shape.w != src_shape.w || + alpha3d->shape.c != src_shape.c) { + return {}; + } + auto gpu_op = PReLUFull(op_def, attr); + return absl::make_unique(std::move(gpu_op)); +} + +std::unique_ptr SelectReshape( + const OperationDef& op_def, const BHWC& src_shape, + const ReshapeAttributes& attr) { + if (src_shape.c % 4 == 0 && attr.new_shape.c % 4 == 0) { + auto gpu_op = Reshapex4(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = Reshape(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } +} + +std::unique_ptr SelectSoftmax(const OperationDef& op_def, + const BHWC& src_shape, + const GpuInfo& gpu_info) { + if (src_shape.w == 1 && src_shape.h == 1) { + auto gpu_op = Softmax1x1(op_def, gpu_info); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = Softmax(op_def); + return absl::make_unique(std::move(gpu_op)); + } +} + +std::unique_ptr SelectSpaceToDepth( + const OperationDef& op_def, const SpaceToDepthAttributes& attr) { + auto gpu_op = SpaceToDepth(op_def, attr); + return absl::make_unique(std::move(gpu_op)); +} + +std::unique_ptr SelectWinograd4x4To36( + const OperationDef& op_def, const Winograd4x4To36Attributes& attr, + const GpuInfo& gpu_info) { + if (gpu_info.IsApple()) { + auto gpu_op = Winograd4x4To36(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = Winograd4x4To36TileX6(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } +} + +std::unique_ptr SelectWinograd36To4x4( + const OperationDef& op_def, const Winograd36To4x4Attributes& attr, 
+ const GpuInfo& gpu_info) { + if (gpu_info.IsApple()) { + auto gpu_op = Winograd36To4x4(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } else { + auto gpu_op = Winograd36To4x4Tile4x1(op_def, attr); + return absl::make_unique(std::move(gpu_op)); + } +} + +bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr, + const GpuInfo& gpu_info, + const BHWC& dst_shape) { + const int tiles_x = DivideRoundUp(dst_shape.w, 4); + const int tiles_y = DivideRoundUp(dst_shape.h, 4); + const int total_tiles = tiles_x * tiles_y; + const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); + const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); + int min_depth = 16; + const int min_tiles = 32; + if (total_tiles >= min_tiles * 8) { + min_depth /= 4; + min_depth = std::max(min_depth, 8); + } else if (total_tiles >= min_tiles * 4) { + min_depth /= 2; + min_depth = std::max(min_depth, 8); + } + const bool recommended_channels = + src_depth >= min_depth && dst_depth >= min_depth; + const bool recommended_hw = total_tiles >= min_tiles; + return recommended_channels && recommended_hw; +} + +absl::Status WinogradFromNode(const GpuInfo& gpu_info, + const std::vector& inputs, + const std::vector& outputs, + const OperationDef& op_def, + const BHWC& input_shape, const BHWC& output_shape, + const Convolution2DAttributes& attr, + GPUOperationsSubgraph* gpu_subgraph) { + if (!IsSuitableForWinograd4x4To6x6(attr)) { + return absl::UnimplementedError("No implementation for this case."); + } + if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) { + return absl::UnimplementedError("Not recommended for this case."); + } + + const int tiles_x = DivideRoundUp(output_shape.w, 4); + const int tiles_y = DivideRoundUp(output_shape.h, 4); + const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c}; + const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c}; + TensorDescriptor tensor_desc = op_def.src_tensors[0]; + gpu_subgraph->new_tensors = {{shape_0, tensor_desc}, {shape_1, tensor_desc}}; + gpu_subgraph->operations.clear(); + gpu_subgraph->operations.resize(3); + + OperationDef winograd_up_def; + winograd_up_def.precision = op_def.precision; + winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]); + winograd_up_def.dst_tensors.push_back(op_def.src_tensors[0]); + auto& winograd_up = gpu_subgraph->operations[0]; + Winograd4x4To36Attributes wino_up_attr; + wino_up_attr.padding = attr.padding; + winograd_up.operation = + SelectWinograd4x4To36(winograd_up_def, wino_up_attr, gpu_info); + winograd_up.input_ids = {static_cast(inputs[0]->id)}; + winograd_up.output_ids = {-1}; + + OperationDef conv_def; + conv_def.precision = op_def.precision; + conv_def.src_tensors.push_back(op_def.src_tensors[0]); + conv_def.dst_tensors.push_back(op_def.src_tensors[0]); + auto& conv = gpu_subgraph->operations[1]; + conv.input_ids = {-1}; + conv.output_ids = {-2}; + auto gpu_op = ConvolutionWino4x4To6x6(conv_def, shape_1, attr, gpu_info); + conv.operation = absl::make_unique(std::move(gpu_op)); + OperationDef winograd_down_def; + winograd_down_def.precision = op_def.precision; + winograd_down_def.src_tensors.push_back(op_def.src_tensors[0]); + winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]); + auto& winograd_down = gpu_subgraph->operations[2]; + winograd_down.input_ids = {-2}; + winograd_down.output_ids = {static_cast(outputs[0]->id)}; + Winograd36To4x4Attributes wino_down_attr; + wino_down_attr.output_shape = outputs[0]->tensor.shape; + 
wino_down_attr.biases = attr.bias; + winograd_down.operation = + SelectWinograd36To4x4(winograd_down_def, wino_down_attr, gpu_info); + return absl::OkStatus(); +} + +} // namespace + +absl::Status GPUOperationFromNode(const GpuInfo& gpu_info, + const OperationDef& op_def, + const std::vector& inputs, + const std::vector& outputs, + const Node& node, + GPUOperationsSubgraph* gpu_subgraph) { + std::unique_ptr* task = + InitSingleOpSubgraph(inputs, outputs, gpu_subgraph); + auto op_type = OperationTypeFromString(node.operation.type); + switch (op_type) { + case OperationType::ADD: { + if (inputs.size() == 1) { + if (node.operation.attributes.has_value()) { + auto attr = + absl::any_cast(node.operation.attributes); + auto gpu_op = ElementwiseWithOneInputAndConstantArguent( + op_def, op_type, attr.param); + *task = absl::make_unique(std::move(gpu_op)); + } else { + return absl::UnimplementedError( + "Missing attributes for single input op: " + node.operation.type); + } + } else if (inputs.size() == 2) { + auto gpu_op = + ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type); + *task = absl::make_unique(std::move(gpu_op)); + } else { // more than 2 inputs + auto gpu_op = Add(op_def); + *task = absl::make_unique(std::move(gpu_op)); + } + break; + } + case OperationType::CONCAT: { + std::vector input_shapes; + for (auto& input : inputs) { + input_shapes.push_back(input->tensor.shape); + } + auto gpu_op = Concat( + op_def, absl::any_cast(node.operation.attributes), + input_shapes); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::CONVOLUTION_2D: { + if (inputs.size() != 1) { + return absl::UnimplementedError( + "Convolution does not support more than 1 runtime tensor"); + } + auto attr = + absl::any_cast(node.operation.attributes); + auto input_shape = inputs[0]->tensor.shape; + auto output_shape = outputs[0]->tensor.shape; + if (WinogradFromNode(gpu_info, inputs, outputs, op_def, input_shape, + output_shape, attr, gpu_subgraph) + .ok()) { + return absl::OkStatus(); + } else { + auto gpu_op = ConvolutionGeneric(op_def, output_shape, attr, gpu_info); + *task = absl::make_unique(std::move(gpu_op)); + } + break; + } + case OperationType::CONVOLUTION_TRANSPOSED: + if (inputs.size() != 1) { + return absl::UnimplementedError( + "Convolution Transposed does not support more than 1 runtime " + "tensor"); + } + *task = SelectConvolutionTransposed( + op_def, + absl::any_cast( + node.operation.attributes), + gpu_info); + break; + case OperationType::DEPTHWISE_CONVOLUTION: + if (inputs.size() != 1) { + return absl::UnimplementedError( + "DepthWise Convolution does not support more than 1 runtime " + "tensor"); + } + *task = SelectDepthWiseConv( + op_def, absl::any_cast( + node.operation.attributes)); + break; + case OperationType::FULLY_CONNECTED: { + auto gpu_op = FullyConnected( + op_def, + absl::any_cast(node.operation.attributes), + gpu_info); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::MAX_UNPOOLING_2D: { + auto gpu_op = MaxUnpooling( + op_def, + absl::any_cast(node.operation.attributes)); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::MEAN: { + auto attr = absl::any_cast(node.operation.attributes); + if (attr.dims != std::set({Axis::HEIGHT, Axis::WIDTH})) { + return absl::UnimplementedError("Mean supports HW axis only in Metal"); + } + auto gpu_op = Mean(op_def, attr); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::MUL: + if (inputs.size() == 1) { + 
if (node.operation.attributes.has_value()) { + auto attr = + absl::any_cast(node.operation.attributes); + auto gpu_op = ElementwiseWithOneInputAndConstantArguent( + op_def, op_type, attr.param); + *task = absl::make_unique(std::move(gpu_op)); + } else { + return absl::UnimplementedError( + "Missing attributes for single input op: " + node.operation.type); + } + } else if (inputs.size() == 2) { + auto gpu_op = + ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type); + *task = absl::make_unique(std::move(gpu_op)); + } + break; + case OperationType::PAD: { + auto attr = absl::any_cast(node.operation.attributes); + if (attr.appended.b != 0 || attr.prepended.b != 0) { + return absl::UnimplementedError("Padding for BATCH is not supported."); + } + auto gpu_op = Padding(op_def, attr); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::POOLING_2D: { + auto attr = + absl::any_cast(node.operation.attributes); + auto pooling_op_def = op_def; + pooling_op_def.dst_tensors = {op_def.dst_tensors[0]}; + auto gpu_op = Pooling(op_def, attr, false); + gpu_subgraph->operations[0].operation = + absl::make_unique(std::move(gpu_op)); + gpu_subgraph->operations[0].input_ids = {static_cast(inputs[0]->id)}; + gpu_subgraph->operations[0].output_ids = { + static_cast(outputs[0]->id)}; + if (attr.type == PoolingType::MAX && attr.output_indices) { + gpu_subgraph->operations.push_back({}); + auto gpu_ind_op = Pooling(op_def, attr, true); + gpu_subgraph->operations[1].operation = + absl::make_unique(std::move(gpu_ind_op)); + gpu_subgraph->operations[1].input_ids = { + static_cast(inputs[0]->id)}; + gpu_subgraph->operations[1].output_ids = { + static_cast(outputs[1]->id)}; + } + break; + } + case OperationType::PRELU: { + const auto src_shape = inputs[0]->tensor.shape; + *task = SelectPReLU( + op_def, src_shape, + absl::any_cast(node.operation.attributes)); + break; + } + case OperationType::RELU: { + auto gpu_op = ReLU( + op_def, absl::any_cast(node.operation.attributes)); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::QUANTIZE_AND_DEQUANTIZE: + *task = SelectQuantizeAndDequantize( + op_def, absl::any_cast( + node.operation.attributes)); + break; + case OperationType::RESHAPE: { + const auto src_shape = inputs[0]->tensor.shape; + *task = SelectReshape( + op_def, src_shape, + absl::any_cast(node.operation.attributes)); + break; + } + case OperationType::RESIZE: { + auto gpu_op = + Resize(op_def, + absl::any_cast(node.operation.attributes)); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::SLICE: { + auto gpu_op = Slice( + op_def, absl::any_cast(node.operation.attributes)); + *task = absl::make_unique(std::move(gpu_op)); + break; + } + case OperationType::SOFTMAX: { + auto attr = absl::any_cast(node.operation.attributes); + if (attr.axis != Axis::CHANNELS) { + return absl::UnimplementedError( + "Softmax supports only CHANNELS dimension"); + } + const auto src_shape = inputs[0]->tensor.shape; + *task = SelectSoftmax(op_def, src_shape, gpu_info); + break; + } + case OperationType::SPACE_TO_DEPTH: + *task = SelectSpaceToDepth(op_def, absl::any_cast( + node.operation.attributes)); + break; + case OperationType::ABS: + case OperationType::COPY: + case OperationType::COS: + case OperationType::ELU: + case OperationType::EXP: + case OperationType::HARD_SWISH: + case OperationType::LOG: + case OperationType::NEG: + case OperationType::RSQRT: + case OperationType::SIGMOID: + case OperationType::SIN: + case 
OperationType::SQRT:
+    case OperationType::SQUARE:
+    case OperationType::TANH: {
+      auto gpu_op = ElementwiseWithOneInput(op_def, op_type);
+      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
+      break;
+    }
+    case OperationType::DIV:
+    case OperationType::MAXIMUM:
+    case OperationType::MINIMUM:
+    case OperationType::POW:
+    case OperationType::SQUARED_DIFF:
+    case OperationType::SUB: {
+      if (inputs.size() == 1) {
+        if (node.operation.attributes.has_value()) {
+          auto attr =
+              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
+          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
+              op_def, op_type, attr.param);
+          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
+        } else {
+          return absl::UnimplementedError(
+              "Missing attributes for single input op: " + node.operation.type);
+        }
+      } else if (inputs.size() == 2) {
+        auto gpu_op =
+            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
+        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
+      }
+    } break;
+    case OperationType::BATCH_NORMALIZATION:
+    case OperationType::BATCH_TO_SPACE:
+    case OperationType::BATCHED_MATMUL:
+    case OperationType::CONST:
+    case OperationType::LSTM:
+    // TODO(b/162763635): implement MeanStddevNormalization for Metal.
+    case OperationType::MEAN_STDDEV_NORMALIZATION:
+    case OperationType::REDUCE_MAXIMUM:
+    case OperationType::REDUCE_MINIMUM:
+    case OperationType::REDUCE_PRODUCT:
+    case OperationType::REDUCE_SUM:
+    // comparison operations
+    case OperationType::LESS:
+    case OperationType::LESS_EQUAL:
+    case OperationType::EQUAL:
+    case OperationType::NOT_EQUAL:
+    case OperationType::GREATER:
+    case OperationType::GREATER_EQUAL:
+    case OperationType::SPACE_TO_BATCH:
+    case OperationType::TRANSPOSE:
+      return absl::UnimplementedError("Unsupported op: " + node.operation.type);
+    default:
+      return SelectDefault(gpu_info, op_def, inputs, outputs, node,
+                           gpu_subgraph);
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h b/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h
new file mode 100644
index 00000000000..64f6b291031
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h
@@ -0,0 +1,39 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_
+
+#include
+
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+
+absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
+                                  const OperationDef& op_def,
+                                  const std::vector<Value*>& inputs,
+                                  const std::vector<Value*>& outputs,
+                                  const Node& node,
+                                  GPUOperationsSubgraph* gpu_subgraph);
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_