Added operation_selector for Metal backend.
Made it similar to the gpu/common/ selectors structure.

PiperOrigin-RevId: 348573563
Change-Id: I993f124c4d3fac14c397495aaf131e6614c5317f
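For orientation, this is roughly how the compiler is expected to drive the new selector entry point after this change. It is a minimal sketch distilled from the Compile() hunk below; the helper name LowerNode and the exact include set are illustrative, not part of this CL.

#include <vector>

#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {

// Illustrative helper (not in this CL): lowers one graph node to Metal tasks.
absl::Status LowerNode(const GraphFloat32& graph, const Node* node,
                       const GpuInfo& gpu_info,
                       CalculationsPrecision precision,
                       GPUOperationsSubgraph* gpu_subgraph) {
  auto inputs = graph.FindInputs(node->id);
  auto outputs = graph.FindOutputs(node->id);
  // One BUFFER/HWC descriptor per runtime tensor, at the requested precision.
  DataType data_type = DeduceDataTypeFromPrecision(precision);
  TensorDescriptor tensor_descriptor{data_type, TensorStorageType::BUFFER,
                                     Layout::HWC};
  OperationDef op_def;
  op_def.precision = precision;
  for (int j = 0; j < inputs.size(); ++j) {
    op_def.src_tensors.push_back(tensor_descriptor);
  }
  for (int j = 0; j < outputs.size(); ++j) {
    op_def.dst_tensors.push_back(tensor_descriptor);
  }
  // Known ops become ComputeTaskDescriptors (possibly a multi-op subgraph,
  // e.g. Winograd); anything else falls through to SelectDefault(), which
  // currently returns UnimplementedError.
  return GPUOperationFromNode(gpu_info, op_def, inputs, outputs, *node,
                              gpu_subgraph);
}

}  // namespace metal
}  // namespace gpu
}  // namespace tflite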
@ -35,7 +35,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/common:util",
        "//tensorflow/lite/delegates/gpu/common:winograd_util",
        "//tensorflow/lite/delegates/gpu/metal/kernels",
        "//tensorflow/lite/delegates/gpu/metal/kernels:custom_registry",
        "//tensorflow/lite/delegates/gpu/metal/selectors:operation_selector",
        "//tensorflow/lite/delegates/gpu/metal/selectors:subgraph",
    ],
)
@ -28,500 +28,12 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
#include "tensorflow/lite/delegates/gpu/metal/compiled_model.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/add.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/concat.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/prelu.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/relu.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/reshape.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/resize.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/slice.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/softmax.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {
namespace {

std::unique_ptr<ComputeTaskDescriptor> SelectDepthWiseConv(
    const OperationDef& op_def, const DepthwiseConvolution2DAttributes& attr) {
  if (CheckDepthWiseConv3x3Stride1x1Support(attr)) {
    auto gpu_op = DepthWiseConv3x3Stride1x1(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else if (CheckDepthWiseConv3x3Stride2Support(attr)) {
    auto gpu_op = DepthWiseConv3x3Stride2(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = DepthWiseConvolution(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectConvolutionTransposed(
    const OperationDef& op_def, const ConvolutionTransposedAttributes& attr,
    const GpuInfo& gpu_info) {
  if (CheckConvolutionTransposed4x4Support(attr)) {
    auto gpu_op = ConvolutionTransposed4x4(op_def, attr, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = ConvolutionTransposed(op_def, attr, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectQuantizeAndDequantize(
    const OperationDef& op_def, const QuantizeAndDequantizeAttributes& attr) {
  auto gpu_op = QuantizeAndDequantize(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectPReLU(
    const OperationDef& op_def, const BHWC& src_shape,
    const PReLUAttributes& attr) {
  auto alpha = absl::get_if<Tensor<Linear, DataType::FLOAT32>>(&attr.alpha);
  if (alpha) {
    auto gpu_op = PReLU(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
  auto alpha3d = absl::get_if<Tensor<HWC, DataType::FLOAT32>>(&attr.alpha);
  if (!alpha3d) {
    return {};
  }
  if (alpha3d->shape.h != src_shape.h || alpha3d->shape.w != src_shape.w ||
      alpha3d->shape.c != src_shape.c) {
    return {};
  }
  auto gpu_op = PReLUFull(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectReshape(
    const OperationDef& op_def, const BHWC& src_shape,
    const ReshapeAttributes& attr) {
  if (src_shape.c % 4 == 0 && attr.new_shape.c % 4 == 0) {
    auto gpu_op = Reshapex4(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Reshape(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectSoftmax(const OperationDef& op_def,
                                                     const BHWC& src_shape,
                                                     const GpuInfo& gpu_info) {
  if (src_shape.w == 1 && src_shape.h == 1) {
    auto gpu_op = Softmax1x1(op_def, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Softmax(op_def);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectSpaceToDepth(
    const OperationDef& op_def, const SpaceToDepthAttributes& attr) {
  auto gpu_op = SpaceToDepth(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectWinograd4x4To36(
    const OperationDef& op_def, const Winograd4x4To36Attributes& attr,
    const GpuInfo& gpu_info) {
  if (gpu_info.IsApple()) {
    auto gpu_op = Winograd4x4To36(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Winograd4x4To36TileX6(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectWinograd36To4x4(
    const OperationDef& op_def, const Winograd36To4x4Attributes& attr,
    const GpuInfo& gpu_info) {
  if (gpu_info.IsApple()) {
    auto gpu_op = Winograd36To4x4(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Winograd36To4x4Tile4x1(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  const int tiles_x = DivideRoundUp(dst_shape.w, 4);
  const int tiles_y = DivideRoundUp(dst_shape.h, 4);
  const int total_tiles = tiles_x * tiles_y;
  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
  int min_depth = 16;
  const int min_tiles = 32;
  if (total_tiles >= min_tiles * 8) {
    min_depth /= 4;
    min_depth = std::max(min_depth, 8);
  } else if (total_tiles >= min_tiles * 4) {
    min_depth /= 2;
    min_depth = std::max(min_depth, 8);
  }
  const bool recommended_channels =
      src_depth >= min_depth && dst_depth >= min_depth;
  const bool recommended_hw = total_tiles >= min_tiles;
  return recommended_channels && recommended_hw;
}

absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  const int tiles_x = DivideRoundUp(output_shape.w, 4);
  const int tiles_y = DivideRoundUp(output_shape.h, 4);
  const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor tensor_desc = op_def.src_tensors[0];
  gpu_subgraph->new_tensors = {{shape_0, tensor_desc}, {shape_1, tensor_desc}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);

  OperationDef winograd_up_def;
  winograd_up_def.precision = op_def.precision;
  winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_up_def.dst_tensors.push_back(op_def.src_tensors[0]);
  auto& winograd_up = gpu_subgraph->operations[0];
  Winograd4x4To36Attributes wino_up_attr;
  wino_up_attr.padding = attr.padding;
  winograd_up.operation =
      SelectWinograd4x4To36(winograd_up_def, wino_up_attr, gpu_info);
  winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
  winograd_up.output_ids = {-1};

  OperationDef conv_def;
  conv_def.precision = op_def.precision;
  conv_def.src_tensors.push_back(op_def.src_tensors[0]);
  conv_def.dst_tensors.push_back(op_def.src_tensors[0]);
  auto& conv = gpu_subgraph->operations[1];
  conv.input_ids = {-1};
  conv.output_ids = {-2};
  auto gpu_op = ConvolutionWino4x4To6x6(conv_def, shape_1, attr, gpu_info);
  conv.operation = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  OperationDef winograd_down_def;
  winograd_down_def.precision = op_def.precision;
  winograd_down_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
  auto& winograd_down = gpu_subgraph->operations[2];
  winograd_down.input_ids = {-2};
  winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
  Winograd36To4x4Attributes wino_down_attr;
  wino_down_attr.output_shape = outputs[0]->tensor.shape;
  wino_down_attr.biases = attr.bias;
  winograd_down.operation =
      SelectWinograd36To4x4(winograd_down_def, wino_down_attr, gpu_info);
  return absl::OkStatus();
}

absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph) {
  std::unique_ptr<ComputeTaskDescriptor>* task =
      InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
  auto op_type = OperationTypeFromString(node.operation.type);
  switch (op_type) {
    case OperationType::ADD: {
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      } else {  // more than 2 inputs
        auto gpu_op = Add(op_def);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    }
    case OperationType::CONCAT: {
      std::vector<BHWC> input_shapes;
      for (auto& input : inputs) {
        input_shapes.push_back(input->tensor.shape);
      }
      auto gpu_op = Concat(
          op_def, absl::any_cast<ConcatAttributes>(node.operation.attributes),
          input_shapes);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::CONVOLUTION_2D: {
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "Convolution does not support more than 1 runtime tensor");
      }
      auto attr =
          absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
      auto input_shape = inputs[0]->tensor.shape;
      auto output_shape = outputs[0]->tensor.shape;
      if (WinogradFromNode(gpu_info, inputs, outputs, op_def, input_shape,
                           output_shape, attr, gpu_subgraph)
              .ok()) {
        return absl::OkStatus();
      } else {
        auto gpu_op = ConvolutionGeneric(op_def, output_shape, attr, gpu_info);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    }
    case OperationType::CONVOLUTION_TRANSPOSED:
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "Convolution Transposed does not support more than 1 runtime "
            "tensor");
      }
      *task = SelectConvolutionTransposed(
          op_def,
          absl::any_cast<ConvolutionTransposedAttributes>(
              node.operation.attributes),
          gpu_info);
      break;
    case OperationType::DEPTHWISE_CONVOLUTION:
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "DepthWise Convolution does not support more than 1 runtime "
            "tensor");
      }
      *task = SelectDepthWiseConv(
          op_def, absl::any_cast<DepthwiseConvolution2DAttributes>(
                      node.operation.attributes));
      break;
    case OperationType::FULLY_CONNECTED: {
      auto gpu_op = FullyConnected(
          op_def,
          absl::any_cast<FullyConnectedAttributes>(node.operation.attributes),
          gpu_info);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MAX_UNPOOLING_2D: {
      auto gpu_op = MaxUnpooling(
          op_def,
          absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      if (attr.dims != std::set<Axis>({Axis::HEIGHT, Axis::WIDTH})) {
        return absl::UnimplementedError("Mean supports HW axis only in Metal");
      }
      auto gpu_op = Mean(op_def, attr);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MUL:
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    case OperationType::PAD: {
      auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
      if (attr.appended.b != 0 || attr.prepended.b != 0) {
        return absl::UnimplementedError("Padding for BATCH is not supported.");
      }
      auto gpu_op = Padding(op_def, attr);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::POOLING_2D: {
      auto attr =
          absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
      auto pooling_op_def = op_def;
      pooling_op_def.dst_tensors = {op_def.dst_tensors[0]};
      auto gpu_op = Pooling(op_def, attr, false);
      gpu_subgraph->operations[0].operation =
          absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      gpu_subgraph->operations[0].input_ids = {static_cast<int>(inputs[0]->id)};
      gpu_subgraph->operations[0].output_ids = {
          static_cast<int>(outputs[0]->id)};
      if (attr.type == PoolingType::MAX && attr.output_indices) {
        gpu_subgraph->operations.push_back({});
        auto gpu_ind_op = Pooling(op_def, attr, true);
        gpu_subgraph->operations[1].operation =
            absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_ind_op));
        gpu_subgraph->operations[1].input_ids = {
            static_cast<int>(inputs[0]->id)};
        gpu_subgraph->operations[1].output_ids = {
            static_cast<int>(outputs[1]->id)};
      }
      break;
    }
    case OperationType::PRELU: {
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectPReLU(
          op_def, src_shape,
          absl::any_cast<PReLUAttributes>(node.operation.attributes));
      break;
    }
    case OperationType::RELU: {
      auto gpu_op = ReLU(
          op_def, absl::any_cast<ReLUAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::QUANTIZE_AND_DEQUANTIZE:
      *task = SelectQuantizeAndDequantize(
          op_def, absl::any_cast<QuantizeAndDequantizeAttributes>(
                      node.operation.attributes));
      break;
    case OperationType::RESHAPE: {
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectReshape(
          op_def, src_shape,
          absl::any_cast<ReshapeAttributes>(node.operation.attributes));
      break;
    }
    case OperationType::RESIZE: {
      auto gpu_op =
          Resize(op_def,
                 absl::any_cast<Resize2DAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::SLICE: {
      auto gpu_op = Slice(
          op_def, absl::any_cast<SliceAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::SOFTMAX: {
      auto attr = absl::any_cast<SoftmaxAttributes>(node.operation.attributes);
      if (attr.axis != Axis::CHANNELS) {
        return absl::UnimplementedError(
            "Softmax supports only CHANNELS dimension");
      }
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectSoftmax(op_def, src_shape, gpu_info);
      break;
    }
    case OperationType::SPACE_TO_DEPTH:
      *task = SelectSpaceToDepth(op_def, absl::any_cast<SpaceToDepthAttributes>(
                                             node.operation.attributes));
      break;
    case OperationType::ABS:
    case OperationType::COPY:
    case OperationType::COS:
    case OperationType::ELU:
    case OperationType::EXP:
    case OperationType::HARD_SWISH:
    case OperationType::LOG:
    case OperationType::NEG:
    case OperationType::RSQRT:
    case OperationType::SIGMOID:
    case OperationType::SIN:
    case OperationType::SQRT:
    case OperationType::SQUARE:
    case OperationType::TANH: {
      auto gpu_op = ElementwiseWithOneInput(op_def, op_type);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::DIV:
    case OperationType::MAXIMUM:
    case OperationType::MINIMUM:
    case OperationType::POW:
    case OperationType::SQUARED_DIFF:
    case OperationType::SUB: {
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
    } break;
    case OperationType::BATCH_NORMALIZATION:
    case OperationType::BATCH_TO_SPACE:
    case OperationType::BATCHED_MATMUL:
    case OperationType::CONST:
    case OperationType::LSTM:
    // TODO(b/162763635): implement MeanStddevNormalization for Metal.
    case OperationType::MEAN_STDDEV_NORMALIZATION:
    case OperationType::REDUCE_MAXIMUM:
    case OperationType::REDUCE_MINIMUM:
    case OperationType::REDUCE_PRODUCT:
    case OperationType::REDUCE_SUM:
    // comparison operations
    case OperationType::LESS:
    case OperationType::LESS_EQUAL:
    case OperationType::EQUAL:
    case OperationType::NOT_EQUAL:
    case OperationType::GREATER:
    case OperationType::GREATER_EQUAL:
    case OperationType::SPACE_TO_BATCH:
    case OperationType::TRANSPOSE:
    case OperationType::UNKNOWN:
      return absl::UnimplementedError("Unsupported op: " + node.operation.type);
  }
  return absl::OkStatus();
}

}  // namespace

absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
                     CalculationsPrecision precision,
@ -537,81 +49,54 @@ absl::Status Compile(const GraphFloat32& graph, const GpuInfo& gpu_info,
  }
  int node_linear_id = 0;
  for (const auto& node : graph.nodes()) {
    std::vector<ValueId> inputs;
    for (auto& input : graph.FindInputs(node->id)) {
      inputs.push_back(static_cast<ValueId>(input->id));
    auto inputs = graph.FindInputs(node->id);
    auto outputs = graph.FindOutputs(node->id);
    DataType data_type = DeduceDataTypeFromPrecision(precision);
    TensorDescriptor tensor_descriptor =
        TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC};
    OperationDef op_def;
    op_def.precision = precision;
    for (int j = 0; j < inputs.size(); ++j) {
      op_def.src_tensors.push_back(tensor_descriptor);
    }
    std::vector<ValueId> outputs;
    for (auto& output : graph.FindOutputs(node->id)) {
      outputs.push_back(static_cast<ValueId>(output->id));
    for (int j = 0; j < outputs.size(); ++j) {
      op_def.dst_tensors.push_back(tensor_descriptor);
    }
    std::vector<NodeDescriptor> node_descs;
    std::vector<ComputeTaskDescriptorPtr> custom_tasks;
    auto custom_status = RegisterCustomOps(graph, node, inputs, outputs,
                                           precision, &custom_tasks);
    if (!custom_status.ok()) {
      auto inputs = graph.FindInputs(node->id);
      auto outputs = graph.FindOutputs(node->id);
      DataType data_type = DeduceDataTypeFromPrecision(precision);
      TensorDescriptor tensor_descriptor =
          TensorDescriptor{data_type, TensorStorageType::BUFFER, Layout::HWC};
      OperationDef op_def;
      op_def.precision = precision;
      for (int j = 0; j < inputs.size(); ++j) {
        op_def.src_tensors.push_back(tensor_descriptor);
      }
      for (int j = 0; j < outputs.size(); ++j) {
        op_def.dst_tensors.push_back(tensor_descriptor);
      }
      GPUOperationsSubgraph gpu_subgraph;
      RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, inputs, outputs,
                                           *node, &gpu_subgraph));
      std::map<int, ValueId> mapping_to_global_ids;
      for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) {
        const auto& t = gpu_subgraph.new_tensors[j];
        last_value_id++;
        compiled_model->tensor_shapes[last_value_id] = t.first;
        mapping_to_global_ids[j] = last_value_id;
      }
      for (auto& gpu_op : gpu_subgraph.operations) {
        NodeDescriptor metal_node;
        metal_node.task = std::move(gpu_op.operation);
        metal_node.src_tensors_ids.resize(gpu_op.input_ids.size());
        for (int j = 0; j < gpu_op.input_ids.size(); ++j) {
          int id = gpu_op.input_ids[j];
          if (id >= 0) {
            metal_node.src_tensors_ids[j] = id;
          } else {
            metal_node.src_tensors_ids[j] = mapping_to_global_ids[-(id + 1)];
          }
    GPUOperationsSubgraph gpu_subgraph;
    RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, inputs, outputs,
                                         *node, &gpu_subgraph));
    std::map<int, ValueId> mapping_to_global_ids;
    for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) {
      const auto& t = gpu_subgraph.new_tensors[j];
      last_value_id++;
      compiled_model->tensor_shapes[last_value_id] = t.first;
      mapping_to_global_ids[j] = last_value_id;
    }
    for (auto& gpu_op : gpu_subgraph.operations) {
      NodeDescriptor metal_node;
      metal_node.task = std::move(gpu_op.operation);
      metal_node.src_tensors_ids.resize(gpu_op.input_ids.size());
      for (int j = 0; j < gpu_op.input_ids.size(); ++j) {
        int id = gpu_op.input_ids[j];
        if (id >= 0) {
          metal_node.src_tensors_ids[j] = id;
        } else {
          metal_node.src_tensors_ids[j] = mapping_to_global_ids[-(id + 1)];
        }
      metal_node.dst_tensors_ids.resize(gpu_op.output_ids.size());
      for (int j = 0; j < gpu_op.output_ids.size(); ++j) {
        int id = gpu_op.output_ids[j];
        if (id >= 0) {
          metal_node.dst_tensors_ids[j] = id;
        } else {
          metal_node.dst_tensors_ids[j] = mapping_to_global_ids[-(id + 1)];
        }
      }
        metal_node.dst_tensors_ids.resize(gpu_op.output_ids.size());
        for (int j = 0; j < gpu_op.output_ids.size(); ++j) {
          int id = gpu_op.output_ids[j];
          if (id >= 0) {
            metal_node.dst_tensors_ids[j] = id;
          } else {
            metal_node.dst_tensors_ids[j] = mapping_to_global_ids[-(id + 1)];
          }
        }
        metal_node.description =
            node->operation.type + " " + std::to_string(node->id);
        node_descs.push_back(std::move(metal_node));
      }
    } else {
      for (auto& custom_task : custom_tasks) {
        NodeDescriptor node_desc;
        node_desc.task = custom_task;
        node_desc.description =
            node->operation.type + "_" + std::to_string(node->id);
        node_desc.src_tensors_ids = inputs;
        node_desc.dst_tensors_ids = outputs;
        node_descs.push_back(node_desc);
      }
    }
    for (auto& node_desc : node_descs) {
      node_desc.id = node_linear_id++;
      compiled_model->nodes.push_back(node_desc);
      metal_node.description =
          node->operation.type + " " + std::to_string(node->id);
      metal_node.id = node_linear_id++;
      compiled_model->nodes.push_back(std::move(metal_node));
    }
  }
  return absl::OkStatus();
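A note on the negative ids above: multi-operation lowerings such as WinogradFromNode() wire their intermediate tensors with -1, -2, ..., and Compile() remaps those to freshly allocated global ValueIds via mapping_to_global_ids. A minimal sketch of that remapping convention (the helper name is illustrative, not part of this CL):

#include <map>

#include "tensorflow/lite/delegates/gpu/common/model.h"  // ValueId

namespace tflite {
namespace gpu {
namespace metal {

// Non-negative ids are already global tensor ids; negative ids index the
// tensors created for this node's lowering (-1 -> new tensor 0, -2 -> 1, ...).
inline ValueId ResolveTensorId(int id,
                               const std::map<int, ValueId>& new_tensor_ids) {
  return id >= 0 ? static_cast<ValueId>(id) : new_tensor_ids.at(-(id + 1));
}

}  // namespace metal
}  // namespace gpu
}  // namespace tflite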
@ -168,18 +168,6 @@ macos_unit_test(
    deps = [":conv_test_lib"],
)

cc_library(
    name = "custom_registry",
    srcs = ["custom_registry.cc"],
    hdrs = ["custom_registry.h"],
    deps = [
        "//tensorflow/lite/delegates/gpu/common:model",
        "//tensorflow/lite/delegates/gpu/common:precision",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
    ],
)

cc_library(
    name = "depthwise_conv",
    srcs = ["depthwise_conv.cc"],
@ -3,6 +3,37 @@ package(
    licenses = ["notice"],  # Apache 2.0
)

cc_library(
    name = "default_selector",
    hdrs = ["default_selector.h"],
    deps = [
        ":subgraph",
        "//tensorflow/lite/delegates/gpu/common:model",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/metal/selectors/default:default_selector",  # buildcleaner: keep
    ],
)

cc_library(
    name = "operation_selector",
    srcs = ["operation_selector.cc"],
    hdrs = ["operation_selector.h"],
    deps = [
        ":default_selector",
        ":subgraph",
        "//tensorflow/lite/delegates/gpu/common:gpu_info",
        "//tensorflow/lite/delegates/gpu/common:model",
        "//tensorflow/lite/delegates/gpu/common:operations",
        "//tensorflow/lite/delegates/gpu/common:precision",
        "//tensorflow/lite/delegates/gpu/common:shape",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/common:util",
        "//tensorflow/lite/delegates/gpu/common:winograd_util",
        "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor",
        "//tensorflow/lite/delegates/gpu/metal/kernels",
    ],
)

cc_library(
    name = "subgraph",
    srcs = ["subgraph.cc"],
tensorflow/lite/delegates/gpu/metal/selectors/default/BUILD (new file, 16 lines)
@ -0,0 +1,16 @@
package(
    default_visibility = ["//visibility:public"],
    licenses = ["notice"],  # Apache 2.0
)

cc_library(
    name = "default_selector",
    srcs = ["default_selector.cc"],
    deps = [
        "//tensorflow/lite/delegates/gpu/common:model",
        "//tensorflow/lite/delegates/gpu/common:operations",
        "//tensorflow/lite/delegates/gpu/common:status",
        "//tensorflow/lite/delegates/gpu/metal/selectors:subgraph",
        "@com_google_absl//absl/strings",
    ],
)
@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -13,25 +13,24 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/metal/kernels/custom_registry.h"

#include <vector>
#include <memory>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {

absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
                               const std::vector<ValueId>& inputs,
                               const std::vector<ValueId>& outputs,
                               CalculationsPrecision precision,
                               std::vector<ComputeTaskDescriptorPtr>* tasks) {
  return absl::UnimplementedError("Unsupported op: " + node->operation.type);
absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def,
                           const std::vector<Value*>& inputs,
                           const std::vector<Value*>& outputs, const Node& node,
                           GPUOperationsSubgraph* gpu_subgraph) {
  return absl::UnimplementedError(
      absl::StrCat("No selector for ", node.operation.type));
}

}  // namespace metal
@ -1,4 +1,4 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -13,29 +13,26 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_

#include <vector>
#include <memory>

#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {

// Registers custom operations.
absl::Status RegisterCustomOps(const GraphFloat32& graph, const Node* node,
                               const std::vector<ValueId>& inputs,
                               const std::vector<ValueId>& outputs,
                               CalculationsPrecision precision,
                               std::vector<ComputeTaskDescriptorPtr>* tasks);
absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def,
                           const std::vector<Value*>& inputs,
                           const std::vector<Value*>& outputs, const Node& node,
                           GPUOperationsSubgraph* gpu_subgraph);

}  // namespace metal
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_CUSTOM_REGISTRY_H_
#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_DEFAULT_SELECTOR_H_
@ -0,0 +1,529 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/metal/selectors/operation_selector.h"

#include <vector>

#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
#include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/add.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/concat.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/mean.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/padding.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/pooling.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/prelu.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/quantize_and_dequantize.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/relu.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/reshape.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/resize.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/slice.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/softmax.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.h"
#include "tensorflow/lite/delegates/gpu/metal/kernels/winograd.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/default_selector.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {
namespace {

std::unique_ptr<ComputeTaskDescriptor> SelectDepthWiseConv(
    const OperationDef& op_def, const DepthwiseConvolution2DAttributes& attr) {
  if (CheckDepthWiseConv3x3Stride1x1Support(attr)) {
    auto gpu_op = DepthWiseConv3x3Stride1x1(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else if (CheckDepthWiseConv3x3Stride2Support(attr)) {
    auto gpu_op = DepthWiseConv3x3Stride2(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = DepthWiseConvolution(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectConvolutionTransposed(
    const OperationDef& op_def, const ConvolutionTransposedAttributes& attr,
    const GpuInfo& gpu_info) {
  if (CheckConvolutionTransposed4x4Support(attr)) {
    auto gpu_op = ConvolutionTransposed4x4(op_def, attr, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = ConvolutionTransposed(op_def, attr, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectQuantizeAndDequantize(
    const OperationDef& op_def, const QuantizeAndDequantizeAttributes& attr) {
  auto gpu_op = QuantizeAndDequantize(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectPReLU(
    const OperationDef& op_def, const BHWC& src_shape,
    const PReLUAttributes& attr) {
  auto alpha = absl::get_if<Tensor<Linear, DataType::FLOAT32>>(&attr.alpha);
  if (alpha) {
    auto gpu_op = PReLU(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
  auto alpha3d = absl::get_if<Tensor<HWC, DataType::FLOAT32>>(&attr.alpha);
  if (!alpha3d) {
    return {};
  }
  if (alpha3d->shape.h != src_shape.h || alpha3d->shape.w != src_shape.w ||
      alpha3d->shape.c != src_shape.c) {
    return {};
  }
  auto gpu_op = PReLUFull(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectReshape(
    const OperationDef& op_def, const BHWC& src_shape,
    const ReshapeAttributes& attr) {
  if (src_shape.c % 4 == 0 && attr.new_shape.c % 4 == 0) {
    auto gpu_op = Reshapex4(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Reshape(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectSoftmax(const OperationDef& op_def,
                                                     const BHWC& src_shape,
                                                     const GpuInfo& gpu_info) {
  if (src_shape.w == 1 && src_shape.h == 1) {
    auto gpu_op = Softmax1x1(op_def, gpu_info);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Softmax(op_def);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectSpaceToDepth(
    const OperationDef& op_def, const SpaceToDepthAttributes& attr) {
  auto gpu_op = SpaceToDepth(op_def, attr);
  return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
}

std::unique_ptr<ComputeTaskDescriptor> SelectWinograd4x4To36(
    const OperationDef& op_def, const Winograd4x4To36Attributes& attr,
    const GpuInfo& gpu_info) {
  if (gpu_info.IsApple()) {
    auto gpu_op = Winograd4x4To36(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Winograd4x4To36TileX6(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

std::unique_ptr<ComputeTaskDescriptor> SelectWinograd36To4x4(
    const OperationDef& op_def, const Winograd36To4x4Attributes& attr,
    const GpuInfo& gpu_info) {
  if (gpu_info.IsApple()) {
    auto gpu_op = Winograd36To4x4(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  } else {
    auto gpu_op = Winograd36To4x4Tile4x1(op_def, attr);
    return absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  }
}

bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  const int tiles_x = DivideRoundUp(dst_shape.w, 4);
  const int tiles_y = DivideRoundUp(dst_shape.h, 4);
  const int total_tiles = tiles_x * tiles_y;
  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
  int min_depth = 16;
  const int min_tiles = 32;
  if (total_tiles >= min_tiles * 8) {
    min_depth /= 4;
    min_depth = std::max(min_depth, 8);
  } else if (total_tiles >= min_tiles * 4) {
    min_depth /= 2;
    min_depth = std::max(min_depth, 8);
  }
  const bool recommended_channels =
      src_depth >= min_depth && dst_depth >= min_depth;
  const bool recommended_hw = total_tiles >= min_tiles;
  return recommended_channels && recommended_hw;
}

absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  const int tiles_x = DivideRoundUp(output_shape.w, 4);
  const int tiles_y = DivideRoundUp(output_shape.h, 4);
  const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor tensor_desc = op_def.src_tensors[0];
  gpu_subgraph->new_tensors = {{shape_0, tensor_desc}, {shape_1, tensor_desc}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);

  OperationDef winograd_up_def;
  winograd_up_def.precision = op_def.precision;
  winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_up_def.dst_tensors.push_back(op_def.src_tensors[0]);
  auto& winograd_up = gpu_subgraph->operations[0];
  Winograd4x4To36Attributes wino_up_attr;
  wino_up_attr.padding = attr.padding;
  winograd_up.operation =
      SelectWinograd4x4To36(winograd_up_def, wino_up_attr, gpu_info);
  winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
  winograd_up.output_ids = {-1};

  OperationDef conv_def;
  conv_def.precision = op_def.precision;
  conv_def.src_tensors.push_back(op_def.src_tensors[0]);
  conv_def.dst_tensors.push_back(op_def.src_tensors[0]);
  auto& conv = gpu_subgraph->operations[1];
  conv.input_ids = {-1};
  conv.output_ids = {-2};
  auto gpu_op = ConvolutionWino4x4To6x6(conv_def, shape_1, attr, gpu_info);
  conv.operation = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
  OperationDef winograd_down_def;
  winograd_down_def.precision = op_def.precision;
  winograd_down_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
  auto& winograd_down = gpu_subgraph->operations[2];
  winograd_down.input_ids = {-2};
  winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
  Winograd36To4x4Attributes wino_down_attr;
  wino_down_attr.output_shape = outputs[0]->tensor.shape;
  wino_down_attr.biases = attr.bias;
  winograd_down.operation =
      SelectWinograd36To4x4(winograd_down_def, wino_down_attr, gpu_info);
  return absl::OkStatus();
}

}  // namespace

absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph) {
  std::unique_ptr<ComputeTaskDescriptor>* task =
      InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
  auto op_type = OperationTypeFromString(node.operation.type);
  switch (op_type) {
    case OperationType::ADD: {
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      } else {  // more than 2 inputs
        auto gpu_op = Add(op_def);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    }
    case OperationType::CONCAT: {
      std::vector<BHWC> input_shapes;
      for (auto& input : inputs) {
        input_shapes.push_back(input->tensor.shape);
      }
      auto gpu_op = Concat(
          op_def, absl::any_cast<ConcatAttributes>(node.operation.attributes),
          input_shapes);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::CONVOLUTION_2D: {
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "Convolution does not support more than 1 runtime tensor");
      }
      auto attr =
          absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
      auto input_shape = inputs[0]->tensor.shape;
      auto output_shape = outputs[0]->tensor.shape;
      if (WinogradFromNode(gpu_info, inputs, outputs, op_def, input_shape,
                           output_shape, attr, gpu_subgraph)
              .ok()) {
        return absl::OkStatus();
      } else {
        auto gpu_op = ConvolutionGeneric(op_def, output_shape, attr, gpu_info);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    }
    case OperationType::CONVOLUTION_TRANSPOSED:
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "Convolution Transposed does not support more than 1 runtime "
            "tensor");
      }
      *task = SelectConvolutionTransposed(
          op_def,
          absl::any_cast<ConvolutionTransposedAttributes>(
              node.operation.attributes),
          gpu_info);
      break;
    case OperationType::DEPTHWISE_CONVOLUTION:
      if (inputs.size() != 1) {
        return absl::UnimplementedError(
            "DepthWise Convolution does not support more than 1 runtime "
            "tensor");
      }
      *task = SelectDepthWiseConv(
          op_def, absl::any_cast<DepthwiseConvolution2DAttributes>(
                      node.operation.attributes));
      break;
    case OperationType::FULLY_CONNECTED: {
      auto gpu_op = FullyConnected(
          op_def,
          absl::any_cast<FullyConnectedAttributes>(node.operation.attributes),
          gpu_info);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MAX_UNPOOLING_2D: {
      auto gpu_op = MaxUnpooling(
          op_def,
          absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      if (attr.dims != std::set<Axis>({Axis::HEIGHT, Axis::WIDTH})) {
        return absl::UnimplementedError("Mean supports HW axis only in Metal");
      }
      auto gpu_op = Mean(op_def, attr);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::MUL:
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
      break;
    case OperationType::PAD: {
      auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
      if (attr.appended.b != 0 || attr.prepended.b != 0) {
        return absl::UnimplementedError("Padding for BATCH is not supported.");
      }
      auto gpu_op = Padding(op_def, attr);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::POOLING_2D: {
      auto attr =
          absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
      auto pooling_op_def = op_def;
      pooling_op_def.dst_tensors = {op_def.dst_tensors[0]};
      auto gpu_op = Pooling(op_def, attr, false);
      gpu_subgraph->operations[0].operation =
          absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      gpu_subgraph->operations[0].input_ids = {static_cast<int>(inputs[0]->id)};
      gpu_subgraph->operations[0].output_ids = {
          static_cast<int>(outputs[0]->id)};
      if (attr.type == PoolingType::MAX && attr.output_indices) {
        gpu_subgraph->operations.push_back({});
        auto gpu_ind_op = Pooling(op_def, attr, true);
        gpu_subgraph->operations[1].operation =
            absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_ind_op));
        gpu_subgraph->operations[1].input_ids = {
            static_cast<int>(inputs[0]->id)};
        gpu_subgraph->operations[1].output_ids = {
            static_cast<int>(outputs[1]->id)};
      }
      break;
    }
    case OperationType::PRELU: {
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectPReLU(
          op_def, src_shape,
          absl::any_cast<PReLUAttributes>(node.operation.attributes));
      break;
    }
    case OperationType::RELU: {
      auto gpu_op = ReLU(
          op_def, absl::any_cast<ReLUAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::QUANTIZE_AND_DEQUANTIZE:
      *task = SelectQuantizeAndDequantize(
          op_def, absl::any_cast<QuantizeAndDequantizeAttributes>(
                      node.operation.attributes));
      break;
    case OperationType::RESHAPE: {
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectReshape(
          op_def, src_shape,
          absl::any_cast<ReshapeAttributes>(node.operation.attributes));
      break;
    }
    case OperationType::RESIZE: {
      auto gpu_op =
          Resize(op_def,
                 absl::any_cast<Resize2DAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::SLICE: {
      auto gpu_op = Slice(
          op_def, absl::any_cast<SliceAttributes>(node.operation.attributes));
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::SOFTMAX: {
      auto attr = absl::any_cast<SoftmaxAttributes>(node.operation.attributes);
      if (attr.axis != Axis::CHANNELS) {
        return absl::UnimplementedError(
            "Softmax supports only CHANNELS dimension");
      }
      const auto src_shape = inputs[0]->tensor.shape;
      *task = SelectSoftmax(op_def, src_shape, gpu_info);
      break;
    }
    case OperationType::SPACE_TO_DEPTH:
      *task = SelectSpaceToDepth(op_def, absl::any_cast<SpaceToDepthAttributes>(
                                             node.operation.attributes));
      break;
    case OperationType::ABS:
    case OperationType::COPY:
    case OperationType::COS:
    case OperationType::ELU:
    case OperationType::EXP:
    case OperationType::HARD_SWISH:
    case OperationType::LOG:
    case OperationType::NEG:
    case OperationType::RSQRT:
    case OperationType::SIGMOID:
    case OperationType::SIN:
    case OperationType::SQRT:
    case OperationType::SQUARE:
    case OperationType::TANH: {
      auto gpu_op = ElementwiseWithOneInput(op_def, op_type);
      *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      break;
    }
    case OperationType::DIV:
    case OperationType::MAXIMUM:
    case OperationType::MINIMUM:
    case OperationType::POW:
    case OperationType::SQUARED_DIFF:
    case OperationType::SUB: {
      if (inputs.size() == 1) {
        if (node.operation.attributes.has_value()) {
          auto attr =
              absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
          auto gpu_op = ElementwiseWithOneInputAndConstantArguent(
              op_def, op_type, attr.param);
          *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
        } else {
          return absl::UnimplementedError(
              "Missing attributes for single input op: " + node.operation.type);
        }
      } else if (inputs.size() == 2) {
        auto gpu_op =
            ElementwiseWithTwoInputs(op_def, inputs[1]->tensor.shape, op_type);
        *task = absl::make_unique<ComputeTaskDescriptor>(std::move(gpu_op));
      }
    } break;
    case OperationType::BATCH_NORMALIZATION:
    case OperationType::BATCH_TO_SPACE:
    case OperationType::BATCHED_MATMUL:
    case OperationType::CONST:
    case OperationType::LSTM:
    // TODO(b/162763635): implement MeanStddevNormalization for Metal.
    case OperationType::MEAN_STDDEV_NORMALIZATION:
    case OperationType::REDUCE_MAXIMUM:
    case OperationType::REDUCE_MINIMUM:
    case OperationType::REDUCE_PRODUCT:
    case OperationType::REDUCE_SUM:
    // comparison operations
    case OperationType::LESS:
    case OperationType::LESS_EQUAL:
    case OperationType::EQUAL:
    case OperationType::NOT_EQUAL:
    case OperationType::GREATER:
    case OperationType::GREATER_EQUAL:
    case OperationType::SPACE_TO_BATCH:
    case OperationType::TRANSPOSE:
      return absl::UnimplementedError("Unsupported op: " + node.operation.type);
    default:
      return SelectDefault(gpu_info, op_def, inputs, outputs, node,
                           gpu_subgraph);
  }
  return absl::OkStatus();
}

}  // namespace metal
}  // namespace gpu
}  // namespace tflite
@ -0,0 +1,39 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_

#include <memory>

#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/metal/selectors/subgraph.h"

namespace tflite {
namespace gpu {
namespace metal {

absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph);

}  // namespace metal
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_SELECTORS_OPERATION_SELECTOR_H_