Added new operation types(for reduce operations).
Added OpenCL selection of reduce ops. PiperOrigin-RevId: 329362643 Change-Id: I9d44d2cc1471ae2a57d0d5c731a526dbf600382c
This commit is contained in:
parent
e1c867f994
commit
0377e1f434
@ -29,13 +29,13 @@ namespace {
|
||||
std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
|
||||
const OperationType& op_type) {
|
||||
std::string c = GetCommonDefines(op_def.precision);
|
||||
if (op_type == OperationType::ADD) {
|
||||
if (op_type == OperationType::REDUCE_SUM) {
|
||||
c += "#define OP(a, b) ((a) + (b))\n";
|
||||
} else if (op_type == OperationType::MUL) {
|
||||
} else if (op_type == OperationType::REDUCE_PRODUCT) {
|
||||
c += "#define OP(a, b) ((a) * (b))\n";
|
||||
} else if (op_type == OperationType::MAXIMUM) {
|
||||
} else if (op_type == OperationType::REDUCE_MAXIMUM) {
|
||||
c += "#define OP(a, b) max(a, b)\n";
|
||||
} else if (op_type == OperationType::MINIMUM) {
|
||||
} else if (op_type == OperationType::REDUCE_MINIMUM) {
|
||||
c += "#define OP(a, b) min(a, b)\n";
|
||||
}
|
||||
c += "__kernel void main_function($0) {\n";
|
||||
@ -43,9 +43,9 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
|
||||
c += " int Y = get_global_id(1);\n";
|
||||
c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
|
||||
"return;\n";
|
||||
if (op_type == OperationType::ADD) {
|
||||
if (op_type == OperationType::REDUCE_SUM) {
|
||||
c += " FLT4 reduced = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
|
||||
} else if (op_type == OperationType::MUL) {
|
||||
} else if (op_type == OperationType::REDUCE_PRODUCT) {
|
||||
c += " FLT4 reduced = (FLT4)(1.0f, 1.0f, 1.0f, 1.0f);\n";
|
||||
} else {
|
||||
c += " FLT4 V0 = args.src_tensor.Read(X, Y, 0);\n";
|
||||
@ -80,6 +80,7 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
|
||||
} // namespace
|
||||
|
||||
GPUOperation CreateReduce(const OperationDef& definition,
|
||||
const ReduceAttributes& attr,
|
||||
const OperationType& op_type) {
|
||||
GPUOperation op(definition);
|
||||
auto src_desc = definition.src_tensors[0];
|
||||
|
@ -24,6 +24,7 @@ namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
GPUOperation CreateReduce(const OperationDef& definition,
|
||||
const ReduceAttributes& attr,
|
||||
const OperationType& op_type);
|
||||
|
||||
} // namespace cl
|
||||
|
@ -37,6 +37,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 1, 5);
|
||||
src_tensor.data = {1.1, 2.1, 0.7, 0.3, 1.2, 3.1, 4.1, 0.0, 1.0, 4.4};
|
||||
ReduceAttributes attr;
|
||||
attr.axis = Axis::CHANNELS;
|
||||
|
||||
for (auto storage : env_.GetSupportedStorages()) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
@ -47,7 +49,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
GPUOperation operation = CreateReduce(op_def, OperationType::ADD);
|
||||
GPUOperation operation =
|
||||
CreateReduce(op_def, attr, OperationType::REDUCE_SUM);
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 1, 1), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {5.4f, 12.6f}));
|
||||
@ -59,6 +62,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
|
||||
TensorFloat32 src_tensor;
|
||||
src_tensor.shape = BHWC(1, 2, 1, 2);
|
||||
src_tensor.data = {1.1, 2.0, 3.1, 4.0};
|
||||
ReduceAttributes attr;
|
||||
attr.axis = Axis::CHANNELS;
|
||||
|
||||
for (auto storage : env_.GetSupportedStorages()) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
@ -69,7 +74,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
GPUOperation operation = CreateReduce(op_def, OperationType::MUL);
|
||||
GPUOperation operation =
|
||||
CreateReduce(op_def, attr, OperationType::REDUCE_PRODUCT);
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 1, 1), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {2.2f, 12.4f}));
|
||||
@ -82,6 +88,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
|
||||
src_tensor.shape = BHWC(1, 2, 1, 6);
|
||||
src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1,
|
||||
-3.1, -4.0, -5.0, -7.0, -2.0, -100.0};
|
||||
ReduceAttributes attr;
|
||||
attr.axis = Axis::CHANNELS;
|
||||
|
||||
for (auto storage : env_.GetSupportedStorages()) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
@ -92,7 +100,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
GPUOperation operation = CreateReduce(op_def, OperationType::MAXIMUM);
|
||||
GPUOperation operation =
|
||||
CreateReduce(op_def, attr, OperationType::REDUCE_MAXIMUM);
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 1, 1), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {32.6f, -2.0f}));
|
||||
@ -105,6 +114,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
|
||||
src_tensor.shape = BHWC(1, 2, 1, 6);
|
||||
src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1,
|
||||
-3.1, -4.0, -5.0, -7.0, -2.0, 100.0};
|
||||
ReduceAttributes attr;
|
||||
attr.axis = Axis::CHANNELS;
|
||||
|
||||
for (auto storage : env_.GetSupportedStorages()) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
@ -115,7 +126,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
GPUOperation operation = CreateReduce(op_def, OperationType::MINIMUM);
|
||||
GPUOperation operation =
|
||||
CreateReduce(op_def, attr, OperationType::REDUCE_MINIMUM);
|
||||
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
|
||||
BHWC(1, 2, 1, 1), &dst_tensor));
|
||||
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {-100.0f, -7.0f}));
|
||||
|
@ -110,6 +110,7 @@ cc_library(
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:elementwise",
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:mean_stddev_normalization",
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:reduce",
|
||||
"//tensorflow/lite/delegates/gpu/cl/selectors:default_selector",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/reduce.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h"
|
||||
@ -364,6 +365,19 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info,
|
||||
return absl::UnimplementedError(absl::StrCat(
|
||||
"No support of ", node.operation.type, " with this parameters"));
|
||||
}
|
||||
case OperationType::REDUCE_MAXIMUM:
|
||||
case OperationType::REDUCE_MINIMUM:
|
||||
case OperationType::REDUCE_PRODUCT:
|
||||
case OperationType::REDUCE_SUM: {
|
||||
auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
|
||||
if (attr.axis != Axis::CHANNELS) {
|
||||
return absl::UnimplementedError(
|
||||
"Currently we can reduce only in channels dimension.");
|
||||
}
|
||||
GPUOperation operation = CreateReduce(op_def, attr, op_type);
|
||||
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
default:
|
||||
return SelectDefault(device_info, op_def, hints, inputs, outputs, node,
|
||||
gpu_subgraph);
|
||||
|
@ -132,6 +132,14 @@ std::string ToString(enum OperationType op) {
|
||||
return "prelu";
|
||||
case OperationType::QUANTIZE_AND_DEQUANTIZE:
|
||||
return "quantize_and_dequantize";
|
||||
case OperationType::REDUCE_MAXIMUM:
|
||||
return "reduce_maximum";
|
||||
case OperationType::REDUCE_MINIMUM:
|
||||
return "reduce_minimum";
|
||||
case OperationType::REDUCE_PRODUCT:
|
||||
return "reduce_product";
|
||||
case OperationType::REDUCE_SUM:
|
||||
return "reduce_sum";
|
||||
case OperationType::RELU:
|
||||
return "relu";
|
||||
case OperationType::RESHAPE:
|
||||
@ -201,6 +209,10 @@ OperationType OperationTypeFromString(const std::string& name) {
|
||||
{"pow", OperationType::POW},
|
||||
{"prelu", OperationType::PRELU},
|
||||
{"quantize_and_dequantize", OperationType::QUANTIZE_AND_DEQUANTIZE},
|
||||
{"reduce_maximum", OperationType::REDUCE_MAXIMUM},
|
||||
{"reduce_minimum", OperationType::REDUCE_MINIMUM},
|
||||
{"reduce_product", OperationType::REDUCE_PRODUCT},
|
||||
{"reduce_sum", OperationType::REDUCE_SUM},
|
||||
{"relu", OperationType::RELU},
|
||||
{"resize", OperationType::RESIZE},
|
||||
{"reshape", OperationType::RESHAPE},
|
||||
|
@ -63,6 +63,10 @@ enum class OperationType {
|
||||
PRELU,
|
||||
// Used to accurately run inference on quantized models.
|
||||
QUANTIZE_AND_DEQUANTIZE,
|
||||
REDUCE_MAXIMUM,
|
||||
REDUCE_MINIMUM,
|
||||
REDUCE_PRODUCT,
|
||||
REDUCE_SUM,
|
||||
RELU,
|
||||
RESHAPE,
|
||||
RESIZE,
|
||||
@ -359,6 +363,10 @@ struct PReLUAttributes {
|
||||
alpha;
|
||||
};
|
||||
|
||||
struct ReduceAttributes {
|
||||
Axis axis = Axis::UNKNOWN;
|
||||
};
|
||||
|
||||
struct SoftmaxAttributes {
|
||||
Axis axis = Axis::UNKNOWN;
|
||||
};
|
||||
|
@ -408,6 +408,10 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
|
||||
case OperationType::LSTM:
|
||||
// TODO(b/162763635): implement MeanStddevNormalization for Metal.
|
||||
case OperationType::MEAN_STDDEV_NORMALIZATION:
|
||||
case OperationType::REDUCE_MAXIMUM:
|
||||
case OperationType::REDUCE_MINIMUM:
|
||||
case OperationType::REDUCE_PRODUCT:
|
||||
case OperationType::REDUCE_SUM:
|
||||
case OperationType::SPACE_TO_BATCH:
|
||||
case OperationType::TRANSPOSE:
|
||||
case OperationType::UNKNOWN:
|
||||
|
Loading…
x
Reference in New Issue
Block a user