Added new operation types (for reduce operations).

Added OpenCL selection of reduce ops.

PiperOrigin-RevId: 329362643
Change-Id: I9d44d2cc1471ae2a57d0d5c731a526dbf600382c
This commit is contained in:
Raman Sarokin 2020-08-31 13:08:13 -07:00 committed by TensorFlower Gardener
parent e1c867f994
commit 0377e1f434
8 changed files with 63 additions and 10 deletions

View File

@ -29,13 +29,13 @@ namespace {
std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
const OperationType& op_type) {
std::string c = GetCommonDefines(op_def.precision);
if (op_type == OperationType::ADD) {
if (op_type == OperationType::REDUCE_SUM) {
c += "#define OP(a, b) ((a) + (b))\n";
} else if (op_type == OperationType::MUL) {
} else if (op_type == OperationType::REDUCE_PRODUCT) {
c += "#define OP(a, b) ((a) * (b))\n";
} else if (op_type == OperationType::MAXIMUM) {
} else if (op_type == OperationType::REDUCE_MAXIMUM) {
c += "#define OP(a, b) max(a, b)\n";
} else if (op_type == OperationType::MINIMUM) {
} else if (op_type == OperationType::REDUCE_MINIMUM) {
c += "#define OP(a, b) min(a, b)\n";
}
c += "__kernel void main_function($0) {\n";
@ -43,9 +43,9 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
c += " int Y = get_global_id(1);\n";
c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
"return;\n";
if (op_type == OperationType::ADD) {
if (op_type == OperationType::REDUCE_SUM) {
c += " FLT4 reduced = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
} else if (op_type == OperationType::MUL) {
} else if (op_type == OperationType::REDUCE_PRODUCT) {
c += " FLT4 reduced = (FLT4)(1.0f, 1.0f, 1.0f, 1.0f);\n";
} else {
c += " FLT4 V0 = args.src_tensor.Read(X, Y, 0);\n";
@ -80,6 +80,7 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
} // namespace
GPUOperation CreateReduce(const OperationDef& definition,
const ReduceAttributes& attr,
const OperationType& op_type) {
GPUOperation op(definition);
auto src_desc = definition.src_tensors[0];

View File

@ -24,6 +24,7 @@ namespace gpu {
namespace cl {
GPUOperation CreateReduce(const OperationDef& definition,
const ReduceAttributes& attr,
const OperationType& op_type);
} // namespace cl

View File

@ -37,6 +37,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 1, 5);
src_tensor.data = {1.1, 2.1, 0.7, 0.3, 1.2, 3.1, 4.1, 0.0, 1.0, 4.4};
ReduceAttributes attr;
attr.axis = Axis::CHANNELS;
for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
@ -47,7 +49,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
GPUOperation operation = CreateReduce(op_def, OperationType::ADD);
GPUOperation operation =
CreateReduce(op_def, attr, OperationType::REDUCE_SUM);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 1, 1), &dst_tensor));
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {5.4f, 12.6f}));
@ -59,6 +62,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
TensorFloat32 src_tensor;
src_tensor.shape = BHWC(1, 2, 1, 2);
src_tensor.data = {1.1, 2.0, 3.1, 4.0};
ReduceAttributes attr;
attr.axis = Axis::CHANNELS;
for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
@ -69,7 +74,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
GPUOperation operation = CreateReduce(op_def, OperationType::MUL);
GPUOperation operation =
CreateReduce(op_def, attr, OperationType::REDUCE_PRODUCT);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 1, 1), &dst_tensor));
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {2.2f, 12.4f}));
@ -82,6 +88,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
src_tensor.shape = BHWC(1, 2, 1, 6);
src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1,
-3.1, -4.0, -5.0, -7.0, -2.0, -100.0};
ReduceAttributes attr;
attr.axis = Axis::CHANNELS;
for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
@ -92,7 +100,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
GPUOperation operation = CreateReduce(op_def, OperationType::MAXIMUM);
GPUOperation operation =
CreateReduce(op_def, attr, OperationType::REDUCE_MAXIMUM);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 1, 1), &dst_tensor));
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {32.6f, -2.0f}));
@ -105,6 +114,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
src_tensor.shape = BHWC(1, 2, 1, 6);
src_tensor.data = {1.1, 2.0, -0.3, -100.0, 32.6, 1.1,
-3.1, -4.0, -5.0, -7.0, -2.0, 100.0};
ReduceAttributes attr;
attr.axis = Axis::CHANNELS;
for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
@ -115,7 +126,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
TensorFloat32 dst_tensor;
GPUOperation operation = CreateReduce(op_def, OperationType::MINIMUM);
GPUOperation operation =
CreateReduce(op_def, attr, OperationType::REDUCE_MINIMUM);
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
BHWC(1, 2, 1, 1), &dst_tensor));
EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {-100.0f, -7.0f}));

View File

@ -110,6 +110,7 @@ cc_library(
"//tensorflow/lite/delegates/gpu/cl/kernels:elementwise",
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
"//tensorflow/lite/delegates/gpu/cl/kernels:mean_stddev_normalization",
"//tensorflow/lite/delegates/gpu/cl/kernels:reduce",
"//tensorflow/lite/delegates/gpu/cl/selectors:default_selector",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:model",

View File

@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/reduce.h"
#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h"
#include "tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h"
@ -364,6 +365,19 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info,
return absl::UnimplementedError(absl::StrCat(
"No support of ", node.operation.type, " with this parameters"));
}
case OperationType::REDUCE_MAXIMUM:
case OperationType::REDUCE_MINIMUM:
case OperationType::REDUCE_PRODUCT:
case OperationType::REDUCE_SUM: {
auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
if (attr.axis != Axis::CHANNELS) {
return absl::UnimplementedError(
"Currently we can reduce only in channels dimension.");
}
GPUOperation operation = CreateReduce(op_def, attr, op_type);
*gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
return absl::OkStatus();
}
default:
return SelectDefault(device_info, op_def, hints, inputs, outputs, node,
gpu_subgraph);

View File

@ -132,6 +132,14 @@ std::string ToString(enum OperationType op) {
return "prelu";
case OperationType::QUANTIZE_AND_DEQUANTIZE:
return "quantize_and_dequantize";
case OperationType::REDUCE_MAXIMUM:
return "reduce_maximum";
case OperationType::REDUCE_MINIMUM:
return "reduce_minimum";
case OperationType::REDUCE_PRODUCT:
return "reduce_product";
case OperationType::REDUCE_SUM:
return "reduce_sum";
case OperationType::RELU:
return "relu";
case OperationType::RESHAPE:
@ -201,6 +209,10 @@ OperationType OperationTypeFromString(const std::string& name) {
{"pow", OperationType::POW},
{"prelu", OperationType::PRELU},
{"quantize_and_dequantize", OperationType::QUANTIZE_AND_DEQUANTIZE},
{"reduce_maximum", OperationType::REDUCE_MAXIMUM},
{"reduce_minimum", OperationType::REDUCE_MINIMUM},
{"reduce_product", OperationType::REDUCE_PRODUCT},
{"reduce_sum", OperationType::REDUCE_SUM},
{"relu", OperationType::RELU},
{"resize", OperationType::RESIZE},
{"reshape", OperationType::RESHAPE},

View File

@ -63,6 +63,10 @@ enum class OperationType {
PRELU,
// Used to accurately run inference on quantized models.
QUANTIZE_AND_DEQUANTIZE,
REDUCE_MAXIMUM,
REDUCE_MINIMUM,
REDUCE_PRODUCT,
REDUCE_SUM,
RELU,
RESHAPE,
RESIZE,
@ -359,6 +363,10 @@ struct PReLUAttributes {
alpha;
};
struct ReduceAttributes {
Axis axis = Axis::UNKNOWN;
};
struct SoftmaxAttributes {
Axis axis = Axis::UNKNOWN;
};

View File

@ -408,6 +408,10 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
case OperationType::LSTM:
// TODO(b/162763635): implement MeanStddevNormalization for Metal.
case OperationType::MEAN_STDDEV_NORMALIZATION:
case OperationType::REDUCE_MAXIMUM:
case OperationType::REDUCE_MINIMUM:
case OperationType::REDUCE_PRODUCT:
case OperationType::REDUCE_SUM:
case OperationType::SPACE_TO_BATCH:
case OperationType::TRANSPOSE:
case OperationType::UNKNOWN: