Added new operation types(for reduce operations).

Added OpenCL selection of reduce ops. PiperOrigin-RevId: 329362643 Change-Id: I9d44d2cc1471ae2a57d0d5c731a526dbf600382c
2020-08-31 13:08:13 -07:00 · 2020-08-31 13:08:13 -07:00 · 0377e1f434
commit 0377e1f434
parent e1c867f994
8 changed files with 63 additions and 10 deletions
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reduce.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reduce.cc
@ -29,13 +29,13 @@ namespace {
 std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
                                        const OperationType& op_type) {
  std::string c = GetCommonDefines(op_def.precision);
-  if (op_type == OperationType::ADD) {
+  if (op_type == OperationType::REDUCE_SUM) {
    c += "#define OP(a, b) ((a) + (b))\n";
-  } else if (op_type == OperationType::MUL) {
+  } else if (op_type == OperationType::REDUCE_PRODUCT) {
    c += "#define OP(a, b) ((a) * (b))\n";
-  } else if (op_type == OperationType::MAXIMUM) {
+  } else if (op_type == OperationType::REDUCE_MAXIMUM) {
    c += "#define OP(a, b) max(a, b)\n";
-  } else if (op_type == OperationType::MINIMUM) {
+  } else if (op_type == OperationType::REDUCE_MINIMUM) {
    c += "#define OP(a, b) min(a, b)\n";
  }
  c += "__kernel void main_function($0) {\n";
@ -43,9 +43,9 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
  c += "  int Y = get_global_id(1);\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
       "return;\n";
-  if (op_type == OperationType::ADD) {
+  if (op_type == OperationType::REDUCE_SUM) {
    c += "  FLT4 reduced = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  } else if (op_type == OperationType::MUL) {
+  } else if (op_type == OperationType::REDUCE_PRODUCT) {
    c += "  FLT4 reduced = (FLT4)(1.0f, 1.0f, 1.0f, 1.0f);\n";
  } else {
    c += "  FLT4 V0 = args.src_tensor.Read(X, Y, 0);\n";
@ -80,6 +80,7 @@ std::string GetReduceChannelsKernelCode(const OperationDef& op_def,
 }  // namespace

 GPUOperation CreateReduce(const OperationDef& definition,
+                          const ReduceAttributes& attr,
                          const OperationType& op_type) {
  GPUOperation op(definition);
  auto src_desc = definition.src_tensors[0];
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reduce.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reduce.h
@ -24,6 +24,7 @@ namespace gpu {
 namespace cl {

 GPUOperation CreateReduce(const OperationDef& definition,
+                          const ReduceAttributes& attr,
                          const OperationType& op_type);

 }  // namespace cl
--- a/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/reduce_test.cc
@ -37,6 +37,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
  TensorFloat32 src_tensor;
  src_tensor.shape = BHWC(1, 2, 1, 5);
  src_tensor.data = {1.1, 2.1, 0.7, 0.3, 1.2, 3.1, 4.1, 0.0, 1.0, 4.4};
+  ReduceAttributes attr;
+  attr.axis = Axis::CHANNELS;

  for (auto storage : env_.GetSupportedStorages()) {
    for (auto precision : env_.GetSupportedPrecisions()) {
@ -47,7 +49,8 @@ TEST_F(OpenCLOperationTest, ReduceSumChannels) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreateReduce(op_def, OperationType::ADD);
+      GPUOperation operation =
+          CreateReduce(op_def, attr, OperationType::REDUCE_SUM);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 2, 1, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {5.4f, 12.6f}));
@ -59,6 +62,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
  TensorFloat32 src_tensor;
  src_tensor.shape = BHWC(1, 2, 1, 2);
  src_tensor.data = {1.1, 2.0, 3.1, 4.0};
+  ReduceAttributes attr;
+  attr.axis = Axis::CHANNELS;

  for (auto storage : env_.GetSupportedStorages()) {
    for (auto precision : env_.GetSupportedPrecisions()) {
@ -69,7 +74,8 @@ TEST_F(OpenCLOperationTest, ReduceProductChannels) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreateReduce(op_def, OperationType::MUL);
+      GPUOperation operation =
+          CreateReduce(op_def, attr, OperationType::REDUCE_PRODUCT);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 2, 1, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {2.2f, 12.4f}));
@ -82,6 +88,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
  src_tensor.shape = BHWC(1, 2, 1, 6);
  src_tensor.data = {1.1,  2.0,  -0.3, -100.0, 32.6, 1.1,
                     -3.1, -4.0, -5.0, -7.0,   -2.0, -100.0};
+  ReduceAttributes attr;
+  attr.axis = Axis::CHANNELS;

  for (auto storage : env_.GetSupportedStorages()) {
    for (auto precision : env_.GetSupportedPrecisions()) {
@ -92,7 +100,8 @@ TEST_F(OpenCLOperationTest, ReduceMaxChannels) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreateReduce(op_def, OperationType::MAXIMUM);
+      GPUOperation operation =
+          CreateReduce(op_def, attr, OperationType::REDUCE_MAXIMUM);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 2, 1, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {32.6f, -2.0f}));
@ -105,6 +114,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
  src_tensor.shape = BHWC(1, 2, 1, 6);
  src_tensor.data = {1.1,  2.0,  -0.3, -100.0, 32.6, 1.1,
                     -3.1, -4.0, -5.0, -7.0,   -2.0, 100.0};
+  ReduceAttributes attr;
+  attr.axis = Axis::CHANNELS;

  for (auto storage : env_.GetSupportedStorages()) {
    for (auto precision : env_.GetSupportedPrecisions()) {
@ -115,7 +126,8 @@ TEST_F(OpenCLOperationTest, ReduceMinChannels) {
      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreateReduce(op_def, OperationType::MINIMUM);
+      GPUOperation operation =
+          CreateReduce(op_def, attr, OperationType::REDUCE_MINIMUM);
      ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation,
                                    BHWC(1, 2, 1, 1), &dst_tensor));
      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {-100.0f, -7.0f}));
--- a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD
@ -110,6 +110,7 @@ cc_library(
        "//tensorflow/lite/delegates/gpu/cl/kernels:elementwise",
        "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
        "//tensorflow/lite/delegates/gpu/cl/kernels:mean_stddev_normalization",
+        "//tensorflow/lite/delegates/gpu/cl/kernels:reduce",
        "//tensorflow/lite/delegates/gpu/cl/selectors:default_selector",
        "//tensorflow/lite/delegates/gpu/common:data_type",
        "//tensorflow/lite/delegates/gpu/common:model",
--- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc
@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/reduce.h"
 #include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
 #include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h"
 #include "tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h"
@ -364,6 +365,19 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info,
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with this parameters"));
    }
+    case OperationType::REDUCE_MAXIMUM:
+    case OperationType::REDUCE_MINIMUM:
+    case OperationType::REDUCE_PRODUCT:
+    case OperationType::REDUCE_SUM: {
+      auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
+      if (attr.axis != Axis::CHANNELS) {
+        return absl::UnimplementedError(
+            "Currently we can reduce only in channels dimension.");
+      }
+      GPUOperation operation = CreateReduce(op_def, attr, op_type);
+      *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
+      return absl::OkStatus();
+    }
    default:
      return SelectDefault(device_info, op_def, hints, inputs, outputs, node,
                           gpu_subgraph);
--- a/tensorflow/lite/delegates/gpu/common/operations.cc
+++ b/tensorflow/lite/delegates/gpu/common/operations.cc
@ -132,6 +132,14 @@ std::string ToString(enum OperationType op) {
      return "prelu";
    case OperationType::QUANTIZE_AND_DEQUANTIZE:
      return "quantize_and_dequantize";
+    case OperationType::REDUCE_MAXIMUM:
+      return "reduce_maximum";
+    case OperationType::REDUCE_MINIMUM:
+      return "reduce_minimum";
+    case OperationType::REDUCE_PRODUCT:
+      return "reduce_product";
+    case OperationType::REDUCE_SUM:
+      return "reduce_sum";
    case OperationType::RELU:
      return "relu";
    case OperationType::RESHAPE:
@ -201,6 +209,10 @@ OperationType OperationTypeFromString(const std::string& name) {
          {"pow", OperationType::POW},
          {"prelu", OperationType::PRELU},
          {"quantize_and_dequantize", OperationType::QUANTIZE_AND_DEQUANTIZE},
+          {"reduce_maximum", OperationType::REDUCE_MAXIMUM},
+          {"reduce_minimum", OperationType::REDUCE_MINIMUM},
+          {"reduce_product", OperationType::REDUCE_PRODUCT},
+          {"reduce_sum", OperationType::REDUCE_SUM},
          {"relu", OperationType::RELU},
          {"resize", OperationType::RESIZE},
          {"reshape", OperationType::RESHAPE},
--- a/tensorflow/lite/delegates/gpu/common/operations.h
+++ b/tensorflow/lite/delegates/gpu/common/operations.h
@ -63,6 +63,10 @@ enum class OperationType {
  PRELU,
  // Used to accurately run inference on quantized models.
  QUANTIZE_AND_DEQUANTIZE,
+  REDUCE_MAXIMUM,
+  REDUCE_MINIMUM,
+  REDUCE_PRODUCT,
+  REDUCE_SUM,
  RELU,
  RESHAPE,
  RESIZE,
@ -359,6 +363,10 @@ struct PReLUAttributes {
      alpha;
 };

+struct ReduceAttributes {
+  Axis axis = Axis::UNKNOWN;
+};
+
 struct SoftmaxAttributes {
  Axis axis = Axis::UNKNOWN;
 };
--- a/tensorflow/lite/delegates/gpu/metal/api.cc
+++ b/tensorflow/lite/delegates/gpu/metal/api.cc
@ -408,6 +408,10 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
    case OperationType::LSTM:
    // TODO(b/162763635): implement MeanStddevNormalization for Metal.
    case OperationType::MEAN_STDDEV_NORMALIZATION:
+    case OperationType::REDUCE_MAXIMUM:
+    case OperationType::REDUCE_MINIMUM:
+    case OperationType::REDUCE_PRODUCT:
+    case OperationType::REDUCE_SUM:
    case OperationType::SPACE_TO_BATCH:
    case OperationType::TRANSPOSE:
    case OperationType::UNKNOWN: