Added an enum for tensor-to-grid mapping.
Added a default GetGridSize for the base class GPUOperation. The default GetGridSize makes it possible to reduce the number of specialized ops and have more 'generic' operations. Demonstrated on ConcatXY. PiperOrigin-RevId: 326483127 Change-Id: Ib8d04d0841f217b2b7262e6d09554bc82a212730
This commit is contained in:
parent
c979f5a424
commit
bc3589eb2b
@@ -51,7 +51,7 @@ TEST_F(OpenCLOperationTest, ConcatWidth) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
ConcatXY operation = CreateConcatXY(op_def, attr);
|
||||
GPUOperation operation = CreateConcatXY(op_def, attr);
|
||||
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
||||
BHWC(1, 2, 3, 2), &dst_tensor));
|
||||
EXPECT_THAT(
|
||||
@@ -83,7 +83,7 @@ TEST_F(OpenCLOperationTest, ConcatHeight) {
|
||||
op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
|
||||
TensorFloat32 dst_tensor;
|
||||
ConcatXY operation = CreateConcatXY(op_def, attr);
|
||||
GPUOperation operation = CreateConcatXY(op_def, attr);
|
||||
ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation,
|
||||
BHWC(1, 3, 1, 2), &dst_tensor));
|
||||
EXPECT_THAT(
|
||||
|
@@ -27,28 +27,13 @@ limitations under the License.
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
ConcatXY::ConcatXY(const OperationDef& definition, const ConcatAttributes& attr)
|
||||
: GPUOperation(definition) {
|
||||
code_ = GetConcatKernelCode(definition, attr);
|
||||
}
|
||||
|
||||
ConcatXY::ConcatXY(ConcatXY&& operation) : GPUOperation(std::move(operation)) {}
|
||||
|
||||
ConcatXY& ConcatXY::operator=(ConcatXY&& operation) {
|
||||
if (this != &operation) {
|
||||
GPUOperation::operator=(std::move(operation));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
std::string ConcatXY::GetConcatKernelCode(const OperationDef& op_def,
|
||||
const ConcatAttributes& attr) {
|
||||
namespace {
|
||||
std::string GetConcatKernelCode(const OperationDef& op_def,
|
||||
const ConcatAttributes& attr) {
|
||||
std::vector<std::string> tensor_names(op_def.src_tensors.size());
|
||||
for (int i = 0; i < op_def.src_tensors.size(); ++i) {
|
||||
tensor_names[i] = "src_tensor_" + std::to_string(i);
|
||||
AddSrcTensor(tensor_names[i], op_def.src_tensors[i]);
|
||||
}
|
||||
AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
|
||||
|
||||
std::map<Axis, std::string> axis_to_selector = {
|
||||
{Axis::WIDTH, "Width"}, {Axis::HEIGHT, "Height"},
|
||||
@@ -127,17 +112,19 @@ std::string ConcatXY::GetConcatKernelCode(const OperationDef& op_def,
|
||||
c += "}\n";
|
||||
return c;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
int3 ConcatXY::GetGridSize() const {
|
||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
|
||||
const int grid_z = dst_[0]->Slices();
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
|
||||
ConcatXY CreateConcatXY(const OperationDef& definition,
|
||||
const ConcatAttributes& attr) {
|
||||
return ConcatXY(definition, attr);
|
||||
GPUOperation CreateConcatXY(const OperationDef& definition,
|
||||
const ConcatAttributes& attr) {
|
||||
GPUOperation op(definition);
|
||||
for (int i = 0; i < definition.src_tensors.size(); ++i) {
|
||||
const std::string name = "src_tensor_" + std::to_string(i);
|
||||
op.AddSrcTensor(name, definition.src_tensors[i]);
|
||||
}
|
||||
op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
|
||||
op.code_ = GetConcatKernelCode(definition, attr);
|
||||
op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
|
||||
return op;
|
||||
}
|
||||
|
||||
} // namespace cl
|
||||
|
@@ -26,24 +26,8 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
class ConcatXY : public GPUOperation {
|
||||
public:
|
||||
ConcatXY(const OperationDef& definition, const ConcatAttributes& attr);
|
||||
int3 GetGridSize() const override;
|
||||
|
||||
// Move only
|
||||
ConcatXY(ConcatXY&& operation);
|
||||
ConcatXY& operator=(ConcatXY&& operation);
|
||||
ConcatXY(const ConcatXY&) = delete;
|
||||
ConcatXY& operator=(const ConcatXY&) = delete;
|
||||
|
||||
private:
|
||||
std::string GetConcatKernelCode(const OperationDef& op_def,
|
||||
const ConcatAttributes& attr);
|
||||
};
|
||||
|
||||
ConcatXY CreateConcatXY(const OperationDef& definition,
|
||||
const ConcatAttributes& attr);
|
||||
GPUOperation CreateConcatXY(const OperationDef& definition,
|
||||
const ConcatAttributes& attr);
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
|
@@ -124,6 +124,7 @@ void GPUOperation::SetDst(Tensor* ptr, int index) {
|
||||
GPUOperation::GPUOperation(GPUOperation&& operation)
|
||||
: args_(std::move(operation.args_)),
|
||||
code_(std::move(operation.code_)),
|
||||
tensor_to_grid_(operation.tensor_to_grid_),
|
||||
elementwise_(operation.elementwise_),
|
||||
linkable_(operation.linkable_),
|
||||
check_src_channels_size_(operation.check_src_channels_size_),
|
||||
@@ -142,6 +143,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
|
||||
if (this != &operation) {
|
||||
args_ = std::move(operation.args_);
|
||||
code_ = std::move(operation.code_);
|
||||
tensor_to_grid_ = operation.tensor_to_grid_;
|
||||
elementwise_ = operation.elementwise_;
|
||||
linkable_ = operation.linkable_;
|
||||
check_src_channels_size_ = operation.check_src_channels_size_;
|
||||
@@ -277,14 +279,19 @@ absl::Status GPUOperation::Tune(const TuningParameters& params) {
|
||||
}
|
||||
|
||||
int3 GPUOperation::GetGridSize() const {
|
||||
if (elementwise_) {
|
||||
if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) {
|
||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
const int grid_y = dst_[0]->Height();
|
||||
const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
|
||||
const int grid_z = dst_[0]->Slices();
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
} else {
|
||||
return int3(0, 0, 0);
|
||||
}
|
||||
if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) {
|
||||
const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
|
||||
const int grid_z = 1;
|
||||
return int3(grid_x, grid_y, grid_z);
|
||||
}
|
||||
return int3(0, 0, 0);
|
||||
}
|
||||
|
||||
void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
|
||||
|
@@ -37,6 +37,18 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
// kCustom: default value
|
||||
// GPUOperation::GetGridSize must be overloaded
|
||||
// kWBToX_HDToY_SToZ:
|
||||
// grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
// grid_y = dst_[0]->Height() * dst_[0]->Depth();
|
||||
// grid_z = dst_[0]->Slices();
|
||||
// kWBToX_HDToY_ZIs1:
|
||||
// grid_x = dst_[0]->Width() * dst_[0]->Batch();
|
||||
// grid_y = dst_[0]->Height() * dst_[0]->Depth();
|
||||
// grid_z = 1;
|
||||
enum class TensorToGrid { kCustom, kWBToX_HDToY_SToZ, kWBToX_HDToY_ZIs1 };
|
||||
|
||||
struct CreationContext {
|
||||
const CLDevice* device;
|
||||
CLContext* context;
|
||||
@@ -122,6 +134,8 @@ class GPUOperation {
|
||||
|
||||
Arguments args_;
|
||||
std::string code_;
|
||||
// not applicable to elementwise
|
||||
TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
|
||||
|
||||
bool elementwise_ = false;
|
||||
// applicable only with elementwise_ = true;
|
||||
|
@@ -110,8 +110,8 @@ absl::Status SelectConcat(const ConcatAttributes& attr,
|
||||
case Axis::DEPTH:
|
||||
case Axis::HEIGHT:
|
||||
case Axis::WIDTH: {
|
||||
ConcatXY operation = CreateConcatXY(op_def, attr);
|
||||
*ptr = absl::make_unique<ConcatXY>(std::move(operation));
|
||||
GPUOperation operation = CreateConcatXY(op_def, attr);
|
||||
*ptr = absl::make_unique<GPUOperation>(std::move(operation));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
default:
|
||||
|
Loading…
x
Reference in New Issue
Block a user