From faad89dadf32e1c33293cb4b1e1212b1562c0780 Mon Sep 17 00:00:00 2001
From: Raman Sarokin <sorokin@google.com>
Date: Mon, 16 Nov 2020 19:08:01 -0800
Subject: [PATCH] TuningType moved to gpu/common/task.

PiperOrigin-RevId: 342770347
Change-Id: If0fd6174fb650002e9262fe13cb5a409989a9593
---
 .../lite/delegates/gpu/cl/cl_operation.cc     | 11 +++++-----
 .../lite/delegates/gpu/cl/cl_operation.h      |  3 ++-
 .../delegates/gpu/cl/inference_context.cc     | 22 ++++++++++---------
 .../lite/delegates/gpu/cl/inference_context.h |  3 ++-
 .../lite/delegates/gpu/cl/kernels/BUILD       | 15 +++----------
 .../delegates/gpu/cl/kernels/converter.cc     |  2 +-
 .../gpu/cl/kernels/fully_connected.h          |  1 -
 .../delegates/gpu/cl/kernels/gpu_operation.h  |  3 ++-
 .../delegates/gpu/cl/kernels/special/BUILD    |  1 -
 .../gpu/cl/kernels/special/fc_fc_add.h        |  1 -
 .../lite/delegates/gpu/cl/kernels/winograd.cc |  8 +++----
 .../gpu/cl/kernels/work_group_picking.cc      |  8 +++----
 .../gpu/cl/kernels/work_group_picking.h       |  2 +-
 .../lite/delegates/gpu/common/task/BUILD      |  5 +++++
 .../task/tuning_type.h}                       | 21 +++++-------------
 15 files changed, 47 insertions(+), 59 deletions(-)
 rename tensorflow/lite/delegates/gpu/{cl/kernels/tuning_parameters.h => common/task/tuning_type.h} (53%)
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc
index 56e29daa438..f0d01e156d0 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc
+++ b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc
@@ -114,10 +114,11 @@ absl::Status ClOperation::CompileDeserialized(
       *creation_context.context, *creation_context.device, &kernel_);
 }
 
-absl::Status ClOperation::Tune(const TuningParameters& params) {
+absl::Status ClOperation::Tune(TuningType tuning_type, const GpuInfo& gpu_info,
+                               ProfilingCommandQueue* profiling_queue) {
   std::vector<int3> possible_work_groups;
-  operation_->GetPossibleKernelWorkGroups(params.tuning_type, *params.info,
-                                          kernel_.info_, &possible_work_groups);
+  operation_->GetPossibleKernelWorkGroups(tuning_type, gpu_info, kernel_.info_,
+                                          &possible_work_groups);
   if (possible_work_groups.empty()) {
     return absl::NotFoundError(
         "Can not found work_group size to launch kernel");
@@ -137,8 +138,8 @@ absl::Status ClOperation::Tune(const TuningParameters& params) {
     }
     RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
     int best_work_group_index;
-    RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
-        kernel_, *params.info, work_groups_count, possible_work_groups,
+    RETURN_IF_ERROR(profiling_queue->GetBestWorkGroupIndex(
+        kernel_, gpu_info, work_groups_count, possible_work_groups,
         &best_work_group_index));
     operation_->work_group_size_ = possible_work_groups[best_work_group_index];
     operation_->work_groups_count_ = GetWorkGroupsCount(
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_operation.h b/tensorflow/lite/delegates/gpu/cl/cl_operation.h
index af403295afd..3e4d40a5fa0 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_operation.h
+++ b/tensorflow/lite/delegates/gpu/cl/cl_operation.h
@@ -71,7 +71,8 @@ class ClOperation {
                            operation_->work_group_size_);
   }
 
-  absl::Status Tune(const TuningParameters& params);
+  absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info,
+                    ProfilingCommandQueue* profiling_queue);
 
   absl::Status Compile(const CreationContext& creation_context);
 
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index b5deba26a97..f8a1493af01 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -183,21 +183,20 @@ absl::Status InferenceContext::InitFromGraph(
   RETURN_IF_ERROR(Compile(creation_context));
   RETURN_IF_ERROR(UpdateParams());
 
-  TuningParameters tuning_parameters;
-  tuning_parameters.queue = env->profiling_queue();
-  tuning_parameters.info = &env->device().info_;
+  TuningType tuning_type = TuningType::kExhaustive;
   if (create_info.hints.Check(ModelHints::kFastTuning)) {
-    tuning_parameters.tuning_type = TuningType::FAST;
+    tuning_type = TuningType::kFast;
   }
-  if (tuning_parameters.info->IsMali()) {
-    const MaliInfo& info = tuning_parameters.info->mali_info;
+  if (env->device().GetInfo().IsMali()) {
+    const MaliInfo& info = env->device().GetInfo().mali_info;
     if (info.IsMaliT6xx()) {
       // Mali T628 hangs forever in clFinish when used profiling queue
-      // TuningType::FAST does not use profiling queue.
+      // TuningType::kFast does not use profiling queue.
-      tuning_parameters.tuning_type = TuningType::FAST;
+      tuning_type = TuningType::kFast;
     }
   }
-  RETURN_IF_ERROR(Tune(tuning_parameters));
+  RETURN_IF_ERROR(
+      Tune(tuning_type, env->device().GetInfo(), env->profiling_queue()));
 
   if (serialized_model) {
     for (auto& node : nodes_) {
@@ -631,9 +630,12 @@ absl::Status InferenceContext::Compile(
   return absl::OkStatus();
 }
 
-absl::Status InferenceContext::Tune(const TuningParameters& tuning_parameters) {
+absl::Status InferenceContext::Tune(TuningType tuning_type,
+                                    const GpuInfo& gpu_info,
+                                    ProfilingCommandQueue* profiling_queue) {
   for (auto& node : nodes_) {
-    RETURN_IF_ERROR(node.cl_operation.Tune(tuning_parameters));
+    RETURN_IF_ERROR(
+        node.cl_operation.Tune(tuning_type, gpu_info, profiling_queue));
   }
   return absl::OkStatus();
 }
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
index c4214597cc6..83f0a75076c 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
@@ -133,7 +133,8 @@ class InferenceContext {
 
   void BindMemoryToOperations();
   absl::Status Compile(const CreationContext& creation_context);
-  absl::Status Tune(const TuningParameters& tuning_parameters);
+  absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info,
+                    ProfilingCommandQueue* profiling_queue);
   absl::Status UpdateParams();
 
   // performance hacks
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
index 79df26c8f34..8c26932a3f6 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@@ -580,7 +580,6 @@ cc_library(
     hdrs = ["fully_connected.h"],
     deps = [
         ":gpu_operation",
-        ":tuning_parameters",
         ":util",
         "//tensorflow/lite/delegates/gpu/cl:buffer",
         "//tensorflow/lite/delegates/gpu/cl:cl_kernel",
@@ -624,9 +623,9 @@ cc_library(
     srcs = ["gpu_operation.cc"],
     hdrs = ["gpu_operation.h"],
     deps = [
-        ":tuning_parameters",
         ":util",
         ":work_group_picking",
+        "//tensorflow/lite/delegates/gpu/cl:cl_kernel",
         "//tensorflow/lite/delegates/gpu/cl:device_info",
         "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
         "//tensorflow/lite/delegates/gpu/common:access_type",
@@ -638,6 +637,7 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/common/task:buffer_desc",
         "//tensorflow/lite/delegates/gpu/common/task:gpu_tensor",
         "//tensorflow/lite/delegates/gpu/common/task:tensor_desc",
+        "//tensorflow/lite/delegates/gpu/common/task:tuning_type",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1204,15 +1204,6 @@ cc_test(
     ],
 )
 
-cc_library(
-    name = "tuning_parameters",
-    hdrs = ["tuning_parameters.h"],
-    deps = [
-        "//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
-        "//tensorflow/lite/delegates/gpu/cl:device_info",
-    ],
-)
-
 cc_library(
     name = "resize",
     srcs = ["resize.cc"],
@@ -1306,12 +1297,12 @@ cc_library(
     srcs = ["work_group_picking.cc"],
     hdrs = ["work_group_picking.h"],
     deps = [
-        ":tuning_parameters",
         "//tensorflow/lite/delegates/gpu/cl:cl_kernel",
         "//tensorflow/lite/delegates/gpu/cl:device_info",
         "//tensorflow/lite/delegates/gpu/common:types",
         "//tensorflow/lite/delegates/gpu/common:util",
         "//tensorflow/lite/delegates/gpu/common:workgroup_selection",
+        "//tensorflow/lite/delegates/gpu/common/task:tuning_type",
     ],
 )
 
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc
index f8b3aef8605..ab6f986ea97 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc
@@ -54,7 +54,7 @@ class OpenClConverterImpl : public TensorObjectConverter {
     const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(),
                            tensor->Slices());
     std::vector<int3> work_groups;
-    GetPossibleWorkGroupsConv(TuningType::FAST, gpu_info_, kernel_.info_, grid,
+    GetPossibleWorkGroupsConv(TuningType::kFast, gpu_info_, kernel_.info_, grid,
                               &work_groups);
     const int3 work_group_size = work_groups[0];
     const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
index 1fdeb1f6db7..a508e741ef9 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/device_info.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
-#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
index 6a16897fcfe..8dbf9398376 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/device_info.h"
-#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/precision.h"
@@ -29,6 +29,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
 #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
+#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD
index 71d5f9811b3..92231338730 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD
@@ -36,7 +36,6 @@ cc_library(
         "//tensorflow/lite/delegates/gpu/cl:tensor",
         "//tensorflow/lite/delegates/gpu/cl:texture2d",
         "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
-        "//tensorflow/lite/delegates/gpu/cl/kernels:tuning_parameters",
         "//tensorflow/lite/delegates/gpu/cl/kernels:util",
         "//tensorflow/lite/delegates/gpu/common:data_type",
         "//tensorflow/lite/delegates/gpu/common:operations",
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h
index 65b392d03e3..09e0548c663 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/device_info.h"
 #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
-#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
 #include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
index 6af8e429435..6f8bf1ca3cf 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc
@@ -285,11 +285,11 @@ void Winograd4x4To36::GetPossibleKernelWorkGroups(
     TuningType tuning_type, const GpuInfo& gpu_info,
     const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
   switch (tuning_type) {
-    case TuningType::EXHAUSTIVE:
+    case TuningType::kExhaustive:
       GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
                             work_groups);
       return;
-    case TuningType::FAST:
+    case TuningType::kFast:
     default:
       work_groups->push_back(SelectBestWorkGroup(kernel_info));
       return;
@@ -481,11 +481,11 @@ void Winograd36To4x4::GetPossibleKernelWorkGroups(
     TuningType tuning_type, const GpuInfo& gpu_info,
     const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
   switch (tuning_type) {
-    case TuningType::EXHAUSTIVE:
+    case TuningType::kExhaustive:
       GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
                             work_groups);
       return;
-    case TuningType::FAST:
+    case TuningType::kFast:
     default:
       work_groups->push_back(SelectBestWorkGroup(kernel_info));
       return;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
index d30675f798f..0b7ec8ed683 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
@@ -252,11 +252,11 @@ void GetPossibleWorkGroups(TuningType tuning_type, const GpuInfo& gpu_info,
                            const KernelInfo& kernel_info, const int3& grid,
                            std::vector<int3>* work_groups) {
   switch (tuning_type) {
-    case TuningType::FAST:
+    case TuningType::kFast:
       work_groups->push_back(
           GetWorkGroup(grid, kernel_info.max_work_group_size));
       return;
-    case TuningType::EXHAUSTIVE: {
+    case TuningType::kExhaustive: {
       GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
       return;
     }
@@ -270,7 +270,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
                                const KernelInfo& kernel_info, const int3& grid,
                                std::vector<int3>* work_groups) {
   switch (tuning_type) {
-    case TuningType::FAST: {
+    case TuningType::kFast: {
       int max_z_size = 16;
       if (gpu_info.IsAdreno()) {
         max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
@@ -280,7 +280,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
           GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
       return;
     }
-    case TuningType::EXHAUSTIVE: {
+    case TuningType::kExhaustive: {
       GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
       return;
     }
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h
index 90afe44729c..edc1e2d5585 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h
@@ -20,7 +20,7 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
 #include "tensorflow/lite/delegates/gpu/cl/device_info.h"
-#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
+#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 #include "tensorflow/lite/delegates/gpu/common/workgroup_selection.h"
 
diff --git a/tensorflow/lite/delegates/gpu/common/task/BUILD b/tensorflow/lite/delegates/gpu/common/task/BUILD
index ac69fe8b944..f5b60ac6973 100644
--- a/tensorflow/lite/delegates/gpu/common/task/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/task/BUILD
@@ -100,6 +100,11 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "tuning_type",
+    hdrs = ["tuning_type.h"],
+)
+
 cc_library(
     name = "util",
     srcs = ["util.cc"],
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h b/tensorflow/lite/delegates/gpu/common/task/tuning_type.h
similarity index 53%
rename from tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h
rename to tensorflow/lite/delegates/gpu/common/task/tuning_type.h
index b7221e53ad4..0eebffbd539 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h
+++ b/tensorflow/lite/delegates/gpu/common/task/tuning_type.h
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,26 +13,15 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
-#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
-
-#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
-#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_
 
 namespace tflite {
 namespace gpu {
-namespace cl {
 
-enum class TuningType { EXHAUSTIVE, FAST };
+enum class TuningType { kExhaustive, kFast };
 
-struct TuningParameters {
-  ProfilingCommandQueue* queue;
-  const GpuInfo* info;
-  TuningType tuning_type = TuningType::EXHAUSTIVE;
-};
-
-}  // namespace cl
 }  // namespace gpu
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_