From faad89dadf32e1c33293cb4b1e1212b1562c0780 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 16 Nov 2020 19:08:01 -0800 Subject: [PATCH] TuningType moved to gpu/common/task. PiperOrigin-RevId: 342770347 Change-Id: If0fd6174fb650002e9262fe13cb5a409989a9593 --- .../lite/delegates/gpu/cl/cl_operation.cc | 11 +++++----- .../lite/delegates/gpu/cl/cl_operation.h | 3 ++- .../delegates/gpu/cl/inference_context.cc | 22 ++++++++++--------- .../lite/delegates/gpu/cl/inference_context.h | 3 ++- .../lite/delegates/gpu/cl/kernels/BUILD | 15 +++---------- .../delegates/gpu/cl/kernels/converter.cc | 2 +- .../gpu/cl/kernels/fully_connected.h | 1 - .../delegates/gpu/cl/kernels/gpu_operation.h | 3 ++- .../delegates/gpu/cl/kernels/special/BUILD | 1 - .../gpu/cl/kernels/special/fc_fc_add.h | 1 - .../lite/delegates/gpu/cl/kernels/winograd.cc | 8 +++---- .../gpu/cl/kernels/work_group_picking.cc | 8 +++---- .../gpu/cl/kernels/work_group_picking.h | 2 +- .../lite/delegates/gpu/common/task/BUILD | 5 +++++ .../task/tuning_type.h} | 21 +++++------------- 15 files changed, 47 insertions(+), 59 deletions(-) rename tensorflow/lite/delegates/gpu/{cl/kernels/tuning_parameters.h => common/task/tuning_type.h} (53%) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc index 56e29daa438..f0d01e156d0 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_operation.cc @@ -114,10 +114,11 @@ absl::Status ClOperation::CompileDeserialized( *creation_context.context, *creation_context.device, &kernel_); } -absl::Status ClOperation::Tune(const TuningParameters& params) { +absl::Status ClOperation::Tune(TuningType tuning_type, const GpuInfo& gpu_info, + ProfilingCommandQueue* profiling_queue) { std::vector possible_work_groups; - operation_->GetPossibleKernelWorkGroups(params.tuning_type, *params.info, - kernel_.info_, &possible_work_groups); + operation_->GetPossibleKernelWorkGroups(tuning_type, gpu_info, kernel_.info_, + &possible_work_groups); if (possible_work_groups.empty()) { return absl::NotFoundError( "Can not found work_group size to launch kernel"); @@ -137,8 +138,8 @@ absl::Status ClOperation::Tune(const TuningParameters& params) { } RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel())); int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel_, *params.info, work_groups_count, possible_work_groups, + RETURN_IF_ERROR(profiling_queue->GetBestWorkGroupIndex( + kernel_, gpu_info, work_groups_count, possible_work_groups, &best_work_group_index)); operation_->work_group_size_ = possible_work_groups[best_work_group_index]; operation_->work_groups_count_ = GetWorkGroupsCount( diff --git a/tensorflow/lite/delegates/gpu/cl/cl_operation.h b/tensorflow/lite/delegates/gpu/cl/cl_operation.h index af403295afd..3e4d40a5fa0 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_operation.h @@ -71,7 +71,8 @@ class ClOperation { operation_->work_group_size_); } - absl::Status Tune(const TuningParameters& params); + absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info, + ProfilingCommandQueue* profiling_queue); absl::Status Compile(const CreationContext& creation_context); diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index b5deba26a97..f8a1493af01 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -183,21 +183,20 @@ absl::Status InferenceContext::InitFromGraph( RETURN_IF_ERROR(Compile(creation_context)); RETURN_IF_ERROR(UpdateParams()); - TuningParameters tuning_parameters; - tuning_parameters.queue = env->profiling_queue(); - tuning_parameters.info = &env->device().info_; + TuningType tuning_type = TuningType::kExhaustive; if (create_info.hints.Check(ModelHints::kFastTuning)) { - tuning_parameters.tuning_type = TuningType::FAST; + tuning_type = TuningType::kFast; } - if (tuning_parameters.info->IsMali()) { - const MaliInfo& info = tuning_parameters.info->mali_info; + if (env->device().GetInfo().IsMali()) { + const MaliInfo& info = env->device().GetInfo().mali_info; if (info.IsMaliT6xx()) { // Mali T628 hangs forever in clFinish when used profiling queue // TuningType::FAST does not use profiling queue. - tuning_parameters.tuning_type = TuningType::FAST; + tuning_type = TuningType::kFast; } } - RETURN_IF_ERROR(Tune(tuning_parameters)); + RETURN_IF_ERROR( + Tune(tuning_type, env->device().GetInfo(), env->profiling_queue())); if (serialized_model) { for (auto& node : nodes_) { @@ -631,9 +630,12 @@ absl::Status InferenceContext::Compile( return absl::OkStatus(); } -absl::Status InferenceContext::Tune(const TuningParameters& tuning_parameters) { +absl::Status InferenceContext::Tune(TuningType tuning_type, + const GpuInfo& gpu_info, + ProfilingCommandQueue* profiling_queue) { for (auto& node : nodes_) { - RETURN_IF_ERROR(node.cl_operation.Tune(tuning_parameters)); + RETURN_IF_ERROR( + node.cl_operation.Tune(tuning_type, gpu_info, profiling_queue)); } return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index c4214597cc6..83f0a75076c 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -133,7 +133,8 @@ class InferenceContext { void BindMemoryToOperations(); absl::Status Compile(const CreationContext& creation_context); - absl::Status Tune(const TuningParameters& tuning_parameters); + absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info, + ProfilingCommandQueue* profiling_queue); absl::Status UpdateParams(); // performance hacks diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 79df26c8f34..8c26932a3f6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -580,7 +580,6 @@ cc_library( hdrs = ["fully_connected.h"], deps = [ ":gpu_operation", - ":tuning_parameters", ":util", "//tensorflow/lite/delegates/gpu/cl:buffer", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", @@ -624,9 +623,9 @@ cc_library( srcs = ["gpu_operation.cc"], hdrs = ["gpu_operation.h"], deps = [ - ":tuning_parameters", ":util", ":work_group_picking", + "//tensorflow/lite/delegates/gpu/cl:cl_kernel", "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs", "//tensorflow/lite/delegates/gpu/common:access_type", @@ -638,6 +637,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common/task:buffer_desc", "//tensorflow/lite/delegates/gpu/common/task:gpu_tensor", "//tensorflow/lite/delegates/gpu/common/task:tensor_desc", + "//tensorflow/lite/delegates/gpu/common/task:tuning_type", "@com_google_absl//absl/strings", ], ) @@ -1204,15 +1204,6 @@ cc_test( ], ) -cc_library( - name = "tuning_parameters", - hdrs = ["tuning_parameters.h"], - deps = [ - "//tensorflow/lite/delegates/gpu/cl:cl_command_queue", - "//tensorflow/lite/delegates/gpu/cl:device_info", - ], -) - cc_library( name = "resize", srcs = ["resize.cc"], @@ -1306,12 +1297,12 @@ cc_library( srcs = ["work_group_picking.cc"], hdrs = ["work_group_picking.h"], deps = [ - ":tuning_parameters", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common:workgroup_selection", + "//tensorflow/lite/delegates/gpu/common/task:tuning_type", ], ) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index f8b3aef8605..ab6f986ea97 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -54,7 +54,7 @@ class OpenClConverterImpl : public TensorObjectConverter { const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices()); std::vector work_groups; - GetPossibleWorkGroupsConv(TuningType::FAST, gpu_info_, kernel_.info_, grid, + GetPossibleWorkGroupsConv(TuningType::kFast, gpu_info_, kernel_.info_, grid, &work_groups); const int3 work_group_size = work_groups[0]; const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index 1fdeb1f6db7..a508e741ef9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" -#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" #include "tensorflow/lite/delegates/gpu/cl/texture2d.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 6a16897fcfe..8dbf9398376 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -19,8 +19,8 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/device_info.h" -#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h" #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" +#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h" #include "tensorflow/lite/delegates/gpu/common/types.h" namespace tflite { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD index 71d5f9811b3..92231338730 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD @@ -36,7 +36,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/cl:texture2d", "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", - "//tensorflow/lite/delegates/gpu/cl/kernels:tuning_parameters", "//tensorflow/lite/delegates/gpu/cl/kernels:util", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:operations", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h index 65b392d03e3..09e0548c663 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" -#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" #include "tensorflow/lite/delegates/gpu/cl/texture2d.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 6af8e429435..6f8bf1ca3cf 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -285,11 +285,11 @@ void Winograd4x4To36::GetPossibleKernelWorkGroups( TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { switch (tuning_type) { - case TuningType::EXHAUSTIVE: + case TuningType::kExhaustive: GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); return; - case TuningType::FAST: + case TuningType::kFast: default: work_groups->push_back(SelectBestWorkGroup(kernel_info)); return; @@ -481,11 +481,11 @@ void Winograd36To4x4::GetPossibleKernelWorkGroups( TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, std::vector* work_groups) const { switch (tuning_type) { - case TuningType::EXHAUSTIVE: + case TuningType::kExhaustive: GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_, work_groups); return; - case TuningType::FAST: + case TuningType::kFast: default: work_groups->push_back(SelectBestWorkGroup(kernel_info)); return; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index d30675f798f..0b7ec8ed683 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -252,11 +252,11 @@ void GetPossibleWorkGroups(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups) { switch (tuning_type) { - case TuningType::FAST: + case TuningType::kFast: work_groups->push_back( GetWorkGroup(grid, kernel_info.max_work_group_size)); return; - case TuningType::EXHAUSTIVE: { + case TuningType::kExhaustive: { GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups); return; } @@ -270,7 +270,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info, const KernelInfo& kernel_info, const int3& grid, std::vector* work_groups) { switch (tuning_type) { - case TuningType::FAST: { + case TuningType::kFast: { int max_z_size = 16; if (gpu_info.IsAdreno()) { max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64; @@ -280,7 +280,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info, GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size)); return; } - case TuningType::EXHAUSTIVE: { + case TuningType::kExhaustive: { GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups); return; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h index 90afe44729c..edc1e2d5585 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/device_info.h" -#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" +#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h" #include "tensorflow/lite/delegates/gpu/common/types.h" #include "tensorflow/lite/delegates/gpu/common/workgroup_selection.h" diff --git a/tensorflow/lite/delegates/gpu/common/task/BUILD b/tensorflow/lite/delegates/gpu/common/task/BUILD index ac69fe8b944..f5b60ac6973 100644 --- a/tensorflow/lite/delegates/gpu/common/task/BUILD +++ b/tensorflow/lite/delegates/gpu/common/task/BUILD @@ -100,6 +100,11 @@ cc_library( ], ) +cc_library( + name = "tuning_type", + hdrs = ["tuning_type.h"], +) + cc_library( name = "util", srcs = ["util.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h b/tensorflow/lite/delegates/gpu/common/task/tuning_type.h similarity index 53% rename from tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h rename to tensorflow/lite/delegates/gpu/common/task/tuning_type.h index b7221e53ad4..0eebffbd539 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h +++ b/tensorflow/lite/delegates/gpu/common/task/tuning_type.h @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,26 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_ -#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_ - -#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_ namespace tflite { namespace gpu { -namespace cl { -enum class TuningType { EXHAUSTIVE, FAST }; +enum class TuningType { kExhaustive, kFast }; -struct TuningParameters { - ProfilingCommandQueue* queue; - const GpuInfo* info; - TuningType tuning_type = TuningType::EXHAUSTIVE; -}; - -} // namespace cl } // namespace gpu } // namespace tflite -#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_ +#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_