TuningType moved to gpu/common/task.
PiperOrigin-RevId: 342770347 Change-Id: If0fd6174fb650002e9262fe13cb5a409989a9593
This commit is contained in:
parent
f0c976a27f
commit
faad89dadf
@ -114,10 +114,11 @@ absl::Status ClOperation::CompileDeserialized(
|
|||||||
*creation_context.context, *creation_context.device, &kernel_);
|
*creation_context.context, *creation_context.device, &kernel_);
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status ClOperation::Tune(const TuningParameters& params) {
|
absl::Status ClOperation::Tune(TuningType tuning_type, const GpuInfo& gpu_info,
|
||||||
|
ProfilingCommandQueue* profiling_queue) {
|
||||||
std::vector<int3> possible_work_groups;
|
std::vector<int3> possible_work_groups;
|
||||||
operation_->GetPossibleKernelWorkGroups(params.tuning_type, *params.info,
|
operation_->GetPossibleKernelWorkGroups(tuning_type, gpu_info, kernel_.info_,
|
||||||
kernel_.info_, &possible_work_groups);
|
&possible_work_groups);
|
||||||
if (possible_work_groups.empty()) {
|
if (possible_work_groups.empty()) {
|
||||||
return absl::NotFoundError(
|
return absl::NotFoundError(
|
||||||
"Can not found work_group size to launch kernel");
|
"Can not found work_group size to launch kernel");
|
||||||
@ -137,8 +138,8 @@ absl::Status ClOperation::Tune(const TuningParameters& params) {
|
|||||||
}
|
}
|
||||||
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
|
RETURN_IF_ERROR(cl_args_.Bind(kernel_.kernel()));
|
||||||
int best_work_group_index;
|
int best_work_group_index;
|
||||||
RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
|
RETURN_IF_ERROR(profiling_queue->GetBestWorkGroupIndex(
|
||||||
kernel_, *params.info, work_groups_count, possible_work_groups,
|
kernel_, gpu_info, work_groups_count, possible_work_groups,
|
||||||
&best_work_group_index));
|
&best_work_group_index));
|
||||||
operation_->work_group_size_ = possible_work_groups[best_work_group_index];
|
operation_->work_group_size_ = possible_work_groups[best_work_group_index];
|
||||||
operation_->work_groups_count_ = GetWorkGroupsCount(
|
operation_->work_groups_count_ = GetWorkGroupsCount(
|
||||||
|
@ -71,7 +71,8 @@ class ClOperation {
|
|||||||
operation_->work_group_size_);
|
operation_->work_group_size_);
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status Tune(const TuningParameters& params);
|
absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info,
|
||||||
|
ProfilingCommandQueue* profiling_queue);
|
||||||
|
|
||||||
absl::Status Compile(const CreationContext& creation_context);
|
absl::Status Compile(const CreationContext& creation_context);
|
||||||
|
|
||||||
|
@ -183,21 +183,20 @@ absl::Status InferenceContext::InitFromGraph(
|
|||||||
RETURN_IF_ERROR(Compile(creation_context));
|
RETURN_IF_ERROR(Compile(creation_context));
|
||||||
RETURN_IF_ERROR(UpdateParams());
|
RETURN_IF_ERROR(UpdateParams());
|
||||||
|
|
||||||
TuningParameters tuning_parameters;
|
TuningType tuning_type = TuningType::kExhaustive;
|
||||||
tuning_parameters.queue = env->profiling_queue();
|
|
||||||
tuning_parameters.info = &env->device().info_;
|
|
||||||
if (create_info.hints.Check(ModelHints::kFastTuning)) {
|
if (create_info.hints.Check(ModelHints::kFastTuning)) {
|
||||||
tuning_parameters.tuning_type = TuningType::FAST;
|
tuning_type = TuningType::kFast;
|
||||||
}
|
}
|
||||||
if (tuning_parameters.info->IsMali()) {
|
if (env->device().GetInfo().IsMali()) {
|
||||||
const MaliInfo& info = tuning_parameters.info->mali_info;
|
const MaliInfo& info = env->device().GetInfo().mali_info;
|
||||||
if (info.IsMaliT6xx()) {
|
if (info.IsMaliT6xx()) {
|
||||||
// Mali T628 hangs forever in clFinish when a profiling queue is used.
|
// Mali T628 hangs forever in clFinish when a profiling queue is used.
|
||||||
// TuningType::FAST does not use profiling queue.
|
// TuningType::kFast does not use profiling queue.
|
||||||
tuning_parameters.tuning_type = TuningType::FAST;
|
tuning_type = TuningType::kFast;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
RETURN_IF_ERROR(Tune(tuning_parameters));
|
RETURN_IF_ERROR(
|
||||||
|
Tune(tuning_type, env->device().GetInfo(), env->profiling_queue()));
|
||||||
|
|
||||||
if (serialized_model) {
|
if (serialized_model) {
|
||||||
for (auto& node : nodes_) {
|
for (auto& node : nodes_) {
|
||||||
@ -631,9 +630,12 @@ absl::Status InferenceContext::Compile(
|
|||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status InferenceContext::Tune(const TuningParameters& tuning_parameters) {
|
absl::Status InferenceContext::Tune(TuningType tuning_type,
|
||||||
|
const GpuInfo& gpu_info,
|
||||||
|
ProfilingCommandQueue* profiling_queue) {
|
||||||
for (auto& node : nodes_) {
|
for (auto& node : nodes_) {
|
||||||
RETURN_IF_ERROR(node.cl_operation.Tune(tuning_parameters));
|
RETURN_IF_ERROR(
|
||||||
|
node.cl_operation.Tune(tuning_type, gpu_info, profiling_queue));
|
||||||
}
|
}
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
@ -133,7 +133,8 @@ class InferenceContext {
|
|||||||
|
|
||||||
void BindMemoryToOperations();
|
void BindMemoryToOperations();
|
||||||
absl::Status Compile(const CreationContext& creation_context);
|
absl::Status Compile(const CreationContext& creation_context);
|
||||||
absl::Status Tune(const TuningParameters& tuning_parameters);
|
absl::Status Tune(TuningType tuning_type, const GpuInfo& gpu_info,
|
||||||
|
ProfilingCommandQueue* profiling_queue);
|
||||||
absl::Status UpdateParams();
|
absl::Status UpdateParams();
|
||||||
|
|
||||||
// performance hacks
|
// performance hacks
|
||||||
|
@ -580,7 +580,6 @@ cc_library(
|
|||||||
hdrs = ["fully_connected.h"],
|
hdrs = ["fully_connected.h"],
|
||||||
deps = [
|
deps = [
|
||||||
":gpu_operation",
|
":gpu_operation",
|
||||||
":tuning_parameters",
|
|
||||||
":util",
|
":util",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:buffer",
|
"//tensorflow/lite/delegates/gpu/cl:buffer",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||||
@ -624,9 +623,9 @@ cc_library(
|
|||||||
srcs = ["gpu_operation.cc"],
|
srcs = ["gpu_operation.cc"],
|
||||||
hdrs = ["gpu_operation.h"],
|
hdrs = ["gpu_operation.h"],
|
||||||
deps = [
|
deps = [
|
||||||
":tuning_parameters",
|
|
||||||
":util",
|
":util",
|
||||||
":work_group_picking",
|
":work_group_picking",
|
||||||
|
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
|
"//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
|
||||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||||
@ -638,6 +637,7 @@ cc_library(
|
|||||||
"//tensorflow/lite/delegates/gpu/common/task:buffer_desc",
|
"//tensorflow/lite/delegates/gpu/common/task:buffer_desc",
|
||||||
"//tensorflow/lite/delegates/gpu/common/task:gpu_tensor",
|
"//tensorflow/lite/delegates/gpu/common/task:gpu_tensor",
|
||||||
"//tensorflow/lite/delegates/gpu/common/task:tensor_desc",
|
"//tensorflow/lite/delegates/gpu/common/task:tensor_desc",
|
||||||
|
"//tensorflow/lite/delegates/gpu/common/task:tuning_type",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -1204,15 +1204,6 @@ cc_test(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "tuning_parameters",
|
|
||||||
hdrs = ["tuning_parameters.h"],
|
|
||||||
deps = [
|
|
||||||
"//tensorflow/lite/delegates/gpu/cl:cl_command_queue",
|
|
||||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "resize",
|
name = "resize",
|
||||||
srcs = ["resize.cc"],
|
srcs = ["resize.cc"],
|
||||||
@ -1306,12 +1297,12 @@ cc_library(
|
|||||||
srcs = ["work_group_picking.cc"],
|
srcs = ["work_group_picking.cc"],
|
||||||
hdrs = ["work_group_picking.h"],
|
hdrs = ["work_group_picking.h"],
|
||||||
deps = [
|
deps = [
|
||||||
":tuning_parameters",
|
|
||||||
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||||
"//tensorflow/lite/delegates/gpu/common:types",
|
"//tensorflow/lite/delegates/gpu/common:types",
|
||||||
"//tensorflow/lite/delegates/gpu/common:util",
|
"//tensorflow/lite/delegates/gpu/common:util",
|
||||||
"//tensorflow/lite/delegates/gpu/common:workgroup_selection",
|
"//tensorflow/lite/delegates/gpu/common:workgroup_selection",
|
||||||
|
"//tensorflow/lite/delegates/gpu/common/task:tuning_type",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ class OpenClConverterImpl : public TensorObjectConverter {
|
|||||||
const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(),
|
const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(),
|
||||||
tensor->Slices());
|
tensor->Slices());
|
||||||
std::vector<int3> work_groups;
|
std::vector<int3> work_groups;
|
||||||
GetPossibleWorkGroupsConv(TuningType::FAST, gpu_info_, kernel_.info_, grid,
|
GetPossibleWorkGroupsConv(TuningType::kFast, gpu_info_, kernel_.info_, grid,
|
||||||
&work_groups);
|
&work_groups);
|
||||||
const int3 work_group_size = work_groups[0];
|
const int3 work_group_size = work_groups[0];
|
||||||
const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
|
const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
|
||||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||||
|
@ -19,8 +19,8 @@ limitations under the License.
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||||
@ -29,6 +29,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
|
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
|
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
|
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
|
@ -36,7 +36,6 @@ cc_library(
|
|||||||
"//tensorflow/lite/delegates/gpu/cl:tensor",
|
"//tensorflow/lite/delegates/gpu/cl:tensor",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:texture2d",
|
"//tensorflow/lite/delegates/gpu/cl:texture2d",
|
||||||
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
|
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
|
||||||
"//tensorflow/lite/delegates/gpu/cl/kernels:tuning_parameters",
|
|
||||||
"//tensorflow/lite/delegates/gpu/cl/kernels:util",
|
"//tensorflow/lite/delegates/gpu/cl/kernels:util",
|
||||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||||
|
@ -27,7 +27,6 @@ limitations under the License.
|
|||||||
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||||
|
@ -285,11 +285,11 @@ void Winograd4x4To36::GetPossibleKernelWorkGroups(
|
|||||||
TuningType tuning_type, const GpuInfo& gpu_info,
|
TuningType tuning_type, const GpuInfo& gpu_info,
|
||||||
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
|
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
|
||||||
switch (tuning_type) {
|
switch (tuning_type) {
|
||||||
case TuningType::EXHAUSTIVE:
|
case TuningType::kExhaustive:
|
||||||
GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
|
GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
|
||||||
work_groups);
|
work_groups);
|
||||||
return;
|
return;
|
||||||
case TuningType::FAST:
|
case TuningType::kFast:
|
||||||
default:
|
default:
|
||||||
work_groups->push_back(SelectBestWorkGroup(kernel_info));
|
work_groups->push_back(SelectBestWorkGroup(kernel_info));
|
||||||
return;
|
return;
|
||||||
@ -481,11 +481,11 @@ void Winograd36To4x4::GetPossibleKernelWorkGroups(
|
|||||||
TuningType tuning_type, const GpuInfo& gpu_info,
|
TuningType tuning_type, const GpuInfo& gpu_info,
|
||||||
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
|
const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
|
||||||
switch (tuning_type) {
|
switch (tuning_type) {
|
||||||
case TuningType::EXHAUSTIVE:
|
case TuningType::kExhaustive:
|
||||||
GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
|
GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
|
||||||
work_groups);
|
work_groups);
|
||||||
return;
|
return;
|
||||||
case TuningType::FAST:
|
case TuningType::kFast:
|
||||||
default:
|
default:
|
||||||
work_groups->push_back(SelectBestWorkGroup(kernel_info));
|
work_groups->push_back(SelectBestWorkGroup(kernel_info));
|
||||||
return;
|
return;
|
||||||
|
@ -252,11 +252,11 @@ void GetPossibleWorkGroups(TuningType tuning_type, const GpuInfo& gpu_info,
|
|||||||
const KernelInfo& kernel_info, const int3& grid,
|
const KernelInfo& kernel_info, const int3& grid,
|
||||||
std::vector<int3>* work_groups) {
|
std::vector<int3>* work_groups) {
|
||||||
switch (tuning_type) {
|
switch (tuning_type) {
|
||||||
case TuningType::FAST:
|
case TuningType::kFast:
|
||||||
work_groups->push_back(
|
work_groups->push_back(
|
||||||
GetWorkGroup(grid, kernel_info.max_work_group_size));
|
GetWorkGroup(grid, kernel_info.max_work_group_size));
|
||||||
return;
|
return;
|
||||||
case TuningType::EXHAUSTIVE: {
|
case TuningType::kExhaustive: {
|
||||||
GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
|
GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -270,7 +270,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
|
|||||||
const KernelInfo& kernel_info, const int3& grid,
|
const KernelInfo& kernel_info, const int3& grid,
|
||||||
std::vector<int3>* work_groups) {
|
std::vector<int3>* work_groups) {
|
||||||
switch (tuning_type) {
|
switch (tuning_type) {
|
||||||
case TuningType::FAST: {
|
case TuningType::kFast: {
|
||||||
int max_z_size = 16;
|
int max_z_size = 16;
|
||||||
if (gpu_info.IsAdreno()) {
|
if (gpu_info.IsAdreno()) {
|
||||||
max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
|
max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
|
||||||
@ -280,7 +280,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
|
|||||||
GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
|
GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case TuningType::EXHAUSTIVE: {
|
case TuningType::kExhaustive: {
|
||||||
GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
|
GetWorkGroupsAlignedToGrid(gpu_info, kernel_info, grid, work_groups);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
|
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/workgroup_selection.h"
|
#include "tensorflow/lite/delegates/gpu/common/workgroup_selection.h"
|
||||||
|
|
||||||
|
@ -100,6 +100,11 @@ cc_library(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "tuning_type",
|
||||||
|
hdrs = ["tuning_type.h"],
|
||||||
|
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "util",
|
name = "util",
|
||||||
srcs = ["util.cc"],
|
srcs = ["util.cc"],
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
@ -13,26 +13,15 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
|
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_
|
||||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
|
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_
|
||||||
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
|
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace cl {
|
|
||||||
|
|
||||||
enum class TuningType { EXHAUSTIVE, FAST };
|
enum class TuningType { kExhaustive, kFast };
|
||||||
|
|
||||||
struct TuningParameters {
|
|
||||||
ProfilingCommandQueue* queue;
|
|
||||||
const GpuInfo* info;
|
|
||||||
TuningType tuning_type = TuningType::EXHAUSTIVE;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace cl
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
|
||||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TUNING_PARAMETERS_H_
|
#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_TUNING_TYPE_H_
|
Loading…
x
Reference in New Issue
Block a user