Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

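By default, OpenCL programs are compiled as OpenCL C 1.x only. With this change, the MeanStdDevNormalization kernel is built with `-cl-std=CL3.0` when the device reports OpenCL 3.0 or newer, and with `-cl-std=CL2.0` when it reports OpenCL 2.0 or newer; otherwise the 1.x default is kept.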

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147
This commit is contained in:
Robert David 2020-08-18 13:55:38 -07:00 committed by TensorFlower Gardener
parent c1a32fd496
commit 1219f682f7
6 changed files with 23 additions and 7 deletions

View File

@ -95,6 +95,8 @@ std::string CompilerOptionToString(const CLDevice& device,
return "-cl-opt-disable"; return "-cl-opt-disable";
case CompilerOptions::CL_2_0: case CompilerOptions::CL_2_0:
return "-cl-std=CL2.0"; return "-cl-std=CL2.0";
case CompilerOptions::CL_3_0:
return "-cl-std=CL3.0";
} }
} }

View File

@ -41,7 +41,8 @@ enum class CompilerOptions {
ADRENO_MORE_WAVES, ADRENO_MORE_WAVES,
POWERVR_FP16, POWERVR_FP16,
CL_OPT_DISABLE, CL_OPT_DISABLE,
CL_2_0 CL_2_0,
CL_3_0,
}; };
std::string CompilerOptionsToString( std::string CompilerOptionsToString(

View File

@ -774,6 +774,8 @@ cc_library(
":gpu_operation", ":gpu_operation",
":util", ":util",
":work_group_picking", ":work_group_picking",
"//tensorflow/lite/delegates/gpu/cl:cl_program",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/cl:precision", "//tensorflow/lite/delegates/gpu/cl:precision",
"//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:operations",
"//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:status",

View File

@ -17,6 +17,8 @@ limitations under the License.
#include <string> #include <string>
#include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
#include "tensorflow/lite/delegates/gpu/cl/precision.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h"
@ -64,7 +66,8 @@ static inline float local_reduce(float input, __local float* tmp) {
} }
} // namespace } // namespace
MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
const DeviceInfo& device_info)
: GPUOperation(definition) { : GPUOperation(definition) {
// The kernel code does not inherently need a fixed size, but in order to not // The kernel code does not inherently need a fixed size, but in order to not
// hardcode the __local array's size for the reductions, we would need to pass // hardcode the __local array's size for the reductions, we would need to pass
@ -74,6 +77,11 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition)
work_group_size_.y = 1; // Required work_group_size_.y = 1; // Required
work_group_size_.z = 1; // Required work_group_size_.z = 1; // Required
code_ = GetNormalizationCode(); code_ = GetNormalizationCode();
if (device_info.cl_version >= OpenCLVersion::CL_3_0) {
compiler_options_.push_back(CompilerOptions::CL_3_0);
} else if (device_info.cl_version >= OpenCLVersion::CL_2_0) {
compiler_options_.push_back(CompilerOptions::CL_2_0);
}
} }
std::string MeanStdDevNormalization::GetNormalizationCode() { std::string MeanStdDevNormalization::GetNormalizationCode() {
@ -145,8 +153,8 @@ int3 MeanStdDevNormalization::GetGridSize() const {
} }
MeanStdDevNormalization CreateMeanStdDevNormalization( MeanStdDevNormalization CreateMeanStdDevNormalization(
const OperationDef& definition) { const OperationDef& definition, const DeviceInfo& device_info) {
return MeanStdDevNormalization(definition); return MeanStdDevNormalization(definition, device_info);
} }
} // namespace cl } // namespace cl

View File

@ -16,6 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/status.h"
@ -28,7 +29,8 @@ namespace cl {
// Implements tensor_utils::MeanStddevNormalization // Implements tensor_utils::MeanStddevNormalization
class MeanStdDevNormalization : public GPUOperation { class MeanStdDevNormalization : public GPUOperation {
public: public:
explicit MeanStdDevNormalization(const OperationDef& definition); explicit MeanStdDevNormalization(const OperationDef& definition,
const DeviceInfo& device_info);
void GetPossibleKernelWorkGroups( void GetPossibleKernelWorkGroups(
TuningType tuning_type, const DeviceInfo& device_info, TuningType tuning_type, const DeviceInfo& device_info,
@ -50,7 +52,7 @@ class MeanStdDevNormalization : public GPUOperation {
}; };
MeanStdDevNormalization CreateMeanStdDevNormalization( MeanStdDevNormalization CreateMeanStdDevNormalization(
const OperationDef& definition); const OperationDef& definition, const DeviceInfo& device_info);
} // namespace cl } // namespace cl
} // namespace gpu } // namespace gpu

View File

@ -262,7 +262,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
return SelectMean(attr, op_def, creation_context.device->info_, gpu_op); return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
} }
case OperationType::MEAN_STDDEV_NORMALIZATION: { case OperationType::MEAN_STDDEV_NORMALIZATION: {
MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def); MeanStdDevNormalization operation =
CreateMeanStdDevNormalization(op_def, creation_context.device->info_);
*gpu_op = *gpu_op =
absl::make_unique<MeanStdDevNormalization>(std::move(operation)); absl::make_unique<MeanStdDevNormalization>(std::move(operation));
return absl::OkStatus(); return absl::OkStatus();