Enable OpenCL 2.0 or 3.0 compilation when the device supports it.
By default, OpenCL programs are compiled as OpenCL C 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147
This commit is contained in:
parent
c1a32fd496
commit
1219f682f7
@ -95,6 +95,8 @@ std::string CompilerOptionToString(const CLDevice& device,
|
|||||||
return "-cl-opt-disable";
|
return "-cl-opt-disable";
|
||||||
case CompilerOptions::CL_2_0:
|
case CompilerOptions::CL_2_0:
|
||||||
return "-cl-std=CL2.0";
|
return "-cl-std=CL2.0";
|
||||||
|
case CompilerOptions::CL_3_0:
|
||||||
|
return "-cl-std=CL3.0";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,8 @@ enum class CompilerOptions {
|
|||||||
ADRENO_MORE_WAVES,
|
ADRENO_MORE_WAVES,
|
||||||
POWERVR_FP16,
|
POWERVR_FP16,
|
||||||
CL_OPT_DISABLE,
|
CL_OPT_DISABLE,
|
||||||
CL_2_0
|
CL_2_0,
|
||||||
|
CL_3_0,
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string CompilerOptionsToString(
|
std::string CompilerOptionsToString(
|
||||||
|
@ -774,6 +774,8 @@ cc_library(
|
|||||||
":gpu_operation",
|
":gpu_operation",
|
||||||
":util",
|
":util",
|
||||||
":work_group_picking",
|
":work_group_picking",
|
||||||
|
"//tensorflow/lite/delegates/gpu/cl:cl_program",
|
||||||
|
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||||
"//tensorflow/lite/delegates/gpu/cl:precision",
|
"//tensorflow/lite/delegates/gpu/cl:precision",
|
||||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||||
"//tensorflow/lite/delegates/gpu/common:status",
|
"//tensorflow/lite/delegates/gpu/common:status",
|
||||||
|
@ -17,6 +17,8 @@ limitations under the License.
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
|
||||||
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
|
#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
|
#include "tensorflow/lite/delegates/gpu/cl/precision.h"
|
||||||
@ -64,7 +66,8 @@ static inline float local_reduce(float input, __local float* tmp) {
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition)
|
MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
|
||||||
|
const DeviceInfo& device_info)
|
||||||
: GPUOperation(definition) {
|
: GPUOperation(definition) {
|
||||||
// The kernel code does not inherently need a fixed size, but in order to not
|
// The kernel code does not inherently need a fixed size, but in order to not
|
||||||
// hardcode the __local array's size for the reductions, we would need to pass
|
// hardcode the __local array's size for the reductions, we would need to pass
|
||||||
@ -74,6 +77,11 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition)
|
|||||||
work_group_size_.y = 1; // Required
|
work_group_size_.y = 1; // Required
|
||||||
work_group_size_.z = 1; // Required
|
work_group_size_.z = 1; // Required
|
||||||
code_ = GetNormalizationCode();
|
code_ = GetNormalizationCode();
|
||||||
|
if (device_info.cl_version >= OpenCLVersion::CL_3_0) {
|
||||||
|
compiler_options_.push_back(CompilerOptions::CL_3_0);
|
||||||
|
} else if (device_info.cl_version >= OpenCLVersion::CL_2_0) {
|
||||||
|
compiler_options_.push_back(CompilerOptions::CL_2_0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string MeanStdDevNormalization::GetNormalizationCode() {
|
std::string MeanStdDevNormalization::GetNormalizationCode() {
|
||||||
@ -145,8 +153,8 @@ int3 MeanStdDevNormalization::GetGridSize() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
MeanStdDevNormalization CreateMeanStdDevNormalization(
|
MeanStdDevNormalization CreateMeanStdDevNormalization(
|
||||||
const OperationDef& definition) {
|
const OperationDef& definition, const DeviceInfo& device_info) {
|
||||||
return MeanStdDevNormalization(definition);
|
return MeanStdDevNormalization(definition, device_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
|
@ -16,6 +16,7 @@ limitations under the License.
|
|||||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
||||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
||||||
|
|
||||||
|
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||||
@ -28,7 +29,8 @@ namespace cl {
|
|||||||
// Implements tensor_utils::MeanStddevNormalization
|
// Implements tensor_utils::MeanStddevNormalization
|
||||||
class MeanStdDevNormalization : public GPUOperation {
|
class MeanStdDevNormalization : public GPUOperation {
|
||||||
public:
|
public:
|
||||||
explicit MeanStdDevNormalization(const OperationDef& definition);
|
explicit MeanStdDevNormalization(const OperationDef& definition,
|
||||||
|
const DeviceInfo& device_info);
|
||||||
|
|
||||||
void GetPossibleKernelWorkGroups(
|
void GetPossibleKernelWorkGroups(
|
||||||
TuningType tuning_type, const DeviceInfo& device_info,
|
TuningType tuning_type, const DeviceInfo& device_info,
|
||||||
@ -50,7 +52,7 @@ class MeanStdDevNormalization : public GPUOperation {
|
|||||||
};
|
};
|
||||||
|
|
||||||
MeanStdDevNormalization CreateMeanStdDevNormalization(
|
MeanStdDevNormalization CreateMeanStdDevNormalization(
|
||||||
const OperationDef& definition);
|
const OperationDef& definition, const DeviceInfo& device_info);
|
||||||
|
|
||||||
} // namespace cl
|
} // namespace cl
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -262,7 +262,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
|
|||||||
return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
|
return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
|
||||||
}
|
}
|
||||||
case OperationType::MEAN_STDDEV_NORMALIZATION: {
|
case OperationType::MEAN_STDDEV_NORMALIZATION: {
|
||||||
MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def);
|
MeanStdDevNormalization operation =
|
||||||
|
CreateMeanStdDevNormalization(op_def, creation_context.device->info_);
|
||||||
*gpu_op =
|
*gpu_op =
|
||||||
absl::make_unique<MeanStdDevNormalization>(std::move(operation));
|
absl::make_unique<MeanStdDevNormalization>(std::move(operation));
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
|
Loading…
Reference in New Issue
Block a user