Added support for subgroups on Intel devices with OpenCL 1.2 when the necessary extension (cl_intel_subgroups) is available.

PiperOrigin-RevId: 334412963
Change-Id: I225df7f2591277b923eba64cec8c0febf4ad8078
This commit is contained in:
Raman Sarokin 2020-09-29 10:44:10 -07:00 committed by TensorFlower Gardener
parent 195369c5a0
commit 557b5a8e13

View File

@@ -230,7 +230,7 @@ void ConvPowerVR::GenerateCode(const DeviceInfo& device_info) {
device_info.IsPowerVR()) {
compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
}
- if (conv_params_.IsPrivateMemBroadcast()) {
+ if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher()) {
compiler_options_.push_back(CompilerOptions::CL_2_0);
}
}
@@ -453,6 +453,8 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info,
if (use_simd_broadcast) {
if (device_info.cl_version == OpenCLVersion::CL_2_0) {
c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
+ } else if (device_info.SupportsExtension("cl_intel_subgroups")) {
+ c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n";
}
}
const int4 block_size = conv_params.block_size;
@@ -1213,10 +1215,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
conv_params.block_size = int4(1, 1, 1, 4);
conv_params.src_depth_loop_size = 1;
int sub_group_size = 16;
+ const bool supports_subgroups =
+ device_info.SupportsExtension("cl_khr_subgroups") ||
+ device_info.SupportsExtension("cl_intel_subgroups");
if (definition.precision != CalculationsPrecision::F32_F16 &&
- device_info.SupportsExtension("cl_khr_subgroups") &&
+ supports_subgroups &&
device_info.SupportsExtension("cl_intel_required_subgroup_size") &&
- device_info.IsCL20OrHigher() &&
device_info.SupportsSubGroupWithSize(sub_group_size)) {
conv_params.weights_upload_type =
WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST;