Choosing better settings in convolution for Intel.

PiperOrigin-RevId: 316903965
Change-Id: I9ff6c2a5026059011b5ccf7beddb8111b419ff8d
This commit is contained in:
Raman Sarokin 2020-06-17 09:31:04 -07:00 committed by TensorFlower Gardener
parent 225bdf60f3
commit 51ccd6911b
3 changed files with 26 additions and 4 deletions

View File

@ -495,6 +495,12 @@ std::string CLDevice::GetPlatformVersion() const {
return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
}
// Reports whether the recorded OpenCL version is anything other than
// 1.0, 1.1 or 1.2; every remaining version value counts as "2.0 or higher".
bool CLDevice::IsCL20OrHigher() const {
  switch (info_.cl_version) {
    case OpenCLVersion::CL_1_0:
    case OpenCLVersion::CL_1_1:
    case OpenCLVersion::CL_1_2:
      return false;
    default:
      return true;
  }
}
// Adreno detection is purely vendor-based: the device counts as Adreno
// exactly when its recorded vendor is Qualcomm.
bool CLDevice::IsAdreno() const {
  const bool vendor_is_qualcomm = (Vendor::QUALCOMM == info_.vendor);
  return vendor_is_qualcomm;
}
bool CLDevice::IsAdreno3xx() const {

View File

@ -178,6 +178,7 @@ class CLDevice {
// Queried with an OpenCL extension name, e.g. "cl_khr_subgroups";
// presumably true when the device advertises that extension - TODO confirm.
bool SupportsExtension(const std::string& extension) const;
bool SupportsFP32RTN() const;
bool SupportsFP16RTN() const;
// True unless the device's recorded OpenCL version is 1.0, 1.1 or 1.2.
bool IsCL20OrHigher() const;
// True when the device's recorded vendor is Vendor::QUALCOMM.
bool IsAdreno() const;
bool IsAdreno3xx() const;
bool IsAdreno4xx() const;

View File

@ -859,12 +859,27 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
conv_params.src_depth_loop_size = 1;
conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
} else if (device.IsIntel()) {
if (different_weights_for_height) {
conv_params.work_group_size = int3(16, 1, 1);
conv_params.work_group_launch_order = int3(0, 1, 2);
conv_params.fixed_work_group_size = true;
} else {
conv_params.linear_hw = true;
conv_params.work_group_size = int3(16, 1, 1);
conv_params.work_group_launch_order = int3(0, 1, 2);
conv_params.fixed_work_group_size = true;
}
conv_params.block_size = int3(1, 1, 4);
conv_params.work_group_size = int3(8, 2, 1);
conv_params.work_group_launch_order = int3(0, 1, 2);
conv_params.fixed_work_group_size = true;
conv_params.src_depth_loop_size = 1;
conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
if (definition.precision != CalculationsPrecision::F32_F16 &&
device.SupportsExtension("cl_khr_subgroups") &&
device.SupportsExtension("cl_intel_required_subgroup_size") &&
device.IsCL20OrHigher()) {
conv_params.weights_upload_type =
WeightsUploadType::PRIVATE_MEM_SIMD16_BROADCAST;
} else {
conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
}
if (dst_depth % 4 == 0 || dst_depth >= 8) {
conv_params.block_size.z = 4;
} else if (dst_depth % 2 == 0 || dst_depth >= 4) {