Choosing better settings in convolution for Intel.
PiperOrigin-RevId: 316903965 Change-Id: I9ff6c2a5026059011b5ccf7beddb8111b419ff8d
This commit is contained in:
parent
225bdf60f3
commit
51ccd6911b
@ -495,6 +495,12 @@ std::string CLDevice::GetPlatformVersion() const {
|
||||
return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
|
||||
}
|
||||
|
||||
bool CLDevice::IsCL20OrHigher() const {
|
||||
return info_.cl_version != OpenCLVersion::CL_1_0 &&
|
||||
info_.cl_version != OpenCLVersion::CL_1_1 &&
|
||||
info_.cl_version != OpenCLVersion::CL_1_2;
|
||||
}
|
||||
|
||||
// Returns true when the device's recorded vendor is Qualcomm; this is the
// only criterion used here to classify a device as Adreno.
bool CLDevice::IsAdreno() const {
  const bool vendor_is_qualcomm = (info_.vendor == Vendor::QUALCOMM);
  return vendor_is_qualcomm;
}
|
||||
|
||||
bool CLDevice::IsAdreno3xx() const {
|
||||
|
@ -178,6 +178,7 @@ class CLDevice {
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
bool SupportsFP32RTN() const;
|
||||
bool SupportsFP16RTN() const;
|
||||
// True when the device's OpenCL version is not 1.0, 1.1 or 1.2.
bool IsCL20OrHigher() const;
|
||||
// True when the device vendor is Qualcomm.
bool IsAdreno() const;
|
||||
bool IsAdreno3xx() const;
|
||||
bool IsAdreno4xx() const;
|
||||
|
@ -859,12 +859,27 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
|
||||
conv_params.src_depth_loop_size = 1;
|
||||
conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
|
||||
} else if (device.IsIntel()) {
|
||||
if (different_weights_for_height) {
|
||||
conv_params.work_group_size = int3(16, 1, 1);
|
||||
conv_params.work_group_launch_order = int3(0, 1, 2);
|
||||
conv_params.fixed_work_group_size = true;
|
||||
} else {
|
||||
conv_params.linear_hw = true;
|
||||
conv_params.work_group_size = int3(16, 1, 1);
|
||||
conv_params.work_group_launch_order = int3(0, 1, 2);
|
||||
conv_params.fixed_work_group_size = true;
|
||||
}
|
||||
conv_params.block_size = int3(1, 1, 4);
|
||||
conv_params.work_group_size = int3(8, 2, 1);
|
||||
conv_params.work_group_launch_order = int3(0, 1, 2);
|
||||
conv_params.fixed_work_group_size = true;
|
||||
conv_params.src_depth_loop_size = 1;
|
||||
conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
|
||||
if (definition.precision != CalculationsPrecision::F32_F16 &&
|
||||
device.SupportsExtension("cl_khr_subgroups") &&
|
||||
device.SupportsExtension("cl_intel_required_subgroup_size") &&
|
||||
device.IsCL20OrHigher()) {
|
||||
conv_params.weights_upload_type =
|
||||
WeightsUploadType::PRIVATE_MEM_SIMD16_BROADCAST;
|
||||
} else {
|
||||
conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
|
||||
}
|
||||
if (dst_depth % 4 == 0 || dst_depth >= 8) {
|
||||
conv_params.block_size.z = 4;
|
||||
} else if (dst_depth % 2 == 0 || dst_depth >= 4) {
|
||||
|
Loading…
Reference in New Issue
Block a user