Added OpenClInfo. Added methods to GpuInfo instead of direct field accesses.

PiperOrigin-RevId: 343511854
Change-Id: I1db674bf181b909bbb426863494ab28b13ef0c0f
Raman Sarokin 2020-11-20 09:56:16 -08:00 committed by TensorFlower Gardener
parent 0eae6e3b96
commit 7c29278eed
15 changed files with 236 additions and 195 deletions
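In short: OpenCL-specific capability fields that used to sit directly on GpuInfo now live in the nested OpenClInfo struct, and call sites either read gpu_info.opencl_info.* or, preferably, go through the new GpuInfo accessor methods. Below is a trimmed-down sketch of that pattern; the field and method names mirror the diff, but the struct bodies and main() are illustrative only (the real structs carry many more fields).

```cpp
// Minimal sketch of the access pattern introduced by this change.
// Not the real headers: only a few representative members are kept.
#include <iostream>

struct OpenClInfo {
  bool supports_fp16 = false;
  int compute_units_count = 0;
  int max_work_group_size_x = 0;
};

struct GpuInfo {
  // New-style accessors (as added in gpu_info.h/.cc in this commit).
  bool SupportsFP16() const { return opencl_info.supports_fp16; }
  int GetComputeUnitsCount() const { return opencl_info.compute_units_count; }
  int GetMaxWorkGroupSizeForX() const {
    return opencl_info.max_work_group_size_x;
  }

  OpenClInfo opencl_info;
};

int main() {
  GpuInfo info;
  info.opencl_info.supports_fp16 = true;        // was: info.supports_fp16
  info.opencl_info.compute_units_count = 4;     // was: info.compute_units_count
  info.opencl_info.max_work_group_size_x = 256; // was: info.max_work_group_size_x

  // Call sites switch from direct fields to methods, e.g.
  //   gpu_info.compute_units_count  ->  gpu_info.GetComputeUnitsCount()
  std::cout << info.SupportsFP16() << " " << info.GetComputeUnitsCount() << " "
            << info.GetMaxWorkGroupSizeForX() << "\n";
  return 0;
}
```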

View File

@@ -54,29 +54,29 @@ void AddSupportedImageFormats(cl_context context, GpuInfo* info) {
   auto supported_formats =
       GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
   for (auto format : supported_formats) {
-    info->supports_r_f16_tex2d =
-        info->supports_r_f16_tex2d ||
+    info->opencl_info.supports_r_f16_tex2d =
+        info->opencl_info.supports_r_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 1);
-    info->supports_rg_f16_tex2d =
-        info->supports_rg_f16_tex2d ||
+    info->opencl_info.supports_rg_f16_tex2d =
+        info->opencl_info.supports_rg_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 2);
-    info->supports_rgb_f16_tex2d =
-        info->supports_rgb_f16_tex2d ||
+    info->opencl_info.supports_rgb_f16_tex2d =
+        info->opencl_info.supports_rgb_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 3);
-    info->supports_rgba_f16_tex2d =
-        info->supports_rgba_f16_tex2d ||
+    info->opencl_info.supports_rgba_f16_tex2d =
+        info->opencl_info.supports_rgba_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 4);
-    info->supports_r_f32_tex2d =
-        info->supports_r_f32_tex2d ||
+    info->opencl_info.supports_r_f32_tex2d =
+        info->opencl_info.supports_r_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 1);
-    info->supports_rg_f32_tex2d =
-        info->supports_rg_f32_tex2d ||
+    info->opencl_info.supports_rg_f32_tex2d =
+        info->opencl_info.supports_rg_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 2);
-    info->supports_rgb_f32_tex2d =
-        info->supports_rgb_f32_tex2d ||
+    info->opencl_info.supports_rgb_f32_tex2d =
+        info->opencl_info.supports_rgb_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 3);
-    info->supports_rgba_f32_tex2d =
-        info->supports_rgba_f32_tex2d ||
+    info->opencl_info.supports_rgba_f32_tex2d =
+        info->opencl_info.supports_rgba_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 4);
   }
 }
@@ -148,7 +148,7 @@ absl::Status CreateCLGLContext(const CLDevice& device,
                                cl_context_properties egl_context,
                                cl_context_properties egl_display,
                                CLContext* result) {
-  if (!device.SupportsExtension("cl_khr_gl_sharing")) {
+  if (!device.GetInfo().SupportsExtension("cl_khr_gl_sharing")) {
     return absl::UnavailableError("Device doesn't support CL-GL sharing.");
   }
   cl_context_properties platform =

View File

@@ -169,80 +169,80 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
     info.mali_info = MaliInfo(device_name);
   }
   info.opencl_info.cl_version = ParseCLVersion(opencl_c_version);
-  info.extensions =
+  info.opencl_info.extensions =
       absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
-  info.supports_fp16 = false;
-  info.supports_image3d_writes = false;
-  for (const auto& ext : info.extensions) {
+  info.opencl_info.supports_fp16 = false;
+  info.opencl_info.supports_image3d_writes = false;
+  for (const auto& ext : info.opencl_info.extensions) {
     if (ext == "cl_khr_fp16") {
-      info.supports_fp16 = true;
+      info.opencl_info.supports_fp16 = true;
     }
     if (ext == "cl_khr_3d_image_writes") {
-      info.supports_image3d_writes = true;
+      info.opencl_info.supports_image3d_writes = true;
     }
   }
   cl_device_fp_config f32_config =
       GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
-  info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
+  info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
-  if (info.supports_fp16) {
+  if (info.opencl_info.supports_fp16) {
     cl_device_fp_config f16_config;
     auto status = GetDeviceInfo<cl_device_fp_config>(
         id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
     // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
     if (status.ok() && !info.IsAMD()) {
-      info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
+      info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
     } else {  // happens on PowerVR
       f16_config = f32_config;
-      info.supports_fp16_rtn = info.supports_fp32_rtn;
+      info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
     }
   } else {
-    info.supports_fp16_rtn = false;
+    info.opencl_info.supports_fp16_rtn = false;
   }
-  if (info.IsPowerVR() && !info.supports_fp16) {
+  if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
     // PowerVR doesn't have full support of fp16 and so doesn't list this
     // extension. But it can support fp16 in MADs and as buffers/textures types,
     // so we will use it.
-    info.supports_fp16 = true;
-    info.supports_fp16_rtn = info.supports_fp32_rtn;
+    info.opencl_info.supports_fp16 = true;
+    info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
   }
-  if (!info.supports_image3d_writes &&
+  if (!info.opencl_info.supports_image3d_writes &&
       ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
        info.IsNvidia())) {
     // in local tests Adreno 430 can write in image 3d, at least on small sizes,
     // but it doesn't have cl_khr_3d_image_writes in list of available
     // extensions
     // The same for NVidia
-    info.supports_image3d_writes = true;
+    info.opencl_info.supports_image3d_writes = true;
   }
-  info.compute_units_count =
+  info.opencl_info.compute_units_count =
       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
-  info.image2d_max_width =
+  info.opencl_info.image2d_max_width =
       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
-  info.image2d_max_height =
+  info.opencl_info.image2d_max_height =
       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.buffer_max_size =
+  info.opencl_info.buffer_max_size =
      GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
   if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
-    info.image_buffer_max_size =
+    info.opencl_info.image_buffer_max_size =
         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
-    info.image_array_max_layers =
+    info.opencl_info.image_array_max_layers =
         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
   }
-  info.image3d_max_width =
+  info.opencl_info.image3d_max_width =
       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
-  info.image3d_max_height =
+  info.opencl_info.image3d_max_height =
       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.image3d_max_depth =
+  info.opencl_info.image3d_max_depth =
       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
   int3 max_work_group_sizes;
   GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
-  info.max_work_group_size_x = max_work_group_sizes.x;
-  info.max_work_group_size_y = max_work_group_sizes.y;
-  info.max_work_group_size_z = max_work_group_sizes.z;
+  info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
+  info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
+  info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
   if (info.IsIntel()) {
     if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
@@ -300,48 +300,10 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
   return *this;
 }
-bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
-bool CLDevice::SupportsExtension(const std::string& extension) const {
-  return info_.SupportsExtension(extension);
-}
-bool CLDevice::SupportsTextureArray() const {
-  return info_.SupportsTextureArray();
-}
-bool CLDevice::SupportsImageBuffer() const {
-  return info_.SupportsImageBuffer();
-}
-bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
-bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
-bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
 std::string CLDevice::GetPlatformVersion() const {
   return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
 }
-bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); }
-bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const {
-  return info_.SupportsSubGroupWithSize(sub_group_size);
-}
-bool CLDevice::IsAdreno() const { return info_.IsAdreno(); }
-bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); }
-bool CLDevice::IsNvidia() const { return info_.IsNvidia(); }
-bool CLDevice::IsMali() const { return info_.IsMali(); }
-bool CLDevice::IsAMD() const { return info_.IsAMD(); }
-bool CLDevice::IsIntel() const { return info_.IsIntel(); }
 void CLDevice::DisableOneLayerTextureArray() {
   info_.adreno_info.support_one_layer_texture_array = false;
 }

View File

@@ -46,23 +46,6 @@ class CLDevice {
   cl_platform_id platform() const { return platform_id_; }
   std::string GetPlatformVersion() const;
   GpuVendor vendor() const { return info_.gpu_vendor; }
-  bool SupportsFP16() const;
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-  bool SupportsExtension(const std::string& extension) const;
-  bool SupportsFP32RTN() const;
-  bool SupportsFP16RTN() const;
-  bool IsCL20OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
-  bool IsAdreno() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsMali() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
   // To track bug on some Adreno. b/131099086
   void DisableOneLayerTextureArray();

View File

@@ -301,6 +301,8 @@ bool MaliInfo::IsValhall() const {
          gpu_version == MaliGpu::kG68 || gpu_version == MaliGpu::kG78;
 }
+bool GpuInfo::SupportsFP16() const { return opencl_info.supports_fp16; }
 bool GpuInfo::SupportsTextureArray() const {
   return opencl_info.cl_version >= OpenClVersion::kCl1_2;
 }
@@ -314,29 +316,29 @@ bool GpuInfo::SupportsImage3D() const {
     // On Mali T880 read_imageh doesn't compile with image3d_t
     return false;
   }
-  return supports_image3d_writes;
+  return opencl_info.supports_image3d_writes;
 }
 bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
   if (channels == 1) {
-    return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
-                                          : supports_r_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_r_f32_tex2d
+                                          : opencl_info.supports_r_f16_tex2d;
   } else if (channels == 2) {
-    return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
-                                          : supports_rg_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rg_f32_tex2d
+                                          : opencl_info.supports_rg_f16_tex2d;
   } else if (channels == 3) {
-    return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
-                                          : supports_rgb_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rgb_f32_tex2d
+                                          : opencl_info.supports_rgb_f16_tex2d;
   } else if (channels == 4) {
-    return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
-                                          : supports_rgba_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rgba_f32_tex2d
+                                          : opencl_info.supports_rgba_f16_tex2d;
   } else {
     return false;
   }
 }
 bool GpuInfo::SupportsExtension(const std::string& extension) const {
-  for (const auto& ext : extensions) {
+  for (const auto& ext : opencl_info.extensions) {
     if (ext == extension) {
       return true;
     }
@@ -365,6 +367,58 @@ bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
   return false;
 }
+int GpuInfo::GetComputeUnitsCount() const {
+  return opencl_info.compute_units_count;
+}
+bool GpuInfo::IsRoundToNearestSupported() const {
+  return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
+}
+int GpuInfo::GetMaxWorkGroupSizeForX() const {
+  return opencl_info.max_work_group_size_x;
+}
+int GpuInfo::GetMaxWorkGroupSizeForY() const {
+  return opencl_info.max_work_group_size_y;
+}
+int GpuInfo::GetMaxWorkGroupSizeForZ() const {
+  return opencl_info.max_work_group_size_z;
+}
+uint64_t GpuInfo::GetMaxImage2DWidth() const {
+  return opencl_info.image2d_max_width;
+}
+uint64_t GpuInfo::GetMaxImage2DHeight() const {
+  return opencl_info.image2d_max_height;
+}
+uint64_t GpuInfo::GetMaxImage3DWidth() const {
+  return opencl_info.image3d_max_width;
+}
+uint64_t GpuInfo::GetMaxImage3DHeight() const {
+  return opencl_info.image3d_max_height;
+}
+uint64_t GpuInfo::GetMaxImage3DDepth() const {
+  return opencl_info.image3d_max_depth;
+}
+uint64_t GpuInfo::GetMaxBufferSize() const {
+  return opencl_info.buffer_max_size;
+}
+uint64_t GpuInfo::GetMaxImageBufferWidth() const {
+  return opencl_info.image_buffer_max_size;
+}
+uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
+  return opencl_info.image_array_max_layers;
+}
 bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; }
 bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; }

View File

@@ -179,34 +179,10 @@ std::string OpenClVersionToString(OpenClVersion version);
 struct OpenClInfo {
   OpenClVersion cl_version;
-};
-struct GpuInfo {
-  GpuInfo() = default;
-  bool IsAdreno() const;
-  bool IsApple() const;
-  bool IsMali() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-  bool SupportsFloatImage2D(DataType data_type, int channels) const;
-  bool SupportsExtension(const std::string& extension) const;
-  bool IsCL20OrHigher() const;
-  bool IsCL30OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
   std::vector<std::string> extensions;
   bool supports_fp16;
   bool supports_image3d_writes;
-  GpuVendor gpu_vendor;
   int compute_units_count;
   uint64_t buffer_max_size;
   uint64_t image2d_max_width;
@@ -219,7 +195,6 @@ struct GpuInfo {
   int max_work_group_size_x;
   int max_work_group_size_y;
   int max_work_group_size_z;
-  std::vector<int> supported_subgroup_sizes;
   // rtn is ROUND_TO_NEAREST
   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
@@ -238,6 +213,54 @@ struct GpuInfo {
   bool supports_rg_f32_tex2d = false;
   bool supports_rgb_f32_tex2d = false;
   bool supports_rgba_f32_tex2d = false;
+};
+struct GpuInfo {
+  GpuInfo() = default;
+  bool IsAdreno() const;
+  bool IsApple() const;
+  bool IsMali() const;
+  bool IsPowerVR() const;
+  bool IsNvidia() const;
+  bool IsAMD() const;
+  bool IsIntel() const;
+  bool SupportsFP16() const;
+  bool SupportsTextureArray() const;
+  bool SupportsImageBuffer() const;
+  bool SupportsImage3D() const;
+  bool SupportsFloatImage2D(DataType data_type, int channels) const;
+  bool SupportsExtension(const std::string& extension) const;
+  bool IsCL20OrHigher() const;
+  bool IsCL30OrHigher() const;
+  bool SupportsSubGroupWithSize(int sub_group_size) const;
+  int GetComputeUnitsCount() const;
+  // floating point rounding mode
+  bool IsRoundToNearestSupported() const;
+  int GetMaxWorkGroupSizeForX() const;
+  int GetMaxWorkGroupSizeForY() const;
+  int GetMaxWorkGroupSizeForZ() const;
+  uint64_t GetMaxImage2DWidth() const;
+  uint64_t GetMaxImage2DHeight() const;
+  uint64_t GetMaxImage3DWidth() const;
+  uint64_t GetMaxImage3DHeight() const;
+  uint64_t GetMaxImage3DDepth() const;
+  uint64_t GetMaxBufferSize() const;
+  uint64_t GetMaxImageBufferWidth() const;
+  uint64_t GetMaxImage2DArrayLayers() const;
+  std::vector<int> supported_subgroup_sizes;
+  GpuVendor gpu_vendor;
   AdrenoInfo adreno_info;
   MaliInfo mali_info;

View File

@@ -48,6 +48,39 @@ absl::Status CreateEnvironment(Environment* result, bool shared,
   return result->Init();
 }
+bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
+                              TensorStorageType storage_type) {
+  switch (storage_type) {
+    case TensorStorageType::TEXTURE_2D:
+      return !gpu_info.IsAMD();
+    case TensorStorageType::BUFFER:
+      return true;
+    case TensorStorageType::TEXTURE_ARRAY:
+      return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
+    case TensorStorageType::IMAGE_BUFFER:
+      return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
+             gpu_info.SupportsImageBuffer();
+    case TensorStorageType::TEXTURE_3D:
+      return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
+    case TensorStorageType::SINGLE_TEXTURE_2D:
+      return false;
+    case TensorStorageType::UNKNOWN:
+      return false;
+  }
+  return false;
+}
+bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
+                            CalculationsPrecision precision) {
+  switch (precision) {
+    case CalculationsPrecision::F32_F16:
+    case CalculationsPrecision::F16:
+      return gpu_info.SupportsFP16();
+    case CalculationsPrecision::F32:
+      return true;
+  }
+}
 }  // namespace
 Environment::Environment(CLDevice&& device, CLContext&& context,
@@ -77,7 +110,8 @@ Environment& Environment::operator=(Environment&& environment) {
 }
 absl::Status Environment::Init() {
-  if (device().IsAdreno() && device().SupportsTextureArray()) {
+  if (device().GetInfo().IsAdreno() &&
+      device().GetInfo().SupportsTextureArray()) {
     const auto& adreno_info = device().info_.adreno_info;
     // Some Adreno < 600 have bug with one layer texture array. b/131099086
     // If we have one layer texture array and will write smt from kernel to this
@@ -117,13 +151,7 @@ std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
 }
 bool Environment::IsSupported(CalculationsPrecision precision) const {
-  switch (precision) {
-    case CalculationsPrecision::F32_F16:
-    case CalculationsPrecision::F16:
-      return device_.SupportsFP16();
-    case CalculationsPrecision::F32:
-      return true;
-  }
+  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
 }
 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
@@ -153,24 +181,7 @@ Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
 }
 bool Environment::IsSupported(TensorStorageType storage_type) const {
-  switch (storage_type) {
-    case TensorStorageType::TEXTURE_2D:
-      return !device_.IsAMD();
-    case TensorStorageType::BUFFER:
-      return true;
-    case TensorStorageType::TEXTURE_ARRAY:
-      return !device_.IsAMD() && device_.SupportsTextureArray();
-    case TensorStorageType::IMAGE_BUFFER:
-      return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
-             device_.SupportsImageBuffer();
-    case TensorStorageType::TEXTURE_3D:
-      return !device_.IsAMD() && device_.SupportsImage3D();
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return false;
-    case TensorStorageType::UNKNOWN:
-      return false;
-  }
-  return false;
+  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
 }
 TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
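Note: the new IsGpuSupportsStorageType / IsGpuSupportsPrecision helpers take a GpuInfo directly, so Environment::IsSupported can simply forward device_.GetInfo() instead of switching over CLDevice methods, and the same checks become usable anywhere a GpuInfo is available. Below is a minimal self-contained sketch of that idea; the types are trimmed stand-ins rather than the real TFLite headers, and the real helpers sit in the file-local namespace that closes just above them in environment.cc.

```cpp
// Sketch only: mirrors the shape of the IsGpuSupportsPrecision helper added
// in environment.cc, using simplified stand-in types.
#include <iostream>
#include <vector>

enum class CalculationsPrecision { F32, F32_F16, F16 };  // stand-in enum

struct OpenClInfo {
  bool supports_fp16 = false;
};

struct GpuInfo {
  OpenClInfo opencl_info;
  bool SupportsFP16() const { return opencl_info.supports_fp16; }
};

// The check needs only a GpuInfo, not a CLDevice or an Environment.
bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
                            CalculationsPrecision precision) {
  switch (precision) {
    case CalculationsPrecision::F32_F16:
    case CalculationsPrecision::F16:
      return gpu_info.SupportsFP16();
    case CalculationsPrecision::F32:
      return true;
  }
  return true;  // unreachable; keeps compilers quiet
}

int main() {
  GpuInfo info;
  info.opencl_info.supports_fp16 = true;
  const std::vector<CalculationsPrecision> all = {
      CalculationsPrecision::F32, CalculationsPrecision::F32_F16,
      CalculationsPrecision::F16};
  for (auto p : all) {
    std::cout << IsGpuSupportsPrecision(info, p) << "\n";  // prints 1 1 1
  }
  return 0;
}
```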

View File

@@ -89,7 +89,7 @@ absl::Status CreateClEventFromEglSync(cl_context context,
 }
 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
-  return device.SupportsExtension("cl_khr_egl_event");
+  return device.GetInfo().SupportsExtension("cl_khr_egl_event");
 }
 absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
@@ -126,7 +126,7 @@ absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
 bool IsGlSharingSupported(const CLDevice& device) {
   return clCreateFromGLBuffer && clCreateFromGLTexture &&
-         device.SupportsExtension("cl_khr_gl_sharing");
+         device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
 }
 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }

View File

@@ -163,14 +163,14 @@ absl::Status InferenceContext::InitFromGraph(
   ReserveGraphTensors(create_info, creation_context.GetGpuInfo(), graph);
   precision_ = create_info.precision;
   storage_type_ = create_info.storage_type;
-  if (env->device().IsMali()) {
+  if (env->device().GetInfo().IsMali()) {
     need_flush_ = true;
     need_manual_release_ = true;
     flush_periodically_ = true;
     flush_period_ = 24;
   }
-  if (env->device().IsPowerVR()) {
+  if (env->device().GetInfo().IsPowerVR()) {
     need_flush_ = true;
   }
   CopyInAndOutIds(graph);

View File

@@ -139,7 +139,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const GpuInfo& gpu_info,
   conv_params.element_size = 4;
   conv_params.block_size = int3(1, 1, 1);
   if (gpu_info.IsMali() && definition.precision == CalculationsPrecision::F16 &&
-      gpu_info.compute_units_count <= 4) {
+      gpu_info.GetComputeUnitsCount() <= 4) {
     conv_params.block_size.x *= 2;
   }
   return conv_params;

View File

@@ -1045,7 +1045,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
   if (dst_shape) {
     int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
     float task_size_per_cu =
-        static_cast<float>(task_size) / gpu_info.compute_units_count;
+        static_cast<float>(task_size) / gpu_info.GetComputeUnitsCount();
     int block_size = conv_params.block_size.x * conv_params.block_size.y *
                      conv_params.block_size.w;
     float threads_per_cu = task_size_per_cu / block_size;

View File

@@ -95,7 +95,7 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
   // For now, fix workgroup size to the biggest supported by the device, but not
   // larger than the number of tensor slices.
   int desired_work_group_size =
-      std::min(tensor_slices, gpu_info.max_work_group_size_x);
+      std::min(tensor_slices, gpu_info.GetMaxWorkGroupSizeForX());
   if (gpu_info.IsMali()) {
     // Don't use more than 64 work items per work group on ARM Mali. They
     // implement local memory using the global memory, larger workgroups have

View File

@@ -118,7 +118,7 @@ int GetRecommendedBlockSizeForConv(const GpuInfo& gpu_info,
                                    CalculationsPrecision precision,
                                    int task_size) {
   const float task_size_per_cu =
-      task_size / static_cast<float>(gpu_info.compute_units_count);
+      task_size / static_cast<float>(gpu_info.GetComputeUnitsCount());
   int block_size = 1;
   float threshold_1 = FLT_MAX;
   float threshold_2 = FLT_MAX;

View File

@@ -78,9 +78,13 @@ TEST_F(OpenCLOperationTest, Winograd4x4To36) {
   for (auto precision : env_.GetSupportedPrecisions()) {
     float eps;
     if (precision == CalculationsPrecision::F32) {
-      eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
+      eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
+                         ? 1.0f
+                         : 4.0f);
     } else {
-      eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
+      eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
+                         ? 1.0f
+                         : 4.0f);
     }
     OperationDef op_def;
     op_def.precision = precision;
@@ -151,9 +155,13 @@ TEST_F(OpenCLOperationTest, Winograd36To4x4) {
   for (auto precision : env_.GetSupportedPrecisions()) {
     float eps;
    if (precision == CalculationsPrecision::F32) {
-      eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
+      eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
+                         ? 1.0f
+                         : 4.0f);
     } else {
-      eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
+      eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
+                         ? 1.0f
+                         : 4.0f);
     }
     OperationDef op_def;
     op_def.precision = precision;

View File

@@ -52,9 +52,9 @@ std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(
       if (work_group_size_xy * z > kernel_info.max_work_group_size) {
         continue;
       }
-      if (x <= gpu_info.max_work_group_size_x &&
-          y <= gpu_info.max_work_group_size_y &&
-          z <= gpu_info.max_work_group_size_z) {
+      if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
+          y <= gpu_info.GetMaxWorkGroupSizeForY() &&
+          z <= gpu_info.GetMaxWorkGroupSizeForZ()) {
         work_groups.push_back({x, y, z});
       }
     }
@@ -78,9 +78,9 @@ std::vector<int3> GenerateWorkGroupSizesXMultipleOf(
        x += multiplier) {
     for (auto y : possible_y_sizes) {
       for (auto z : possible_z_sizes) {
-        if (x <= gpu_info.max_work_group_size_x &&
-            y <= gpu_info.max_work_group_size_y &&
-            z <= gpu_info.max_work_group_size_z &&
+        if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
+            y <= gpu_info.GetMaxWorkGroupSizeForY() &&
+            z <= gpu_info.GetMaxWorkGroupSizeForZ() &&
             x * y * z <= kernel_info.max_work_group_size) {
           work_groups.push_back({x, y, z});
         }
@@ -94,9 +94,9 @@ void GetWorkGroupsAlignedToGrid(const GpuInfo& gpu_info,
                                 const KernelInfo& kernel_info, const int3& grid,
                                 std::vector<int3>* work_groups) {
   int3 max_wg_size;
-  max_wg_size.x = gpu_info.max_work_group_size_x;
-  max_wg_size.y = gpu_info.max_work_group_size_y;
-  max_wg_size.z = gpu_info.max_work_group_size_z;
+  max_wg_size.x = gpu_info.GetMaxWorkGroupSizeForX();
+  max_wg_size.y = gpu_info.GetMaxWorkGroupSizeForY();
+  max_wg_size.z = gpu_info.GetMaxWorkGroupSizeForZ();
   GenerateWorkGroupSizesAlignedToGrid(
       grid, max_wg_size, kernel_info.max_work_group_size, work_groups);
 }
@@ -275,7 +275,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
       if (gpu_info.IsAdreno()) {
        max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
       }
-      max_z_size = std::min(max_z_size, gpu_info.max_work_group_size_z);
+      max_z_size = std::min(max_z_size, gpu_info.GetMaxWorkGroupSizeForZ());
       work_groups->push_back(
           GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
       return;

View File

@@ -33,11 +33,11 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
           4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
       const int buffer_size =
           shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
-      return buffer_size <= gpu_info.buffer_max_size;
+      return buffer_size <= gpu_info.GetMaxBufferSize();
     }
     case TensorStorageType::IMAGE_BUFFER:
       return shape.b * shape.w * shape.h * shape.d * slices <=
-             gpu_info.image_buffer_max_size;
+             gpu_info.GetMaxImageBufferWidth();
     case TensorStorageType::TEXTURE_3D:
       if (gpu_info.opencl_info.cl_version < OpenClVersion::kCl1_2 &&
           slices == 1) {
@@ -45,26 +45,26 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
         // depth = 1 by specification;
         return false;
       }
-      return shape.w * shape.b <= gpu_info.image3d_max_width &&
-             shape.h <= gpu_info.image3d_max_height &&
-             slices * shape.d <= gpu_info.image3d_max_depth;
+      return shape.w * shape.b <= gpu_info.GetMaxImage3DWidth() &&
+             shape.h <= gpu_info.GetMaxImage3DHeight() &&
+             slices * shape.d <= gpu_info.GetMaxImage3DDepth();
     case TensorStorageType::TEXTURE_ARRAY:
       // Bug on some Adreno. b/131099086
       if (slices == 1 && gpu_info.IsAdreno() &&
          !gpu_info.adreno_info.support_one_layer_texture_array) {
         return false;
       }
-      return shape.w * shape.b <= gpu_info.image2d_max_width &&
-             shape.h <= gpu_info.image2d_max_height &&
-             slices * shape.d <= gpu_info.image_array_max_layers;
+      return shape.w * shape.b <= gpu_info.GetMaxImage2DWidth() &&
+             shape.h <= gpu_info.GetMaxImage2DHeight() &&
+             slices * shape.d <= gpu_info.GetMaxImage2DArrayLayers();
     case TensorStorageType::TEXTURE_2D:
-      return shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
-             shape.h * slices <= gpu_info.image2d_max_height;
+      return shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
+             shape.h * slices <= gpu_info.GetMaxImage2DHeight();
     case TensorStorageType::SINGLE_TEXTURE_2D:
       return shape.c <= 4 &&
             gpu_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
-             shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
-             shape.h <= gpu_info.image2d_max_height;
+             shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
+             shape.h <= gpu_info.GetMaxImage2DHeight();
     default:
       return false;
   }