Added OpenClInfo. Added methods to GpuInfo instead of direct field accesses.
PiperOrigin-RevId: 343511854 Change-Id: I1db674bf181b909bbb426863494ab28b13ef0c0f
This commit is contained in:
parent
0eae6e3b96
commit
7c29278eed
@ -54,29 +54,29 @@ void AddSupportedImageFormats(cl_context context, GpuInfo* info) {
|
||||
auto supported_formats =
|
||||
GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
|
||||
for (auto format : supported_formats) {
|
||||
info->supports_r_f16_tex2d =
|
||||
info->supports_r_f16_tex2d ||
|
||||
info->opencl_info.supports_r_f16_tex2d =
|
||||
info->opencl_info.supports_r_f16_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT16, 1);
|
||||
info->supports_rg_f16_tex2d =
|
||||
info->supports_rg_f16_tex2d ||
|
||||
info->opencl_info.supports_rg_f16_tex2d =
|
||||
info->opencl_info.supports_rg_f16_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT16, 2);
|
||||
info->supports_rgb_f16_tex2d =
|
||||
info->supports_rgb_f16_tex2d ||
|
||||
info->opencl_info.supports_rgb_f16_tex2d =
|
||||
info->opencl_info.supports_rgb_f16_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT16, 3);
|
||||
info->supports_rgba_f16_tex2d =
|
||||
info->supports_rgba_f16_tex2d ||
|
||||
info->opencl_info.supports_rgba_f16_tex2d =
|
||||
info->opencl_info.supports_rgba_f16_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT16, 4);
|
||||
info->supports_r_f32_tex2d =
|
||||
info->supports_r_f32_tex2d ||
|
||||
info->opencl_info.supports_r_f32_tex2d =
|
||||
info->opencl_info.supports_r_f32_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT32, 1);
|
||||
info->supports_rg_f32_tex2d =
|
||||
info->supports_rg_f32_tex2d ||
|
||||
info->opencl_info.supports_rg_f32_tex2d =
|
||||
info->opencl_info.supports_rg_f32_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT32, 2);
|
||||
info->supports_rgb_f32_tex2d =
|
||||
info->supports_rgb_f32_tex2d ||
|
||||
info->opencl_info.supports_rgb_f32_tex2d =
|
||||
info->opencl_info.supports_rgb_f32_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT32, 3);
|
||||
info->supports_rgba_f32_tex2d =
|
||||
info->supports_rgba_f32_tex2d ||
|
||||
info->opencl_info.supports_rgba_f32_tex2d =
|
||||
info->opencl_info.supports_rgba_f32_tex2d ||
|
||||
IsEqualToImageFormat(format, DataType::FLOAT32, 4);
|
||||
}
|
||||
}
|
||||
@ -148,7 +148,7 @@ absl::Status CreateCLGLContext(const CLDevice& device,
|
||||
cl_context_properties egl_context,
|
||||
cl_context_properties egl_display,
|
||||
CLContext* result) {
|
||||
if (!device.SupportsExtension("cl_khr_gl_sharing")) {
|
||||
if (!device.GetInfo().SupportsExtension("cl_khr_gl_sharing")) {
|
||||
return absl::UnavailableError("Device doesn't support CL-GL sharing.");
|
||||
}
|
||||
cl_context_properties platform =
|
||||
|
@ -169,80 +169,80 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
|
||||
info.mali_info = MaliInfo(device_name);
|
||||
}
|
||||
info.opencl_info.cl_version = ParseCLVersion(opencl_c_version);
|
||||
info.extensions =
|
||||
info.opencl_info.extensions =
|
||||
absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
|
||||
info.supports_fp16 = false;
|
||||
info.supports_image3d_writes = false;
|
||||
for (const auto& ext : info.extensions) {
|
||||
info.opencl_info.supports_fp16 = false;
|
||||
info.opencl_info.supports_image3d_writes = false;
|
||||
for (const auto& ext : info.opencl_info.extensions) {
|
||||
if (ext == "cl_khr_fp16") {
|
||||
info.supports_fp16 = true;
|
||||
info.opencl_info.supports_fp16 = true;
|
||||
}
|
||||
if (ext == "cl_khr_3d_image_writes") {
|
||||
info.supports_image3d_writes = true;
|
||||
info.opencl_info.supports_image3d_writes = true;
|
||||
}
|
||||
}
|
||||
|
||||
cl_device_fp_config f32_config =
|
||||
GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
|
||||
info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
|
||||
info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
|
||||
|
||||
if (info.supports_fp16) {
|
||||
if (info.opencl_info.supports_fp16) {
|
||||
cl_device_fp_config f16_config;
|
||||
auto status = GetDeviceInfo<cl_device_fp_config>(
|
||||
id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
|
||||
// AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
|
||||
if (status.ok() && !info.IsAMD()) {
|
||||
info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
|
||||
info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
|
||||
} else { // happens on PowerVR
|
||||
f16_config = f32_config;
|
||||
info.supports_fp16_rtn = info.supports_fp32_rtn;
|
||||
info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
|
||||
}
|
||||
} else {
|
||||
info.supports_fp16_rtn = false;
|
||||
info.opencl_info.supports_fp16_rtn = false;
|
||||
}
|
||||
|
||||
if (info.IsPowerVR() && !info.supports_fp16) {
|
||||
if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
|
||||
// PowerVR doesn't have full support of fp16 and so doesn't list this
|
||||
// extension. But it can support fp16 in MADs and as buffers/textures types,
|
||||
// so we will use it.
|
||||
info.supports_fp16 = true;
|
||||
info.supports_fp16_rtn = info.supports_fp32_rtn;
|
||||
info.opencl_info.supports_fp16 = true;
|
||||
info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
|
||||
}
|
||||
|
||||
if (!info.supports_image3d_writes &&
|
||||
if (!info.opencl_info.supports_image3d_writes &&
|
||||
((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
|
||||
info.IsNvidia())) {
|
||||
// in local tests Adreno 430 can write in image 3d, at least on small sizes,
|
||||
// but it doesn't have cl_khr_3d_image_writes in list of available
|
||||
// extensions
|
||||
// The same for NVidia
|
||||
info.supports_image3d_writes = true;
|
||||
info.opencl_info.supports_image3d_writes = true;
|
||||
}
|
||||
info.compute_units_count =
|
||||
info.opencl_info.compute_units_count =
|
||||
GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
|
||||
info.image2d_max_width =
|
||||
info.opencl_info.image2d_max_width =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
|
||||
info.image2d_max_height =
|
||||
info.opencl_info.image2d_max_height =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
|
||||
info.buffer_max_size =
|
||||
info.opencl_info.buffer_max_size =
|
||||
GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
|
||||
if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
|
||||
info.image_buffer_max_size =
|
||||
info.opencl_info.image_buffer_max_size =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
|
||||
info.image_array_max_layers =
|
||||
info.opencl_info.image_array_max_layers =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
|
||||
}
|
||||
info.image3d_max_width =
|
||||
info.opencl_info.image3d_max_width =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
|
||||
info.image3d_max_height =
|
||||
info.opencl_info.image3d_max_height =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
|
||||
info.image3d_max_depth =
|
||||
info.opencl_info.image3d_max_depth =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
|
||||
int3 max_work_group_sizes;
|
||||
GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
|
||||
info.max_work_group_size_x = max_work_group_sizes.x;
|
||||
info.max_work_group_size_y = max_work_group_sizes.y;
|
||||
info.max_work_group_size_z = max_work_group_sizes.z;
|
||||
info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
|
||||
info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
|
||||
info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
|
||||
|
||||
if (info.IsIntel()) {
|
||||
if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
|
||||
@ -300,48 +300,10 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
|
||||
|
||||
bool CLDevice::SupportsExtension(const std::string& extension) const {
|
||||
return info_.SupportsExtension(extension);
|
||||
}
|
||||
|
||||
bool CLDevice::SupportsTextureArray() const {
|
||||
return info_.SupportsTextureArray();
|
||||
}
|
||||
|
||||
bool CLDevice::SupportsImageBuffer() const {
|
||||
return info_.SupportsImageBuffer();
|
||||
}
|
||||
|
||||
bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
|
||||
|
||||
bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
|
||||
|
||||
bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
|
||||
|
||||
std::string CLDevice::GetPlatformVersion() const {
|
||||
return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
|
||||
}
|
||||
|
||||
bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); }
|
||||
|
||||
bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const {
|
||||
return info_.SupportsSubGroupWithSize(sub_group_size);
|
||||
}
|
||||
|
||||
bool CLDevice::IsAdreno() const { return info_.IsAdreno(); }
|
||||
|
||||
bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); }
|
||||
|
||||
bool CLDevice::IsNvidia() const { return info_.IsNvidia(); }
|
||||
|
||||
bool CLDevice::IsMali() const { return info_.IsMali(); }
|
||||
|
||||
bool CLDevice::IsAMD() const { return info_.IsAMD(); }
|
||||
|
||||
bool CLDevice::IsIntel() const { return info_.IsIntel(); }
|
||||
|
||||
void CLDevice::DisableOneLayerTextureArray() {
|
||||
info_.adreno_info.support_one_layer_texture_array = false;
|
||||
}
|
||||
|
@ -46,23 +46,6 @@ class CLDevice {
|
||||
cl_platform_id platform() const { return platform_id_; }
|
||||
std::string GetPlatformVersion() const;
|
||||
|
||||
GpuVendor vendor() const { return info_.gpu_vendor; }
|
||||
bool SupportsFP16() const;
|
||||
bool SupportsTextureArray() const;
|
||||
bool SupportsImageBuffer() const;
|
||||
bool SupportsImage3D() const;
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
bool SupportsFP32RTN() const;
|
||||
bool SupportsFP16RTN() const;
|
||||
bool IsCL20OrHigher() const;
|
||||
bool SupportsSubGroupWithSize(int sub_group_size) const;
|
||||
bool IsAdreno() const;
|
||||
bool IsPowerVR() const;
|
||||
bool IsNvidia() const;
|
||||
bool IsMali() const;
|
||||
bool IsAMD() const;
|
||||
bool IsIntel() const;
|
||||
|
||||
// To track bug on some Adreno. b/131099086
|
||||
void DisableOneLayerTextureArray();
|
||||
|
||||
|
@ -301,6 +301,8 @@ bool MaliInfo::IsValhall() const {
|
||||
gpu_version == MaliGpu::kG68 || gpu_version == MaliGpu::kG78;
|
||||
}
|
||||
|
||||
// Reports the cached half-precision capability flag held in opencl_info.
// GpuInfoFromDeviceID sets the flag when "cl_khr_fp16" is listed among the
// device extensions, and also forces it on for PowerVR devices.
bool GpuInfo::SupportsFP16() const { return opencl_info.supports_fp16; }
|
||||
|
||||
// Texture arrays are treated as available when the parsed OpenCL version is
// 1.2 or newer.
bool GpuInfo::SupportsTextureArray() const {
|
||||
return opencl_info.cl_version >= OpenClVersion::kCl1_2;
|
||||
}
|
||||
@ -314,29 +316,29 @@ bool GpuInfo::SupportsImage3D() const {
|
||||
// On Mali T880 read_imageh doesn't compile with image3d_t
|
||||
return false;
|
||||
}
|
||||
return supports_image3d_writes;
|
||||
return opencl_info.supports_image3d_writes;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
|
||||
if (channels == 1) {
|
||||
return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
|
||||
: supports_r_f16_tex2d;
|
||||
return data_type == DataType::FLOAT32 ? opencl_info.supports_r_f32_tex2d
|
||||
: opencl_info.supports_r_f16_tex2d;
|
||||
} else if (channels == 2) {
|
||||
return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
|
||||
: supports_rg_f16_tex2d;
|
||||
return data_type == DataType::FLOAT32 ? opencl_info.supports_rg_f32_tex2d
|
||||
: opencl_info.supports_rg_f16_tex2d;
|
||||
} else if (channels == 3) {
|
||||
return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
|
||||
: supports_rgb_f16_tex2d;
|
||||
return data_type == DataType::FLOAT32 ? opencl_info.supports_rgb_f32_tex2d
|
||||
: opencl_info.supports_rgb_f16_tex2d;
|
||||
} else if (channels == 4) {
|
||||
return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
|
||||
: supports_rgba_f16_tex2d;
|
||||
return data_type == DataType::FLOAT32 ? opencl_info.supports_rgba_f32_tex2d
|
||||
: opencl_info.supports_rgba_f16_tex2d;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsExtension(const std::string& extension) const {
|
||||
for (const auto& ext : extensions) {
|
||||
for (const auto& ext : opencl_info.extensions) {
|
||||
if (ext == extension) {
|
||||
return true;
|
||||
}
|
||||
@ -365,6 +367,58 @@ bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the compute unit count cached in opencl_info
// (filled from CL_DEVICE_MAX_COMPUTE_UNITS in GpuInfoFromDeviceID).
int GpuInfo::GetComputeUnitsCount() const {
|
||||
return opencl_info.compute_units_count;
|
||||
}
|
||||
|
||||
// Returns true when round-to-nearest is flagged as supported for at least one
// of the two floating point widths (fp16 or fp32).
bool GpuInfo::IsRoundToNearestSupported() const {
|
||||
return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
|
||||
}
|
||||
|
||||
// Returns the cached work-group size limit along the X dimension
// (the .x component obtained via GetDeviceWorkDimsSizes in GpuInfoFromDeviceID).
int GpuInfo::GetMaxWorkGroupSizeForX() const {
|
||||
return opencl_info.max_work_group_size_x;
|
||||
}
|
||||
|
||||
// Returns the cached work-group size limit along the Y dimension
// (the .y component obtained via GetDeviceWorkDimsSizes in GpuInfoFromDeviceID).
int GpuInfo::GetMaxWorkGroupSizeForY() const {
|
||||
return opencl_info.max_work_group_size_y;
|
||||
}
|
||||
|
||||
// Returns the cached work-group size limit along the Z dimension
// (the .z component obtained via GetDeviceWorkDimsSizes in GpuInfoFromDeviceID).
int GpuInfo::GetMaxWorkGroupSizeForZ() const {
|
||||
return opencl_info.max_work_group_size_z;
|
||||
}
|
||||
|
||||
// Returns the cached maximum 2D image width
// (filled from CL_DEVICE_IMAGE2D_MAX_WIDTH in GpuInfoFromDeviceID).
uint64_t GpuInfo::GetMaxImage2DWidth() const {
|
||||
return opencl_info.image2d_max_width;
|
||||
}
|
||||
|
||||
// Returns the cached maximum 2D image height
// (filled from CL_DEVICE_IMAGE2D_MAX_HEIGHT in GpuInfoFromDeviceID).
uint64_t GpuInfo::GetMaxImage2DHeight() const {
|
||||
return opencl_info.image2d_max_height;
|
||||
}
|
||||
|
||||
// Returns the cached maximum 3D image width
// (filled from CL_DEVICE_IMAGE3D_MAX_WIDTH in GpuInfoFromDeviceID).
uint64_t GpuInfo::GetMaxImage3DWidth() const {
|
||||
return opencl_info.image3d_max_width;
|
||||
}
|
||||
|
||||
// Returns the cached maximum 3D image height.
// NOTE(review): GpuInfoFromDeviceID fills image3d_max_height from
// CL_DEVICE_IMAGE2D_MAX_HEIGHT, not CL_DEVICE_IMAGE3D_MAX_HEIGHT, so the value
// returned here is the 2D limit. This looks like a copy/paste slip - confirm
// and fix at the assignment site.
uint64_t GpuInfo::GetMaxImage3DHeight() const {
|
||||
return opencl_info.image3d_max_height;
|
||||
}
|
||||
|
||||
// Returns the cached maximum 3D image depth
// (filled from CL_DEVICE_IMAGE3D_MAX_DEPTH in GpuInfoFromDeviceID).
uint64_t GpuInfo::GetMaxImage3DDepth() const {
|
||||
return opencl_info.image3d_max_depth;
|
||||
}
|
||||
|
||||
// Returns the cached maximum buffer size
// (filled from CL_DEVICE_MAX_MEM_ALLOC_SIZE in GpuInfoFromDeviceID).
uint64_t GpuInfo::GetMaxBufferSize() const {
|
||||
return opencl_info.buffer_max_size;
|
||||
}
|
||||
|
||||
// Returns the cached maximum image-buffer size
// (filled from CL_DEVICE_IMAGE_MAX_BUFFER_SIZE in GpuInfoFromDeviceID).
// NOTE(review): that query is only issued when cl_version >= kCl1_2; the value
// on older devices depends on the field's default, which is not visible here -
// confirm before relying on it without a SupportsImageBuffer() check.
uint64_t GpuInfo::GetMaxImageBufferWidth() const {
|
||||
return opencl_info.image_buffer_max_size;
|
||||
}
|
||||
|
||||
// Returns the cached maximum number of layers in a 2D image array
// (filled from CL_DEVICE_IMAGE_MAX_ARRAY_SIZE in GpuInfoFromDeviceID).
// NOTE(review): that query is only issued when cl_version >= kCl1_2; the value
// on older devices depends on the field's default, which is not visible here -
// confirm before relying on it without a SupportsTextureArray() check.
uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
|
||||
return opencl_info.image_array_max_layers;
|
||||
}
|
||||
|
||||
// Adreno GPUs are identified by the Qualcomm vendor tag.
bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; }
|
||||
|
||||
// True when the detected vendor tag is Apple.
bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; }
|
||||
|
@ -179,34 +179,10 @@ std::string OpenClVersionToString(OpenClVersion version);
|
||||
|
||||
struct OpenClInfo {
|
||||
OpenClVersion cl_version;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
GpuInfo() = default;
|
||||
|
||||
bool IsAdreno() const;
|
||||
bool IsApple() const;
|
||||
bool IsMali() const;
|
||||
bool IsPowerVR() const;
|
||||
bool IsNvidia() const;
|
||||
bool IsAMD() const;
|
||||
bool IsIntel() const;
|
||||
|
||||
bool SupportsTextureArray() const;
|
||||
bool SupportsImageBuffer() const;
|
||||
bool SupportsImage3D() const;
|
||||
|
||||
bool SupportsFloatImage2D(DataType data_type, int channels) const;
|
||||
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
bool IsCL20OrHigher() const;
|
||||
bool IsCL30OrHigher() const;
|
||||
bool SupportsSubGroupWithSize(int sub_group_size) const;
|
||||
|
||||
std::vector<std::string> extensions;
|
||||
bool supports_fp16;
|
||||
bool supports_image3d_writes;
|
||||
GpuVendor gpu_vendor;
|
||||
int compute_units_count;
|
||||
uint64_t buffer_max_size;
|
||||
uint64_t image2d_max_width;
|
||||
@ -219,7 +195,6 @@ struct GpuInfo {
|
||||
int max_work_group_size_x;
|
||||
int max_work_group_size_y;
|
||||
int max_work_group_size_z;
|
||||
std::vector<int> supported_subgroup_sizes;
|
||||
|
||||
// rtn is ROUND_TO_NEAREST
|
||||
// with rtn precision is much better then with rtz (ROUND_TO_ZERO)
|
||||
@ -238,6 +213,54 @@ struct GpuInfo {
|
||||
bool supports_rg_f32_tex2d = false;
|
||||
bool supports_rgb_f32_tex2d = false;
|
||||
bool supports_rgba_f32_tex2d = false;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
GpuInfo() = default;
|
||||
|
||||
bool IsAdreno() const;
|
||||
bool IsApple() const;
|
||||
bool IsMali() const;
|
||||
bool IsPowerVR() const;
|
||||
bool IsNvidia() const;
|
||||
bool IsAMD() const;
|
||||
bool IsIntel() const;
|
||||
|
||||
bool SupportsFP16() const;
|
||||
|
||||
bool SupportsTextureArray() const;
|
||||
bool SupportsImageBuffer() const;
|
||||
bool SupportsImage3D() const;
|
||||
|
||||
bool SupportsFloatImage2D(DataType data_type, int channels) const;
|
||||
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
bool IsCL20OrHigher() const;
|
||||
bool IsCL30OrHigher() const;
|
||||
bool SupportsSubGroupWithSize(int sub_group_size) const;
|
||||
|
||||
int GetComputeUnitsCount() const;
|
||||
|
||||
// floating point rounding mode
|
||||
bool IsRoundToNearestSupported() const;
|
||||
|
||||
int GetMaxWorkGroupSizeForX() const;
|
||||
int GetMaxWorkGroupSizeForY() const;
|
||||
int GetMaxWorkGroupSizeForZ() const;
|
||||
|
||||
uint64_t GetMaxImage2DWidth() const;
|
||||
uint64_t GetMaxImage2DHeight() const;
|
||||
uint64_t GetMaxImage3DWidth() const;
|
||||
uint64_t GetMaxImage3DHeight() const;
|
||||
uint64_t GetMaxImage3DDepth() const;
|
||||
|
||||
uint64_t GetMaxBufferSize() const;
|
||||
uint64_t GetMaxImageBufferWidth() const;
|
||||
uint64_t GetMaxImage2DArrayLayers() const;
|
||||
|
||||
std::vector<int> supported_subgroup_sizes;
|
||||
|
||||
GpuVendor gpu_vendor;
|
||||
|
||||
AdrenoInfo adreno_info;
|
||||
MaliInfo mali_info;
|
||||
|
@ -48,6 +48,39 @@ absl::Status CreateEnvironment(Environment* result, bool shared,
|
||||
return result->Init();
|
||||
}
|
||||
|
||||
bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
|
||||
TensorStorageType storage_type) {
|
||||
switch (storage_type) {
|
||||
case TensorStorageType::TEXTURE_2D:
|
||||
return !gpu_info.IsAMD();
|
||||
case TensorStorageType::BUFFER:
|
||||
return true;
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
|
||||
gpu_info.SupportsImageBuffer();
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
return false;
|
||||
case TensorStorageType::UNKNOWN:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
|
||||
CalculationsPrecision precision) {
|
||||
switch (precision) {
|
||||
case CalculationsPrecision::F32_F16:
|
||||
case CalculationsPrecision::F16:
|
||||
return gpu_info.SupportsFP16();
|
||||
case CalculationsPrecision::F32:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Environment::Environment(CLDevice&& device, CLContext&& context,
|
||||
@ -77,7 +110,8 @@ Environment& Environment::operator=(Environment&& environment) {
|
||||
}
|
||||
|
||||
absl::Status Environment::Init() {
|
||||
if (device().IsAdreno() && device().SupportsTextureArray()) {
|
||||
if (device().GetInfo().IsAdreno() &&
|
||||
device().GetInfo().SupportsTextureArray()) {
|
||||
const auto& adreno_info = device().info_.adreno_info;
|
||||
// Some Adreno < 600 have bug with one layer texture array. b/131099086
|
||||
// If we have one layer texture array and will write smt from kernel to this
|
||||
@ -117,13 +151,7 @@ std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
|
||||
}
|
||||
|
||||
bool Environment::IsSupported(CalculationsPrecision precision) const {
|
||||
switch (precision) {
|
||||
case CalculationsPrecision::F32_F16:
|
||||
case CalculationsPrecision::F16:
|
||||
return device_.SupportsFP16();
|
||||
case CalculationsPrecision::F32:
|
||||
return true;
|
||||
}
|
||||
return IsGpuSupportsPrecision(device_.GetInfo(), precision);
|
||||
}
|
||||
|
||||
std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
|
||||
@ -153,24 +181,7 @@ Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
|
||||
}
|
||||
|
||||
bool Environment::IsSupported(TensorStorageType storage_type) const {
|
||||
switch (storage_type) {
|
||||
case TensorStorageType::TEXTURE_2D:
|
||||
return !device_.IsAMD();
|
||||
case TensorStorageType::BUFFER:
|
||||
return true;
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
return !device_.IsAMD() && device_.SupportsTextureArray();
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
|
||||
device_.SupportsImageBuffer();
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
return !device_.IsAMD() && device_.SupportsImage3D();
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
return false;
|
||||
case TensorStorageType::UNKNOWN:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
|
||||
}
|
||||
|
||||
TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
|
||||
|
@ -89,7 +89,7 @@ absl::Status CreateClEventFromEglSync(cl_context context,
|
||||
}
|
||||
|
||||
bool IsClEventFromEglSyncSupported(const CLDevice& device) {
|
||||
return device.SupportsExtension("cl_khr_egl_event");
|
||||
return device.GetInfo().SupportsExtension("cl_khr_egl_event");
|
||||
}
|
||||
|
||||
absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
|
||||
@ -126,7 +126,7 @@ absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
|
||||
|
||||
bool IsGlSharingSupported(const CLDevice& device) {
|
||||
return clCreateFromGLBuffer && clCreateFromGLTexture &&
|
||||
device.SupportsExtension("cl_khr_gl_sharing");
|
||||
device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
|
||||
}
|
||||
|
||||
AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
|
||||
|
@ -163,14 +163,14 @@ absl::Status InferenceContext::InitFromGraph(
|
||||
ReserveGraphTensors(create_info, creation_context.GetGpuInfo(), graph);
|
||||
precision_ = create_info.precision;
|
||||
storage_type_ = create_info.storage_type;
|
||||
if (env->device().IsMali()) {
|
||||
if (env->device().GetInfo().IsMali()) {
|
||||
need_flush_ = true;
|
||||
need_manual_release_ = true;
|
||||
|
||||
flush_periodically_ = true;
|
||||
flush_period_ = 24;
|
||||
}
|
||||
if (env->device().IsPowerVR()) {
|
||||
if (env->device().GetInfo().IsPowerVR()) {
|
||||
need_flush_ = true;
|
||||
}
|
||||
CopyInAndOutIds(graph);
|
||||
|
@ -139,7 +139,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const GpuInfo& gpu_info,
|
||||
conv_params.element_size = 4;
|
||||
conv_params.block_size = int3(1, 1, 1);
|
||||
if (gpu_info.IsMali() && definition.precision == CalculationsPrecision::F16 &&
|
||||
gpu_info.compute_units_count <= 4) {
|
||||
gpu_info.GetComputeUnitsCount() <= 4) {
|
||||
conv_params.block_size.x *= 2;
|
||||
}
|
||||
return conv_params;
|
||||
|
@ -1045,7 +1045,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
|
||||
if (dst_shape) {
|
||||
int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
|
||||
float task_size_per_cu =
|
||||
static_cast<float>(task_size) / gpu_info.compute_units_count;
|
||||
static_cast<float>(task_size) / gpu_info.GetComputeUnitsCount();
|
||||
int block_size = conv_params.block_size.x * conv_params.block_size.y *
|
||||
conv_params.block_size.w;
|
||||
float threads_per_cu = task_size_per_cu / block_size;
|
||||
|
@ -95,7 +95,7 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
|
||||
// For now, fix workgroup size to the biggest supported by the device, but not
|
||||
// larger than the number of tensor slices.
|
||||
int desired_work_group_size =
|
||||
std::min(tensor_slices, gpu_info.max_work_group_size_x);
|
||||
std::min(tensor_slices, gpu_info.GetMaxWorkGroupSizeForX());
|
||||
if (gpu_info.IsMali()) {
|
||||
// Don't use more than 64 work items per work group on ARM Mali. They
|
||||
// implement local memory using the global memory, larger workgroups have
|
||||
|
@ -118,7 +118,7 @@ int GetRecommendedBlockSizeForConv(const GpuInfo& gpu_info,
|
||||
CalculationsPrecision precision,
|
||||
int task_size) {
|
||||
const float task_size_per_cu =
|
||||
task_size / static_cast<float>(gpu_info.compute_units_count);
|
||||
task_size / static_cast<float>(gpu_info.GetComputeUnitsCount());
|
||||
int block_size = 1;
|
||||
float threshold_1 = FLT_MAX;
|
||||
float threshold_2 = FLT_MAX;
|
||||
|
@ -78,9 +78,13 @@ TEST_F(OpenCLOperationTest, Winograd4x4To36) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
float eps;
|
||||
if (precision == CalculationsPrecision::F32) {
|
||||
eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
|
||||
eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
|
||||
? 1.0f
|
||||
: 4.0f);
|
||||
} else {
|
||||
eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
|
||||
eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
|
||||
? 1.0f
|
||||
: 4.0f);
|
||||
}
|
||||
OperationDef op_def;
|
||||
op_def.precision = precision;
|
||||
@ -151,9 +155,13 @@ TEST_F(OpenCLOperationTest, Winograd36To4x4) {
|
||||
for (auto precision : env_.GetSupportedPrecisions()) {
|
||||
float eps;
|
||||
if (precision == CalculationsPrecision::F32) {
|
||||
eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
|
||||
eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
|
||||
? 1.0f
|
||||
: 4.0f);
|
||||
} else {
|
||||
eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
|
||||
eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
|
||||
? 1.0f
|
||||
: 4.0f);
|
||||
}
|
||||
OperationDef op_def;
|
||||
op_def.precision = precision;
|
||||
|
@ -52,9 +52,9 @@ std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(
|
||||
if (work_group_size_xy * z > kernel_info.max_work_group_size) {
|
||||
continue;
|
||||
}
|
||||
if (x <= gpu_info.max_work_group_size_x &&
|
||||
y <= gpu_info.max_work_group_size_y &&
|
||||
z <= gpu_info.max_work_group_size_z) {
|
||||
if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
|
||||
y <= gpu_info.GetMaxWorkGroupSizeForY() &&
|
||||
z <= gpu_info.GetMaxWorkGroupSizeForZ()) {
|
||||
work_groups.push_back({x, y, z});
|
||||
}
|
||||
}
|
||||
@ -78,9 +78,9 @@ std::vector<int3> GenerateWorkGroupSizesXMultipleOf(
|
||||
x += multiplier) {
|
||||
for (auto y : possible_y_sizes) {
|
||||
for (auto z : possible_z_sizes) {
|
||||
if (x <= gpu_info.max_work_group_size_x &&
|
||||
y <= gpu_info.max_work_group_size_y &&
|
||||
z <= gpu_info.max_work_group_size_z &&
|
||||
if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
|
||||
y <= gpu_info.GetMaxWorkGroupSizeForY() &&
|
||||
z <= gpu_info.GetMaxWorkGroupSizeForZ() &&
|
||||
x * y * z <= kernel_info.max_work_group_size) {
|
||||
work_groups.push_back({x, y, z});
|
||||
}
|
||||
@ -94,9 +94,9 @@ void GetWorkGroupsAlignedToGrid(const GpuInfo& gpu_info,
|
||||
const KernelInfo& kernel_info, const int3& grid,
|
||||
std::vector<int3>* work_groups) {
|
||||
int3 max_wg_size;
|
||||
max_wg_size.x = gpu_info.max_work_group_size_x;
|
||||
max_wg_size.y = gpu_info.max_work_group_size_y;
|
||||
max_wg_size.z = gpu_info.max_work_group_size_z;
|
||||
max_wg_size.x = gpu_info.GetMaxWorkGroupSizeForX();
|
||||
max_wg_size.y = gpu_info.GetMaxWorkGroupSizeForY();
|
||||
max_wg_size.z = gpu_info.GetMaxWorkGroupSizeForZ();
|
||||
GenerateWorkGroupSizesAlignedToGrid(
|
||||
grid, max_wg_size, kernel_info.max_work_group_size, work_groups);
|
||||
}
|
||||
@ -275,7 +275,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
|
||||
if (gpu_info.IsAdreno()) {
|
||||
max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
|
||||
}
|
||||
max_z_size = std::min(max_z_size, gpu_info.max_work_group_size_z);
|
||||
max_z_size = std::min(max_z_size, gpu_info.GetMaxWorkGroupSizeForZ());
|
||||
work_groups->push_back(
|
||||
GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
|
||||
return;
|
||||
|
@ -33,11 +33,11 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
|
||||
4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
|
||||
const int buffer_size =
|
||||
shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
|
||||
return buffer_size <= gpu_info.buffer_max_size;
|
||||
return buffer_size <= gpu_info.GetMaxBufferSize();
|
||||
}
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
return shape.b * shape.w * shape.h * shape.d * slices <=
|
||||
gpu_info.image_buffer_max_size;
|
||||
gpu_info.GetMaxImageBufferWidth();
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
if (gpu_info.opencl_info.cl_version < OpenClVersion::kCl1_2 &&
|
||||
slices == 1) {
|
||||
@ -45,26 +45,26 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
|
||||
// depth = 1 by specification;
|
||||
return false;
|
||||
}
|
||||
return shape.w * shape.b <= gpu_info.image3d_max_width &&
|
||||
shape.h <= gpu_info.image3d_max_height &&
|
||||
slices * shape.d <= gpu_info.image3d_max_depth;
|
||||
return shape.w * shape.b <= gpu_info.GetMaxImage3DWidth() &&
|
||||
shape.h <= gpu_info.GetMaxImage3DHeight() &&
|
||||
slices * shape.d <= gpu_info.GetMaxImage3DDepth();
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
// Bug on some Adreno. b/131099086
|
||||
if (slices == 1 && gpu_info.IsAdreno() &&
|
||||
!gpu_info.adreno_info.support_one_layer_texture_array) {
|
||||
return false;
|
||||
}
|
||||
return shape.w * shape.b <= gpu_info.image2d_max_width &&
|
||||
shape.h <= gpu_info.image2d_max_height &&
|
||||
slices * shape.d <= gpu_info.image_array_max_layers;
|
||||
return shape.w * shape.b <= gpu_info.GetMaxImage2DWidth() &&
|
||||
shape.h <= gpu_info.GetMaxImage2DHeight() &&
|
||||
slices * shape.d <= gpu_info.GetMaxImage2DArrayLayers();
|
||||
case TensorStorageType::TEXTURE_2D:
|
||||
return shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
|
||||
shape.h * slices <= gpu_info.image2d_max_height;
|
||||
return shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
|
||||
shape.h * slices <= gpu_info.GetMaxImage2DHeight();
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
return shape.c <= 4 &&
|
||||
gpu_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
|
||||
shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
|
||||
shape.h <= gpu_info.image2d_max_height;
|
||||
shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
|
||||
shape.h <= gpu_info.GetMaxImage2DHeight();
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user