diff --git a/tensorflow/lite/delegates/gpu/cl/cl_context.cc b/tensorflow/lite/delegates/gpu/cl/cl_context.cc
index 32a5e43d799..e1cc54ba574 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/cl_context.cc
@@ -54,29 +54,29 @@ void AddSupportedImageFormats(cl_context context, GpuInfo* info) {
   auto supported_formats =
       GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
   for (auto format : supported_formats) {
-    info->supports_r_f16_tex2d =
-        info->supports_r_f16_tex2d ||
+    info->opencl_info.supports_r_f16_tex2d =
+        info->opencl_info.supports_r_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 1);
-    info->supports_rg_f16_tex2d =
-        info->supports_rg_f16_tex2d ||
+    info->opencl_info.supports_rg_f16_tex2d =
+        info->opencl_info.supports_rg_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 2);
-    info->supports_rgb_f16_tex2d =
-        info->supports_rgb_f16_tex2d ||
+    info->opencl_info.supports_rgb_f16_tex2d =
+        info->opencl_info.supports_rgb_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 3);
-    info->supports_rgba_f16_tex2d =
-        info->supports_rgba_f16_tex2d ||
+    info->opencl_info.supports_rgba_f16_tex2d =
+        info->opencl_info.supports_rgba_f16_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT16, 4);
-    info->supports_r_f32_tex2d =
-        info->supports_r_f32_tex2d ||
+    info->opencl_info.supports_r_f32_tex2d =
+        info->opencl_info.supports_r_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 1);
-    info->supports_rg_f32_tex2d =
-        info->supports_rg_f32_tex2d ||
+    info->opencl_info.supports_rg_f32_tex2d =
+        info->opencl_info.supports_rg_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 2);
-    info->supports_rgb_f32_tex2d =
-        info->supports_rgb_f32_tex2d ||
+    info->opencl_info.supports_rgb_f32_tex2d =
+        info->opencl_info.supports_rgb_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 3);
-    info->supports_rgba_f32_tex2d =
-        info->supports_rgba_f32_tex2d ||
+    info->opencl_info.supports_rgba_f32_tex2d =
+        info->opencl_info.supports_rgba_f32_tex2d ||
         IsEqualToImageFormat(format, DataType::FLOAT32, 4);
   }
 }
@@ -148,7 +148,7 @@ absl::Status CreateCLGLContext(const CLDevice& device,
                                cl_context_properties egl_context,
                                cl_context_properties egl_display,
                                CLContext* result) {
-  if (!device.SupportsExtension("cl_khr_gl_sharing")) {
+  if (!device.GetInfo().SupportsExtension("cl_khr_gl_sharing")) {
     return absl::UnavailableError("Device doesn't support CL-GL sharing.");
   }
   cl_context_properties platform =
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc
index 3a7cb877e3c..86fbc25dafa 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc
+++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc
@@ -169,80 +169,80 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
     info.mali_info = MaliInfo(device_name);
   }
   info.opencl_info.cl_version = ParseCLVersion(opencl_c_version);
-  info.extensions =
+  info.opencl_info.extensions =
       absl::StrSplit(GetDeviceInfo(id, CL_DEVICE_EXTENSIONS), ' ');
-  info.supports_fp16 = false;
-  info.supports_image3d_writes = false;
-  for (const auto& ext : info.extensions) {
+  info.opencl_info.supports_fp16 = false;
+  info.opencl_info.supports_image3d_writes = false;
+  for (const auto& ext : info.opencl_info.extensions) {
     if (ext == "cl_khr_fp16") {
-      info.supports_fp16 = true;
+      info.opencl_info.supports_fp16 = true;
     }
     if (ext == "cl_khr_3d_image_writes") {
-      info.supports_image3d_writes = true;
+      info.opencl_info.supports_image3d_writes = true;
     }
   }
 
   cl_device_fp_config f32_config =
       GetDeviceInfo(id, CL_DEVICE_SINGLE_FP_CONFIG);
-  info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
+  info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
 
-  if (info.supports_fp16) {
+  if (info.opencl_info.supports_fp16) {
     cl_device_fp_config f16_config;
     auto status = GetDeviceInfo(
         id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
     // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
     if (status.ok() && !info.IsAMD()) {
-      info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
+      info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
     } else {  // happens on PowerVR
       f16_config = f32_config;
-      info.supports_fp16_rtn = info.supports_fp32_rtn;
+      info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
     }
   } else {
-    info.supports_fp16_rtn = false;
+    info.opencl_info.supports_fp16_rtn = false;
   }
 
-  if (info.IsPowerVR() && !info.supports_fp16) {
+  if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
     // PowerVR doesn't have full support of fp16 and so doesn't list this
    // extension. But it can support fp16 in MADs and as buffers/textures types,
    // so we will use it.
-    info.supports_fp16 = true;
-    info.supports_fp16_rtn = info.supports_fp32_rtn;
+    info.opencl_info.supports_fp16 = true;
+    info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
   }
 
-  if (!info.supports_image3d_writes &&
+  if (!info.opencl_info.supports_image3d_writes &&
       ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
        info.IsNvidia())) {
    // in local tests Adreno 430 can write in image 3d, at least on small sizes,
    // but it doesn't have cl_khr_3d_image_writes in list of available
    // extensions
    // The same for NVidia
-    info.supports_image3d_writes = true;
+    info.opencl_info.supports_image3d_writes = true;
   }
-  info.compute_units_count =
+  info.opencl_info.compute_units_count =
       GetDeviceInfo(id, CL_DEVICE_MAX_COMPUTE_UNITS);
-  info.image2d_max_width =
+  info.opencl_info.image2d_max_width =
       GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
-  info.image2d_max_height =
+  info.opencl_info.image2d_max_height =
      GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.buffer_max_size =
+  info.opencl_info.buffer_max_size =
      GetDeviceInfo(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
   if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
-    info.image_buffer_max_size =
+    info.opencl_info.image_buffer_max_size =
        GetDeviceInfo(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
-    info.image_array_max_layers =
+    info.opencl_info.image_array_max_layers =
        GetDeviceInfo(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
   }
-  info.image3d_max_width =
+  info.opencl_info.image3d_max_width =
      GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
-  info.image3d_max_height =
+  info.opencl_info.image3d_max_height =
      GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.image3d_max_depth =
+  info.opencl_info.image3d_max_depth =
      GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
   int3 max_work_group_sizes;
   GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
-  info.max_work_group_size_x = max_work_group_sizes.x;
-  info.max_work_group_size_y = max_work_group_sizes.y;
-  info.max_work_group_size_z = max_work_group_sizes.z;
+  info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
+  info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
+  info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
 
   if (info.IsIntel()) {
     if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
@@ -300,48 +300,10 @@ CLDevice& CLDevice::operator=(CLDevice&& device) {
   return *this;
 }
 
-bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
-
-bool CLDevice::SupportsExtension(const std::string& extension) const {
-  return info_.SupportsExtension(extension);
-}
-
-bool CLDevice::SupportsTextureArray() const {
-  return info_.SupportsTextureArray();
-}
-
-bool CLDevice::SupportsImageBuffer() const {
-  return info_.SupportsImageBuffer();
-}
-
-bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
-
-bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
-
-bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
-
 std::string CLDevice::GetPlatformVersion() const {
   return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
 }
 
-bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); }
-
-bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const {
-  return info_.SupportsSubGroupWithSize(sub_group_size);
-}
-
-bool CLDevice::IsAdreno() const { return info_.IsAdreno(); }
-
-bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); }
-
-bool CLDevice::IsNvidia() const { return info_.IsNvidia(); }
-
-bool CLDevice::IsMali() const { return info_.IsMali(); }
-
-bool CLDevice::IsAMD() const { return info_.IsAMD(); }
-
-bool CLDevice::IsIntel() const { return info_.IsIntel(); }
-
 void CLDevice::DisableOneLayerTextureArray() {
   info_.adreno_info.support_one_layer_texture_array = false;
 }
diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h
index a72534366c4..906e6537d49 100644
--- a/tensorflow/lite/delegates/gpu/cl/cl_device.h
+++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h
@@ -46,23 +46,6 @@ class CLDevice {
   cl_platform_id platform() const { return platform_id_; }
   std::string GetPlatformVersion() const;
 
-  GpuVendor vendor() const { return info_.gpu_vendor; }
-  bool SupportsFP16() const;
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-  bool SupportsExtension(const std::string& extension) const;
-  bool SupportsFP32RTN() const;
-  bool SupportsFP16RTN() const;
-  bool IsCL20OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
-  bool IsAdreno() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsMali() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
-
   // To track bug on some Adreno. b/131099086
   void DisableOneLayerTextureArray();
 
diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc
index 70ef024c4da..c21f8ae6e1a 100644
--- a/tensorflow/lite/delegates/gpu/cl/device_info.cc
+++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc
@@ -301,6 +301,8 @@ bool MaliInfo::IsValhall() const {
          gpu_version == MaliGpu::kG68 || gpu_version == MaliGpu::kG78;
 }
 
+bool GpuInfo::SupportsFP16() const { return opencl_info.supports_fp16; }
+
 bool GpuInfo::SupportsTextureArray() const {
   return opencl_info.cl_version >= OpenClVersion::kCl1_2;
 }
@@ -314,29 +316,29 @@ bool GpuInfo::SupportsImage3D() const {
     // On Mali T880 read_imageh doesn't compile with image3d_t
     return false;
   }
-  return supports_image3d_writes;
+  return opencl_info.supports_image3d_writes;
 }
 
 bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
   if (channels == 1) {
-    return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d
-                                          : supports_r_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_r_f32_tex2d
+                                          : opencl_info.supports_r_f16_tex2d;
   } else if (channels == 2) {
-    return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d
-                                          : supports_rg_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rg_f32_tex2d
+                                          : opencl_info.supports_rg_f16_tex2d;
   } else if (channels == 3) {
-    return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d
-                                          : supports_rgb_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rgb_f32_tex2d
+                                          : opencl_info.supports_rgb_f16_tex2d;
   } else if (channels == 4) {
-    return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d
-                                          : supports_rgba_f16_tex2d;
+    return data_type == DataType::FLOAT32 ? opencl_info.supports_rgba_f32_tex2d
+                                          : opencl_info.supports_rgba_f16_tex2d;
   } else {
     return false;
   }
 }
 
 bool GpuInfo::SupportsExtension(const std::string& extension) const {
-  for (const auto& ext : extensions) {
+  for (const auto& ext : opencl_info.extensions) {
     if (ext == extension) {
       return true;
     }
@@ -365,6 +367,58 @@ bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
   return false;
 }
 
+int GpuInfo::GetComputeUnitsCount() const {
+  return opencl_info.compute_units_count;
+}
+
+bool GpuInfo::IsRoundToNearestSupported() const {
+  return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
+}
+
+int GpuInfo::GetMaxWorkGroupSizeForX() const {
+  return opencl_info.max_work_group_size_x;
+}
+
+int GpuInfo::GetMaxWorkGroupSizeForY() const {
+  return opencl_info.max_work_group_size_y;
+}
+
+int GpuInfo::GetMaxWorkGroupSizeForZ() const {
+  return opencl_info.max_work_group_size_z;
+}
+
+uint64_t GpuInfo::GetMaxImage2DWidth() const {
+  return opencl_info.image2d_max_width;
+}
+
+uint64_t GpuInfo::GetMaxImage2DHeight() const {
+  return opencl_info.image2d_max_height;
+}
+
+uint64_t GpuInfo::GetMaxImage3DWidth() const {
+  return opencl_info.image3d_max_width;
+}
+
+uint64_t GpuInfo::GetMaxImage3DHeight() const {
+  return opencl_info.image3d_max_height;
+}
+
+uint64_t GpuInfo::GetMaxImage3DDepth() const {
+  return opencl_info.image3d_max_depth;
+}
+
+uint64_t GpuInfo::GetMaxBufferSize() const {
+  return opencl_info.buffer_max_size;
+}
+
+uint64_t GpuInfo::GetMaxImageBufferWidth() const {
+  return opencl_info.image_buffer_max_size;
+}
+
+uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
+  return opencl_info.image_array_max_layers;
+}
+
 bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; }
 
 bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; }
diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h
index 11c898ab544..25f5cc479aa 100644
--- a/tensorflow/lite/delegates/gpu/cl/device_info.h
+++ b/tensorflow/lite/delegates/gpu/cl/device_info.h
@@ -179,34 +179,10 @@ std::string OpenClVersionToString(OpenClVersion version);
 
 struct OpenClInfo {
   OpenClVersion cl_version;
-};
-
-struct GpuInfo {
-  GpuInfo() = default;
-
-  bool IsAdreno() const;
-  bool IsApple() const;
-  bool IsMali() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
-
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-
-  bool SupportsFloatImage2D(DataType data_type, int channels) const;
-
-  bool SupportsExtension(const std::string& extension) const;
-  bool IsCL20OrHigher() const;
-  bool IsCL30OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
 
   std::vector<std::string> extensions;
   bool supports_fp16;
   bool supports_image3d_writes;
-  GpuVendor gpu_vendor;
   int compute_units_count;
   uint64_t buffer_max_size;
   uint64_t image2d_max_width;
@@ -219,7 +195,6 @@ struct GpuInfo {
   int max_work_group_size_x;
   int max_work_group_size_y;
   int max_work_group_size_z;
-  std::vector<int> supported_subgroup_sizes;
 
   // rtn is ROUND_TO_NEAREST
   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
@@ -238,6 +213,54 @@ struct GpuInfo {
   bool supports_rg_f32_tex2d = false;
   bool supports_rgb_f32_tex2d = false;
   bool supports_rgba_f32_tex2d = false;
+};
+
+struct GpuInfo {
+  GpuInfo() = default;
+
+  bool IsAdreno() const;
+  bool IsApple() const;
+  bool IsMali() const;
+  bool IsPowerVR() const;
+  bool IsNvidia() const;
+  bool IsAMD() const;
+  bool IsIntel() const;
+
+  bool SupportsFP16() const;
+
+  bool SupportsTextureArray() const;
+  bool SupportsImageBuffer() const;
+  bool SupportsImage3D() const;
+
+  bool SupportsFloatImage2D(DataType data_type, int channels) const;
+
+  bool SupportsExtension(const std::string& extension) const;
+  bool IsCL20OrHigher() const;
+  bool IsCL30OrHigher() const;
+  bool SupportsSubGroupWithSize(int sub_group_size) const;
+
+  int GetComputeUnitsCount() const;
+
+  // floating point rounding mode
+  bool IsRoundToNearestSupported() const;
+
+  int GetMaxWorkGroupSizeForX() const;
+  int GetMaxWorkGroupSizeForY() const;
+  int GetMaxWorkGroupSizeForZ() const;
+
+  uint64_t GetMaxImage2DWidth() const;
+  uint64_t GetMaxImage2DHeight() const;
+  uint64_t GetMaxImage3DWidth() const;
+  uint64_t GetMaxImage3DHeight() const;
+  uint64_t GetMaxImage3DDepth() const;
+
+  uint64_t GetMaxBufferSize() const;
+  uint64_t GetMaxImageBufferWidth() const;
+  uint64_t GetMaxImage2DArrayLayers() const;
+
+  std::vector<int> supported_subgroup_sizes;
+
+  GpuVendor gpu_vendor;
 
   AdrenoInfo adreno_info;
   MaliInfo mali_info;
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index 9b2fef288fe..275ea696e09 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@@ -48,6 +48,39 @@ absl::Status CreateEnvironment(Environment* result, bool shared,
   return result->Init();
 }
 
+bool IsGpuSupportsStorageType(const GpuInfo& gpu_info,
+                              TensorStorageType storage_type) {
+  switch (storage_type) {
+    case TensorStorageType::TEXTURE_2D:
+      return !gpu_info.IsAMD();
+    case TensorStorageType::BUFFER:
+      return true;
+    case TensorStorageType::TEXTURE_ARRAY:
+      return !gpu_info.IsAMD() && gpu_info.SupportsTextureArray();
+    case TensorStorageType::IMAGE_BUFFER:
+      return (gpu_info.IsAdreno() || gpu_info.IsAMD() || gpu_info.IsNvidia()) &&
+             gpu_info.SupportsImageBuffer();
+    case TensorStorageType::TEXTURE_3D:
+      return !gpu_info.IsAMD() && gpu_info.SupportsImage3D();
+    case TensorStorageType::SINGLE_TEXTURE_2D:
+      return false;
+    case TensorStorageType::UNKNOWN:
+      return false;
+  }
+  return false;
+}
+
+bool IsGpuSupportsPrecision(const GpuInfo& gpu_info,
+                            CalculationsPrecision precision) {
+  switch (precision) {
+    case CalculationsPrecision::F32_F16:
+    case CalculationsPrecision::F16:
+      return gpu_info.SupportsFP16();
+    case CalculationsPrecision::F32:
+      return true;
+  }
+}
+
 }  // namespace
 
 Environment::Environment(CLDevice&& device, CLContext&& context,
@@ -77,7 +110,8 @@ Environment& Environment::operator=(Environment&& environment) {
 }
 
 absl::Status Environment::Init() {
-  if (device().IsAdreno() && device().SupportsTextureArray()) {
+  if (device().GetInfo().IsAdreno() &&
+      device().GetInfo().SupportsTextureArray()) {
     const auto& adreno_info = device().info_.adreno_info;
     // Some Adreno < 600 have bug with one layer texture array. b/131099086
     // If we have one layer texture array and will write smt from kernel to this
@@ -117,13 +151,7 @@ std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const {
 }
 
 bool Environment::IsSupported(CalculationsPrecision precision) const {
-  switch (precision) {
-    case CalculationsPrecision::F32_F16:
-    case CalculationsPrecision::F16:
-      return device_.SupportsFP16();
-    case CalculationsPrecision::F32:
-      return true;
-  }
+  return IsGpuSupportsPrecision(device_.GetInfo(), precision);
 }
 
 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
@@ -153,24 +181,7 @@ Environment::GetSupportedStoragesWithHWZeroClampSupport() const {
 }
 
 bool Environment::IsSupported(TensorStorageType storage_type) const {
-  switch (storage_type) {
-    case TensorStorageType::TEXTURE_2D:
-      return !device_.IsAMD();
-    case TensorStorageType::BUFFER:
-      return true;
-    case TensorStorageType::TEXTURE_ARRAY:
-      return !device_.IsAMD() && device_.SupportsTextureArray();
-    case TensorStorageType::IMAGE_BUFFER:
-      return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
-             device_.SupportsImageBuffer();
-    case TensorStorageType::TEXTURE_3D:
-      return !device_.IsAMD() && device_.SupportsImage3D();
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return false;
-    case TensorStorageType::UNKNOWN:
-      return false;
-  }
-  return false;
+  return IsGpuSupportsStorageType(device_.GetInfo(), storage_type);
 }
 
 TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
diff --git a/tensorflow/lite/delegates/gpu/cl/gl_interop.cc b/tensorflow/lite/delegates/gpu/cl/gl_interop.cc
index 599e6766301..2d4e6c54b39 100644
--- a/tensorflow/lite/delegates/gpu/cl/gl_interop.cc
+++ b/tensorflow/lite/delegates/gpu/cl/gl_interop.cc
@@ -89,7 +89,7 @@ absl::Status CreateClEventFromEglSync(cl_context context,
 }
 
 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
-  return device.SupportsExtension("cl_khr_egl_event");
+  return device.GetInfo().SupportsExtension("cl_khr_egl_event");
 }
 
 absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
@@ -126,7 +126,7 @@ absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
 
 bool IsGlSharingSupported(const CLDevice& device) {
   return clCreateFromGLBuffer && clCreateFromGLTexture &&
-         device.SupportsExtension("cl_khr_gl_sharing");
+         device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
 }
 
 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index 332de066bca..d9de2bcd9ee 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -163,14 +163,14 @@ absl::Status InferenceContext::InitFromGraph(
   ReserveGraphTensors(create_info, creation_context.GetGpuInfo(), graph);
   precision_ = create_info.precision;
   storage_type_ = create_info.storage_type;
-  if (env->device().IsMali()) {
+  if (env->device().GetInfo().IsMali()) {
     need_flush_ = true;
     need_manual_release_ = true;
 
     flush_periodically_ = true;
     flush_period_ = 24;
   }
-  if (env->device().IsPowerVR()) {
+  if (env->device().GetInfo().IsPowerVR()) {
     need_flush_ = true;
   }
   CopyInAndOutIds(graph);
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
index ed15df0bea7..d74967ff56f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc
@@ -139,7 +139,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const GpuInfo& gpu_info,
   conv_params.element_size = 4;
   conv_params.block_size = int3(1, 1, 1);
   if (gpu_info.IsMali() && definition.precision == CalculationsPrecision::F16 &&
-      gpu_info.compute_units_count <= 4) {
+      gpu_info.GetComputeUnitsCount() <= 4) {
     conv_params.block_size.x *= 2;
   }
   return conv_params;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
index 42eaf0e838d..25a3daf121f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc
@@ -1045,7 +1045,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(
   if (dst_shape) {
     int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
     float task_size_per_cu =
-        static_cast<float>(task_size) / gpu_info.compute_units_count;
+        static_cast<float>(task_size) / gpu_info.GetComputeUnitsCount();
     int block_size = conv_params.block_size.x * conv_params.block_size.y *
                      conv_params.block_size.w;
     float threads_per_cu = task_size_per_cu / block_size;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc
index 01cb6b742fd..3ebeb4bfa7c 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc
@@ -95,7 +95,7 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
   // For now, fix workgroup size to the biggest supported by the device, but not
   // larger than the number of tensor slices.
   int desired_work_group_size =
-      std::min(tensor_slices, gpu_info.max_work_group_size_x);
+      std::min(tensor_slices, gpu_info.GetMaxWorkGroupSizeForX());
   if (gpu_info.IsMali()) {
     // Don't use more than 64 work items per work group on ARM Mali. They
     // implement local memory using the global memory, larger workgroups have
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc
index 2530c73571b..7e0ab092132 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc
@@ -118,7 +118,7 @@ int GetRecommendedBlockSizeForConv(const GpuInfo& gpu_info,
                                    CalculationsPrecision precision,
                                    int task_size) {
   const float task_size_per_cu =
-      task_size / static_cast<float>(gpu_info.compute_units_count);
+      task_size / static_cast<float>(gpu_info.GetComputeUnitsCount());
   int block_size = 1;
   float threshold_1 = FLT_MAX;
   float threshold_2 = FLT_MAX;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc
index 9da73ba9783..77da13e5f13 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd_test.cc
@@ -78,9 +78,13 @@ TEST_F(OpenCLOperationTest, Winograd4x4To36) {
   for (auto precision : env_.GetSupportedPrecisions()) {
     float eps;
     if (precision == CalculationsPrecision::F32) {
-      eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
+      eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
+                         ? 1.0f
+                         : 4.0f);
     } else {
-      eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
+      eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
+                         ? 1.0f
+                         : 4.0f);
     }
     OperationDef op_def;
     op_def.precision = precision;
@@ -151,9 +155,13 @@ TEST_F(OpenCLOperationTest, Winograd36To4x4) {
   for (auto precision : env_.GetSupportedPrecisions()) {
     float eps;
     if (precision == CalculationsPrecision::F32) {
-      eps = 1e-5f * (env_.device().SupportsFP32RTN() ? 1.0f : 4.0f);
+      eps = 1e-5f * (env_.device().GetInfo().opencl_info.supports_fp32_rtn
+                         ? 1.0f
+                         : 4.0f);
     } else {
-      eps = 1e-2f * (env_.device().SupportsFP16RTN() ? 1.0f : 4.0f);
+      eps = 1e-2f * (env_.device().GetInfo().opencl_info.supports_fp16_rtn
+                         ? 1.0f
+                         : 4.0f);
     }
     OperationDef op_def;
     op_def.precision = precision;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
index 0b7ec8ed683..4348f9af054 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc
@@ -52,9 +52,9 @@ std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(
       if (work_group_size_xy * z > kernel_info.max_work_group_size) {
         continue;
       }
-      if (x <= gpu_info.max_work_group_size_x &&
-          y <= gpu_info.max_work_group_size_y &&
-          z <= gpu_info.max_work_group_size_z) {
+      if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
+          y <= gpu_info.GetMaxWorkGroupSizeForY() &&
+          z <= gpu_info.GetMaxWorkGroupSizeForZ()) {
         work_groups.push_back({x, y, z});
       }
     }
@@ -78,9 +78,9 @@ std::vector<int3> GenerateWorkGroupSizesXMultipleOf(
        x += multiplier) {
     for (auto y : possible_y_sizes) {
       for (auto z : possible_z_sizes) {
-        if (x <= gpu_info.max_work_group_size_x &&
-            y <= gpu_info.max_work_group_size_y &&
-            z <= gpu_info.max_work_group_size_z &&
+        if (x <= gpu_info.GetMaxWorkGroupSizeForX() &&
+            y <= gpu_info.GetMaxWorkGroupSizeForY() &&
+            z <= gpu_info.GetMaxWorkGroupSizeForZ() &&
             x * y * z <= kernel_info.max_work_group_size) {
           work_groups.push_back({x, y, z});
         }
@@ -94,9 +94,9 @@ void GetWorkGroupsAlignedToGrid(const GpuInfo& gpu_info,
                                 const KernelInfo& kernel_info, const int3& grid,
                                 std::vector<int3>* work_groups) {
   int3 max_wg_size;
-  max_wg_size.x = gpu_info.max_work_group_size_x;
-  max_wg_size.y = gpu_info.max_work_group_size_y;
-  max_wg_size.z = gpu_info.max_work_group_size_z;
+  max_wg_size.x = gpu_info.GetMaxWorkGroupSizeForX();
+  max_wg_size.y = gpu_info.GetMaxWorkGroupSizeForY();
+  max_wg_size.z = gpu_info.GetMaxWorkGroupSizeForZ();
   GenerateWorkGroupSizesAlignedToGrid(
       grid, max_wg_size, kernel_info.max_work_group_size, work_groups);
 }
@@ -275,7 +275,7 @@ void GetPossibleWorkGroupsConv(TuningType tuning_type, const GpuInfo& gpu_info,
   if (gpu_info.IsAdreno()) {
     max_z_size = gpu_info.adreno_info.IsAdreno3xx() ? 16 : 64;
   }
-  max_z_size = std::min(max_z_size, gpu_info.max_work_group_size_z);
+  max_z_size = std::min(max_z_size, gpu_info.GetMaxWorkGroupSizeForZ());
   work_groups->push_back(
       GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
   return;
diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc
index c4296b110fe..7ba81d138e2 100644
--- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc
+++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc
@@ -33,11 +33,11 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
           4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
       const int buffer_size =
           shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
-      return buffer_size <= gpu_info.buffer_max_size;
+      return buffer_size <= gpu_info.GetMaxBufferSize();
     }
     case TensorStorageType::IMAGE_BUFFER:
       return shape.b * shape.w * shape.h * shape.d * slices <=
-             gpu_info.image_buffer_max_size;
+             gpu_info.GetMaxImageBufferWidth();
     case TensorStorageType::TEXTURE_3D:
       if (gpu_info.opencl_info.cl_version < OpenClVersion::kCl1_2 &&
           slices == 1) {
@@ -45,26 +45,26 @@ bool CanCreateTensorWithShape(const GpuInfo& gpu_info, const BHWDC& shape,
        // depth = 1 by specification;
        return false;
      }
-      return shape.w * shape.b <= gpu_info.image3d_max_width &&
-             shape.h <= gpu_info.image3d_max_height &&
-             slices * shape.d <= gpu_info.image3d_max_depth;
+      return shape.w * shape.b <= gpu_info.GetMaxImage3DWidth() &&
+             shape.h <= gpu_info.GetMaxImage3DHeight() &&
+             slices * shape.d <= gpu_info.GetMaxImage3DDepth();
     case TensorStorageType::TEXTURE_ARRAY:
       // Bug on some Adreno. b/131099086
      if (slices == 1 && gpu_info.IsAdreno() &&
          !gpu_info.adreno_info.support_one_layer_texture_array) {
        return false;
      }
-      return shape.w * shape.b <= gpu_info.image2d_max_width &&
-             shape.h <= gpu_info.image2d_max_height &&
-             slices * shape.d <= gpu_info.image_array_max_layers;
+      return shape.w * shape.b <= gpu_info.GetMaxImage2DWidth() &&
+             shape.h <= gpu_info.GetMaxImage2DHeight() &&
+             slices * shape.d <= gpu_info.GetMaxImage2DArrayLayers();
     case TensorStorageType::TEXTURE_2D:
-      return shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
-             shape.h * slices <= gpu_info.image2d_max_height;
+      return shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
+             shape.h * slices <= gpu_info.GetMaxImage2DHeight();
     case TensorStorageType::SINGLE_TEXTURE_2D:
       return shape.c <= 4 &&
             gpu_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
-            shape.w * shape.b * shape.d <= gpu_info.image2d_max_width &&
-            shape.h <= gpu_info.image2d_max_height;
+            shape.w * shape.b * shape.d <= gpu_info.GetMaxImage2DWidth() &&
+            shape.h <= gpu_info.GetMaxImage2DHeight();
     default:
       return false;
   }
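Note for reviewers (not part of the patch): the sketch below only illustrates the call-site migration this change asks for. `PickWorkGroupWidth` and the specific caps are hypothetical, but the accessors it uses (`CLDevice::GetInfo()`, `GpuInfo::GetMaxWorkGroupSizeForX()`, `GpuInfo::SupportsFP16()`, `GpuInfo::IsMali()`) are the ones introduced or kept by the diff above.

// Hypothetical helper, for illustration only: capability queries go through
// GpuInfo (obtained via CLDevice::GetInfo()) instead of the removed CLDevice
// wrapper methods such as device.IsMali() or device.SupportsFP16().
#include <algorithm>

#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"

namespace tflite {
namespace gpu {
namespace cl {

int PickWorkGroupWidth(const CLDevice& device) {
  const GpuInfo& gpu_info = device.GetInfo();
  // Start from the device limit exposed by the new accessor.
  int width = gpu_info.GetMaxWorkGroupSizeForX();
  if (gpu_info.IsMali()) {
    // Mali prefers small work groups (see mean_stddev_normalization.cc above).
    width = std::min(width, 64);
  }
  if (!gpu_info.SupportsFP16()) {
    width = std::min(width, 128);  // illustrative cap, not taken from the patch
  }
  return width;
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite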