From 964d71a2c2377bf5c714585b503d7a8157941572 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 20 Nov 2020 15:10:23 -0800 Subject: [PATCH] Merged gpu/cl/device_info into gpu/common/gpu_info. PiperOrigin-RevId: 343573157 Change-Id: I3173ac6fedd723c8a625a4a7b249e2b58c9d9f34 --- tensorflow/lite/delegates/gpu/cl/BUILD | 20 +- .../lite/delegates/gpu/cl/cl_arguments.h | 2 +- .../delegates/gpu/cl/cl_arguments_test.cc | 2 +- tensorflow/lite/delegates/gpu/cl/cl_device.cc | 5 +- tensorflow/lite/delegates/gpu/cl/cl_device.h | 2 +- .../lite/delegates/gpu/cl/device_info.cc | 438 ------------------ .../lite/delegates/gpu/cl/device_info.h | 275 ----------- .../lite/delegates/gpu/cl/environment.h | 2 +- .../lite/delegates/gpu/cl/kernels/BUILD | 8 +- .../gpu/cl/kernels/fully_connected.cc | 1 - .../gpu/cl/kernels/fully_connected.h | 1 - .../delegates/gpu/cl/kernels/gpu_operation.h | 2 +- .../cl/kernels/mean_stddev_normalization.cc | 1 - .../cl/kernels/mean_stddev_normalization.h | 1 - .../delegates/gpu/cl/kernels/special/BUILD | 1 - .../gpu/cl/kernels/special/fc_fc_add.cc | 1 - .../gpu/cl/kernels/special/fc_fc_add.h | 1 - .../lite/delegates/gpu/cl/kernels/util.h | 2 +- .../gpu/cl/kernels/work_group_picking.h | 2 +- .../lite/delegates/gpu/cl/selectors/BUILD | 1 - .../gpu/cl/selectors/default_selector.h | 1 - .../lite/delegates/gpu/cl/storage_type_util.h | 2 +- tensorflow/lite/delegates/gpu/common/BUILD | 1 + .../lite/delegates/gpu/common/gpu_info.cc | 203 +++++++- .../lite/delegates/gpu/common/gpu_info.h | 71 +++ 25 files changed, 285 insertions(+), 761 deletions(-) delete mode 100644 tensorflow/lite/delegates/gpu/cl/device_info.cc delete mode 100644 tensorflow/lite/delegates/gpu/cl/device_info.h diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 58a36ca7a64..feffb60e4a3 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -99,13 +99,13 @@ cc_library( deps = [ ":buffer", ":cl_context", - ":device_info", ":gpu_object", ":linear_storage", ":tensor", ":texture2d", "//tensorflow/lite/delegates/gpu/common:access_type", "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", @@ -126,8 +126,8 @@ cc_test( deps = [ ":buffer", ":cl_arguments", - ":device_info", ":gpu_object", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", ], @@ -171,9 +171,9 @@ cc_library( srcs = ["cl_device.cc"], hdrs = ["cl_device.h"], deps = [ - ":device_info", ":opencl_wrapper", ":util", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "@com_google_absl//absl/strings", @@ -276,16 +276,6 @@ flatbuffer_cc_library( ], ) -cc_library( - name = "device_info", - srcs = ["device_info.cc"], - hdrs = ["device_info.h"], - deps = [ - "//tensorflow/lite/delegates/gpu/common:data_type", - "@com_google_absl//absl/strings", - ], -) - cc_library( name = "egl_sync", srcs = ["egl_sync.cc"], @@ -307,11 +297,11 @@ cc_library( ":cl_command_queue", ":cl_context", ":cl_device", - ":device_info", ":program_cache", ":tensor", ":util", "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:precision", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", @@ -509,8 +499,8 @@ cc_library( srcs = ["storage_type_util.cc"], hdrs = ["storage_type_util.h"], deps = [ - ":device_info", "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common/task:tensor_desc", diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments.h b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h index 673b24f63e2..170c5538e02 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments.h @@ -21,8 +21,8 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/task/arguments.h" diff --git a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc index ddca3d4dc3a..69a490b9493 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_arguments_test.cc @@ -21,8 +21,8 @@ limitations under the License. #include #include "absl/strings/match.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" namespace tflite { namespace gpu { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index 86fbc25dafa..1bd5db7b646 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -162,7 +162,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) { const auto vendor_name = GetDeviceInfo(id, CL_DEVICE_VENDOR); const auto opencl_c_version = GetDeviceInfo(id, CL_DEVICE_OPENCL_C_VERSION); - info.gpu_vendor = ParseVendor(device_name, vendor_name); + info.gpu_api = GpuApi::kOpenCl; + info.vendor = ParseVendor(device_name, vendor_name); if (info.IsAdreno()) { info.adreno_info = AdrenoInfo(opencl_c_version); } else if (info.IsMali()) { @@ -243,6 +244,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) { info.opencl_info.max_work_group_size_x = max_work_group_sizes.x; info.opencl_info.max_work_group_size_y = max_work_group_sizes.y; info.opencl_info.max_work_group_size_z = max_work_group_sizes.z; + info.opencl_info.max_work_group_total_size = + GetDeviceInfo(id, CL_DEVICE_MAX_WORK_GROUP_SIZE); if (info.IsIntel()) { if (info.SupportsExtension("cl_intel_required_subgroup_size")) { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h index 906e6537d49..b94704b40d6 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h @@ -19,9 +19,9 @@ limitations under the License. #include #include -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/util.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/types.h" diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc deleted file mode 100644 index c21f8ae6e1a..00000000000 --- a/tensorflow/lite/delegates/gpu/cl/device_info.cc +++ /dev/null @@ -1,438 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" - -#include -#include -#include - -#include "absl/strings/numbers.h" -#include "absl/strings/str_split.h" - -namespace tflite { -namespace gpu { -namespace cl { -namespace { -AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) { - const std::map kMapping = { - // Adreno 6xx series - {"685", AdrenoGpu::kAdreno685}, - {"680", AdrenoGpu::kAdreno680}, - {"675", AdrenoGpu::kAdreno675}, - {"650", AdrenoGpu::kAdreno650}, - {"640", AdrenoGpu::kAdreno640}, - {"630", AdrenoGpu::kAdreno630}, - {"620", AdrenoGpu::kAdreno620}, - {"616", AdrenoGpu::kAdreno618}, - {"616", AdrenoGpu::kAdreno616}, - {"615", AdrenoGpu::kAdreno615}, - {"612", AdrenoGpu::kAdreno612}, - {"610", AdrenoGpu::kAdreno610}, - {"605", AdrenoGpu::kAdreno605}, - // Adreno 5xx series - {"540", AdrenoGpu::kAdreno540}, - {"530", AdrenoGpu::kAdreno530}, - {"512", AdrenoGpu::kAdreno512}, - {"510", AdrenoGpu::kAdreno510}, - {"509", AdrenoGpu::kAdreno509}, - {"508", AdrenoGpu::kAdreno508}, - {"506", AdrenoGpu::kAdreno506}, - {"505", AdrenoGpu::kAdreno505}, - {"504", AdrenoGpu::kAdreno504}, - // Adreno 4xx series - {"430", AdrenoGpu::kAdreno430}, - {"420", AdrenoGpu::kAdreno420}, - {"418", AdrenoGpu::kAdreno418}, - {"405", AdrenoGpu::kAdreno405}, - // Adreno 3xx series - {"330", AdrenoGpu::kAdreno330}, - {"320", AdrenoGpu::kAdreno320}, - {"308", AdrenoGpu::kAdreno308}, - {"306", AdrenoGpu::kAdreno306}, - {"305", AdrenoGpu::kAdreno305}, - {"304", AdrenoGpu::kAdreno304}, - // Adreno 2xx series - {"225", AdrenoGpu::kAdreno225}, - {"220", AdrenoGpu::kAdreno220}, - {"205", AdrenoGpu::kAdreno205}, - {"203", AdrenoGpu::kAdreno203}, - {"200", AdrenoGpu::kAdreno200}, - // Adreno 1xx series - {"130", AdrenoGpu::kAdreno130}, - {"120", AdrenoGpu::kAdreno120}, - }; - - for (const auto& v : kMapping) { - if (device_name.find(v.first) != std::string::npos) { - return v.second; - } - } - return AdrenoGpu::kUnknown; -} - -MaliGpu GetMaliGpuVersion(const std::string& gpu_description) { - const std::map kMapping = { - {"t604", MaliGpu::kT604}, {"t622", MaliGpu::kT622}, - {"t624", MaliGpu::kT624}, {"t628", MaliGpu::kT628}, - {"t658", MaliGpu::kT658}, {"t678", MaliGpu::kT678}, - {"t720", MaliGpu::kT720}, {"t760", MaliGpu::kT760}, - {"t820", MaliGpu::kT820}, {"t830", MaliGpu::kT830}, - {"t860", MaliGpu::kT860}, {"t880", MaliGpu::kT880}, - {"g31", MaliGpu::kG31}, {"g51", MaliGpu::kG51}, - {"g71", MaliGpu::kG71}, {"g52", MaliGpu::kG52}, - {"g72", MaliGpu::kG72}, {"g76", MaliGpu::kG76}, - {"g57", MaliGpu::kG57}, {"g77", MaliGpu::kG77}, - {"g68", MaliGpu::kG68}, {"g78", MaliGpu::kG78}, - }; - for (const auto& v : kMapping) { - if (gpu_description.find(v.first) != std::string::npos) { - return v.second; - } - } - return MaliGpu::kUnknown; -} - -} // namespace - -std::string GpuVendorToString(GpuVendor v) { - switch (v) { - case GpuVendor::kApple: - return "Apple"; - case GpuVendor::kQualcomm: - return "Qualcomm"; - case GpuVendor::kMali: - return "Mali"; - case GpuVendor::kPowerVR: - return "PowerVR"; - case GpuVendor::kNvidia: - return "NVIDIA"; - case GpuVendor::kAMD: - return "AMD"; - case GpuVendor::kIntel: - return "Intel"; - case GpuVendor::kUnknown: - return "unknown vendor"; - } -} - -std::string OpenClVersionToString(OpenClVersion version) { - switch (version) { - case OpenClVersion::kCl1_0: - return "1.0"; - case OpenClVersion::kCl1_1: - return "1.1"; - case OpenClVersion::kCl1_2: - return "1.2"; - case OpenClVersion::kCl2_0: - return "2.0"; - case OpenClVersion::kCl2_1: - return "2.1"; - case OpenClVersion::kCl2_2: - return "2.2"; - case OpenClVersion::kCl3_0: - return "3.0"; - default: - return "Unknown OpenCL version"; - } -} - -AdrenoInfo::AdrenoInfo(const std::string& device_version) - : adreno_gpu(GetAdrenoGpuVersion(device_version)) {} - -bool AdrenoInfo::IsAdreno1xx() const { - return adreno_gpu == AdrenoGpu::kAdreno120 || - adreno_gpu == AdrenoGpu::kAdreno130; -} - -bool AdrenoInfo::IsAdreno2xx() const { - return adreno_gpu == AdrenoGpu::kAdreno200 || - adreno_gpu == AdrenoGpu::kAdreno203 || - adreno_gpu == AdrenoGpu::kAdreno205 || - adreno_gpu == AdrenoGpu::kAdreno220 || - adreno_gpu == AdrenoGpu::kAdreno225; -} - -bool AdrenoInfo::IsAdreno3xx() const { - return adreno_gpu == AdrenoGpu::kAdreno304 || - adreno_gpu == AdrenoGpu::kAdreno305 || - adreno_gpu == AdrenoGpu::kAdreno306 || - adreno_gpu == AdrenoGpu::kAdreno308 || - adreno_gpu == AdrenoGpu::kAdreno320 || - adreno_gpu == AdrenoGpu::kAdreno330; -} - -bool AdrenoInfo::IsAdreno4xx() const { - return adreno_gpu == AdrenoGpu::kAdreno405 || - adreno_gpu == AdrenoGpu::kAdreno418 || - adreno_gpu == AdrenoGpu::kAdreno420 || - adreno_gpu == AdrenoGpu::kAdreno430; -} - -bool AdrenoInfo::IsAdreno5xx() const { - return adreno_gpu == AdrenoGpu::kAdreno504 || - adreno_gpu == AdrenoGpu::kAdreno505 || - adreno_gpu == AdrenoGpu::kAdreno506 || - adreno_gpu == AdrenoGpu::kAdreno508 || - adreno_gpu == AdrenoGpu::kAdreno509 || - adreno_gpu == AdrenoGpu::kAdreno510 || - adreno_gpu == AdrenoGpu::kAdreno512 || - adreno_gpu == AdrenoGpu::kAdreno530 || - adreno_gpu == AdrenoGpu::kAdreno540; -} - -bool AdrenoInfo::IsAdreno6xx() const { - return adreno_gpu == AdrenoGpu::kAdreno605 || - adreno_gpu == AdrenoGpu::kAdreno610 || - adreno_gpu == AdrenoGpu::kAdreno612 || - adreno_gpu == AdrenoGpu::kAdreno615 || - adreno_gpu == AdrenoGpu::kAdreno616 || - adreno_gpu == AdrenoGpu::kAdreno618 || - adreno_gpu == AdrenoGpu::kAdreno620 || - adreno_gpu == AdrenoGpu::kAdreno630 || - adreno_gpu == AdrenoGpu::kAdreno640 || - adreno_gpu == AdrenoGpu::kAdreno650 || - adreno_gpu == AdrenoGpu::kAdreno675 || - adreno_gpu == AdrenoGpu::kAdreno680 || - adreno_gpu == AdrenoGpu::kAdreno685; -} - -bool AdrenoInfo::IsAdreno6xxOrHigher() const { return IsAdreno6xx(); } - -int AdrenoInfo::GetMaximumWavesCount() const { - if (IsAdreno6xx()) { - if (adreno_gpu == AdrenoGpu::kAdreno640) { - return 30; - } else { - return 16; - } - } else { - // all other versions not supported - return 1; - } -} - -int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const { - if (IsAdreno6xx()) { - if (adreno_gpu == AdrenoGpu::kAdreno640) { - return 128 * 144 * 16; - } else if (adreno_gpu == AdrenoGpu::kAdreno650) { - return 128 * 64 * 16; - } else { - return 128 * 96 * 16; - } - } else { - // all other versions not supported - return 1; - } -} - -int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, - bool full_wave) const { - const int register_usage_per_wave = - GetWaveSize(full_wave) * register_footprint_per_tread; - const int possible_waves_count = - GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; - return std::min(possible_waves_count, GetMaximumWavesCount()); -} - -int AdrenoInfo::GetWaveSize(bool full_wave) const { - if (IsAdreno6xx()) { - return full_wave ? 128 : 64; - } else if (IsAdreno5xx() || IsAdreno4xx()) { - return full_wave ? 64 : 32; - } else { - // all other versions not supported - return 1; - } -} - -MaliInfo::MaliInfo(const std::string& gpu_description) - : gpu_version(GetMaliGpuVersion(gpu_description)) {} - -bool MaliInfo::IsMaliT6xx() const { - return gpu_version == MaliGpu::kT604 || gpu_version == MaliGpu::kT622 || - gpu_version == MaliGpu::kT624 || gpu_version == MaliGpu::kT628 || - gpu_version == MaliGpu::kT658 || gpu_version == MaliGpu::kT678; -} - -bool MaliInfo::IsMaliT7xx() const { - return gpu_version == MaliGpu::kT720 || gpu_version == MaliGpu::kT760; -} - -bool MaliInfo::IsMaliT8xx() const { - return gpu_version == MaliGpu::kT820 || gpu_version == MaliGpu::kT830 || - gpu_version == MaliGpu::kT860 || gpu_version == MaliGpu::kT880; -} - -bool MaliInfo::IsMidgard() const { - return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); -} - -bool MaliInfo::IsBifrostGen1() const { - return gpu_version == MaliGpu::kG31 || gpu_version == MaliGpu::kG51 || - gpu_version == MaliGpu::kG71; -} - -bool MaliInfo::IsBifrostGen2() const { - return gpu_version == MaliGpu::kG52 || gpu_version == MaliGpu::kG72; -} - -bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGpu::kG76; } - -bool MaliInfo::IsBifrost() const { - return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); -} - -bool MaliInfo::IsValhall() const { - return gpu_version == MaliGpu::kG57 || gpu_version == MaliGpu::kG77 || - gpu_version == MaliGpu::kG68 || gpu_version == MaliGpu::kG78; -} - -bool GpuInfo::SupportsFP16() const { return opencl_info.supports_fp16; } - -bool GpuInfo::SupportsTextureArray() const { - return opencl_info.cl_version >= OpenClVersion::kCl1_2; -} - -bool GpuInfo::SupportsImageBuffer() const { - return opencl_info.cl_version >= OpenClVersion::kCl1_2; -} - -bool GpuInfo::SupportsImage3D() const { - if (IsMali() && mali_info.IsMidgard()) { - // On Mali T880 read_imageh doesn't compile with image3d_t - return false; - } - return opencl_info.supports_image3d_writes; -} - -bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const { - if (channels == 1) { - return data_type == DataType::FLOAT32 ? opencl_info.supports_r_f32_tex2d - : opencl_info.supports_r_f16_tex2d; - } else if (channels == 2) { - return data_type == DataType::FLOAT32 ? opencl_info.supports_rg_f32_tex2d - : opencl_info.supports_rg_f16_tex2d; - } else if (channels == 3) { - return data_type == DataType::FLOAT32 ? opencl_info.supports_rgb_f32_tex2d - : opencl_info.supports_rgb_f16_tex2d; - } else if (channels == 4) { - return data_type == DataType::FLOAT32 ? opencl_info.supports_rgba_f32_tex2d - : opencl_info.supports_rgba_f16_tex2d; - } else { - return false; - } -} - -bool GpuInfo::SupportsExtension(const std::string& extension) const { - for (const auto& ext : opencl_info.extensions) { - if (ext == extension) { - return true; - } - } - return false; -} - -bool GpuInfo::IsCL20OrHigher() const { - return opencl_info.cl_version != OpenClVersion::kCl1_0 && - opencl_info.cl_version != OpenClVersion::kCl1_1 && - opencl_info.cl_version != OpenClVersion::kCl1_2; -} - -bool GpuInfo::IsCL30OrHigher() const { - return IsCL20OrHigher() && opencl_info.cl_version != OpenClVersion::kCl2_0 && - opencl_info.cl_version != OpenClVersion::kCl2_1 && - opencl_info.cl_version != OpenClVersion::kCl2_2; -} - -bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const { - for (auto subgroup_size : supported_subgroup_sizes) { - if (sub_group_size == subgroup_size) { - return true; - } - } - return false; -} - -int GpuInfo::GetComputeUnitsCount() const { - return opencl_info.compute_units_count; -} - -bool GpuInfo::IsRoundToNearestSupported() const { - return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn; -} - -int GpuInfo::GetMaxWorkGroupSizeForX() const { - return opencl_info.max_work_group_size_x; -} - -int GpuInfo::GetMaxWorkGroupSizeForY() const { - return opencl_info.max_work_group_size_y; -} - -int GpuInfo::GetMaxWorkGroupSizeForZ() const { - return opencl_info.max_work_group_size_z; -} - -uint64_t GpuInfo::GetMaxImage2DWidth() const { - return opencl_info.image2d_max_width; -} - -uint64_t GpuInfo::GetMaxImage2DHeight() const { - return opencl_info.image2d_max_height; -} - -uint64_t GpuInfo::GetMaxImage3DWidth() const { - return opencl_info.image3d_max_width; -} - -uint64_t GpuInfo::GetMaxImage3DHeight() const { - return opencl_info.image3d_max_height; -} - -uint64_t GpuInfo::GetMaxImage3DDepth() const { - return opencl_info.image3d_max_depth; -} - -uint64_t GpuInfo::GetMaxBufferSize() const { - return opencl_info.buffer_max_size; -} - -uint64_t GpuInfo::GetMaxImageBufferWidth() const { - return opencl_info.image_buffer_max_size; -} - -uint64_t GpuInfo::GetMaxImage2DArrayLayers() const { - return opencl_info.image_array_max_layers; -} - -bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; } - -bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; } - -bool GpuInfo::IsMali() const { return gpu_vendor == GpuVendor::kMali; } - -bool GpuInfo::IsPowerVR() const { return gpu_vendor == GpuVendor::kPowerVR; } - -bool GpuInfo::IsNvidia() const { return gpu_vendor == GpuVendor::kNvidia; } - -bool GpuInfo::IsAMD() const { return gpu_vendor == GpuVendor::kAMD; } - -bool GpuInfo::IsIntel() const { return gpu_vendor == GpuVendor::kIntel; } - -} // namespace cl -} // namespace gpu -} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h deleted file mode 100644 index 25f5cc479aa..00000000000 --- a/tensorflow/lite/delegates/gpu/cl/device_info.h +++ /dev/null @@ -1,275 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ -#define TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ - -#include -#include - -#include "tensorflow/lite/delegates/gpu/common/data_type.h" - -// for use only in device_info.cc, but keep here to make tests -int GetAdrenoGPUVersion(const std::string& gpu_version); - -namespace tflite { -namespace gpu { -namespace cl { - -enum class GpuVendor { - kApple, - kQualcomm, - kMali, - kPowerVR, - kNvidia, - kAMD, - kIntel, - kUnknown -}; - -std::string GpuVendorToString(GpuVendor v); - -enum class AdrenoGpu { - // Adreno 6xx series - kAdreno685, - kAdreno680, - kAdreno675, - kAdreno650, - kAdreno640, - kAdreno630, - kAdreno620, - kAdreno618, - kAdreno616, - kAdreno615, - kAdreno612, - kAdreno610, - kAdreno605, - // Adreno 5xx series - kAdreno540, - kAdreno530, - kAdreno512, - kAdreno510, - kAdreno509, - kAdreno508, - kAdreno506, - kAdreno505, - kAdreno504, - // Adreno 4xx series - kAdreno430, - kAdreno420, - kAdreno418, - kAdreno405, - // Adreno 3xx series - kAdreno330, - kAdreno320, - kAdreno308, - kAdreno306, - kAdreno305, - kAdreno304, - // Adreno 2xx series - kAdreno225, - kAdreno220, - kAdreno205, - kAdreno203, - kAdreno200, - // Adreno 1xx series - kAdreno130, - kAdreno120, - kUnknown -}; - -struct AdrenoInfo { - AdrenoInfo() = default; - explicit AdrenoInfo(const std::string& device_version); - - AdrenoGpu adreno_gpu; - - bool IsAdreno1xx() const; - bool IsAdreno2xx() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - - // This function returns some not very documented physical parameter of - // Adreno6xx GPU. - // We obtained it using Snapdragon Profiler. - int GetMaximumWavesCount() const; - - // returns amount of register memory per CU(Compute Unit) in bytes. - int GetRegisterMemorySizePerComputeUnit() const; - - // returns maximum possible amount of waves based on register usage. - int GetMaximumWavesCount(int register_footprint_per_tread, - bool full_wave = true) const; - - int GetWaveSize(bool full_wave) const; - - // Not supported on some Adreno devices with specific driver version. - // b/131099086 - bool support_one_layer_texture_array = true; -}; - -enum class MaliGpu { - kUnknown, - kT604, - kT622, - kT624, - kT628, - kT658, - kT678, - kT720, - kT760, - kT820, - kT830, - kT860, - kT880, - kG31, - kG51, - kG71, - kG52, - kG72, - kG76, - kG57, - kG77, - kG68, - kG78, -}; - -struct MaliInfo { - MaliInfo() = default; - explicit MaliInfo(const std::string& gpu_description); - MaliGpu gpu_version; - - bool IsMaliT6xx() const; - bool IsMaliT7xx() const; - bool IsMaliT8xx() const; - bool IsMidgard() const; - bool IsBifrostGen1() const; - bool IsBifrostGen2() const; - bool IsBifrostGen3() const; - bool IsBifrost() const; - bool IsValhall() const; -}; - -enum class OpenClVersion { - kCl1_0, - kCl1_1, - kCl1_2, - kCl2_0, - kCl2_1, - kCl2_2, - kCl3_0, - kUnknown, -}; -std::string OpenClVersionToString(OpenClVersion version); - -struct OpenClInfo { - OpenClVersion cl_version; - - std::vector extensions; - bool supports_fp16; - bool supports_image3d_writes; - int compute_units_count; - uint64_t buffer_max_size; - uint64_t image2d_max_width; - uint64_t image2d_max_height; - uint64_t image_buffer_max_size; - uint64_t image_array_max_layers; - uint64_t image3d_max_width; - uint64_t image3d_max_height; - uint64_t image3d_max_depth; - int max_work_group_size_x; - int max_work_group_size_y; - int max_work_group_size_z; - - // rtn is ROUND_TO_NEAREST - // with rtn precision is much better then with rtz (ROUND_TO_ZERO) - // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn - // Mali from T6xx supports rtn - // PowerVR supports only rtz - bool supports_fp32_rtn; - bool supports_fp16_rtn; - - bool supports_r_f16_tex2d = false; - bool supports_rg_f16_tex2d = false; - bool supports_rgb_f16_tex2d = false; - bool supports_rgba_f16_tex2d = false; - - bool supports_r_f32_tex2d = false; - bool supports_rg_f32_tex2d = false; - bool supports_rgb_f32_tex2d = false; - bool supports_rgba_f32_tex2d = false; -}; - -struct GpuInfo { - GpuInfo() = default; - - bool IsAdreno() const; - bool IsApple() const; - bool IsMali() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsAMD() const; - bool IsIntel() const; - - bool SupportsFP16() const; - - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - - bool SupportsFloatImage2D(DataType data_type, int channels) const; - - bool SupportsExtension(const std::string& extension) const; - bool IsCL20OrHigher() const; - bool IsCL30OrHigher() const; - bool SupportsSubGroupWithSize(int sub_group_size) const; - - int GetComputeUnitsCount() const; - - // floating point rounding mode - bool IsRoundToNearestSupported() const; - - int GetMaxWorkGroupSizeForX() const; - int GetMaxWorkGroupSizeForY() const; - int GetMaxWorkGroupSizeForZ() const; - - uint64_t GetMaxImage2DWidth() const; - uint64_t GetMaxImage2DHeight() const; - uint64_t GetMaxImage3DWidth() const; - uint64_t GetMaxImage3DHeight() const; - uint64_t GetMaxImage3DDepth() const; - - uint64_t GetMaxBufferSize() const; - uint64_t GetMaxImageBufferWidth() const; - uint64_t GetMaxImage2DArrayLayers() const; - - std::vector supported_subgroup_sizes; - - GpuVendor gpu_vendor; - - AdrenoInfo adreno_info; - MaliInfo mali_info; - - OpenClInfo opencl_info; -}; - -} // namespace cl -} // namespace gpu -} // namespace tflite - -#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h index 8917351841f..5138e59ee7e 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.h +++ b/tensorflow/lite/delegates/gpu/cl/environment.h @@ -19,9 +19,9 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/program_cache.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 84bf0bd47b9..1d6644b9cfc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -578,7 +578,6 @@ cc_library( ":util", "//tensorflow/lite/delegates/gpu/cl:buffer", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:linear_storage", "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/cl:texture2d", @@ -620,10 +619,10 @@ cc_library( deps = [ ":util", ":work_group_picking", - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs", "//tensorflow/lite/delegates/gpu/common:access_type", "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:kernel_info", "//tensorflow/lite/delegates/gpu/common:precision", "//tensorflow/lite/delegates/gpu/common:status", @@ -728,7 +727,6 @@ cc_library( ":util", ":work_group_picking", "//tensorflow/lite/delegates/gpu/cl:cl_program", - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", @@ -1206,8 +1204,8 @@ cc_library( srcs = ["util.cc"], hdrs = ["util.h"], deps = [ - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:precision", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:tensor", @@ -1264,7 +1262,7 @@ cc_library( hdrs = ["work_group_picking.h"], deps = [ "//tensorflow/lite/delegates/gpu/cl:cl_kernel", - "//tensorflow/lite/delegates/gpu/cl:device_info", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:kernel_info", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index b5caef81b43..a3c36568831 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include "absl/memory/memory.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/linear_storage.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index a508e741ef9..2380321a783 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -25,7 +25,6 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/texture2d.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index c4d73225a27..f88a40dd11e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -19,9 +19,9 @@ limitations under the License. #include #include -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/kernel_info.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" #include "tensorflow/lite/delegates/gpu/common/status.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index 3ebeb4bfa7c..4273dc2d015 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/cl/cl_program.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 6a4a1848394..bce26f3c670 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD index 92231338730..4ca2b3b51fe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD @@ -31,7 +31,6 @@ cc_library( deps = [ "//tensorflow/lite/delegates/gpu/cl:buffer", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:linear_storage", "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/cl:texture2d", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc index f1c3ddb7045..9153258e212 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include "absl/memory/memory.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/linear_storage.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h index 09e0548c663..e250873f207 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/fc_fc_add.h @@ -25,7 +25,6 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/texture2d.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.h b/tensorflow/lite/delegates/gpu/cl/kernels/util.h index 49b9b3c88ad..cc78c3dc8d2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.h @@ -20,8 +20,8 @@ limitations under the License. #include #include "absl/types/span.h" -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/precision.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/tensor.h" diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h index 8135aec8855..975ae28e61f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h @@ -18,7 +18,7 @@ limitations under the License. #include -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/kernel_info.h" #include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h" #include "tensorflow/lite/delegates/gpu/common/types.h" diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD index c66b4a5be63..e8f0518eed2 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD @@ -49,7 +49,6 @@ cc_library( hdrs = ["default_selector.h"], deps = [ ":subgraph", - "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", "//tensorflow/lite/delegates/gpu/cl/selectors/default:default_selector", "//tensorflow/lite/delegates/gpu/common:model", diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h index f6cb9a33ada..cf1223793cd 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h @@ -18,7 +18,6 @@ limitations under the License. #include -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/selectors/subgraph.h" #include "tensorflow/lite/delegates/gpu/common/model.h" diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h index f30219156b4..b849d7a1087 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ -#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index 35510086d64..ef43c76310b 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -40,6 +40,7 @@ cc_library( srcs = ["gpu_info.cc"], hdrs = ["gpu_info.h"], deps = [ + ":data_type", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.cc b/tensorflow/lite/delegates/gpu/common/gpu_info.cc index c2da5555dbd..2204f4a6448 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc @@ -358,6 +358,27 @@ void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, } } +std::string OpenClVersionToString(OpenClVersion version) { + switch (version) { + case OpenClVersion::kCl1_0: + return "1.0"; + case OpenClVersion::kCl1_1: + return "1.1"; + case OpenClVersion::kCl1_2: + return "1.2"; + case OpenClVersion::kCl2_0: + return "2.0"; + case OpenClVersion::kCl2_1: + return "2.1"; + case OpenClVersion::kCl2_2: + return "2.2"; + case OpenClVersion::kCl3_0: + return "3.0"; + default: + return "Unknown OpenCL version"; + } +} + bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; } bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; } @@ -373,11 +394,45 @@ bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; } bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; } bool GpuInfo::IsRoundToNearestSupported() const { + if (IsApiOpenCl()) { + return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn; + } if (IsApple()) { return apple_info.IsRoundToNearestSupported(); - } else { - return true; } + return true; +} + +bool GpuInfo::SupportsFP16() const { + if (IsApiOpenCl()) { + return opencl_info.supports_fp16; + } + return true; +} + +bool GpuInfo::SupportsTextureArray() const { + if (IsApiOpenCl()) { + return opencl_info.cl_version >= OpenClVersion::kCl1_2; + } + return true; +} + +bool GpuInfo::SupportsImageBuffer() const { + if (IsApiOpenCl()) { + return opencl_info.cl_version >= OpenClVersion::kCl1_2; + } + return true; +} + +bool GpuInfo::SupportsImage3D() const { + if (IsApiOpenCl()) { + if (IsMali() && mali_info.IsMidgard()) { + // On Mali T880 read_imageh doesn't compile with image3d_t + return false; + } + return opencl_info.supports_image3d_writes; + } + return true; } bool GpuInfo::IsWaveSizeEqualTo32() const { @@ -385,12 +440,66 @@ bool GpuInfo::IsWaveSizeEqualTo32() const { supported_subgroup_sizes[0] == 32; } +bool GpuInfo::SupportsExtension(const std::string& extension) const { + const std::vector* extensions = nullptr; + if (IsApiOpenGl()) { + extensions = &opengl_info.extensions; + } else if (IsApiVulkan()) { + extensions = &vulkan_info.extensions; + } else if (IsApiOpenCl()) { + extensions = &opencl_info.extensions; + } + if (!extensions) { + return false; + } + for (const auto& ext : *extensions) { + if (ext == extension) { + return true; + } + } + return false; +} + +bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const { + for (auto subgroup_size : supported_subgroup_sizes) { + if (sub_group_size == subgroup_size) { + return true; + } + } + return false; +} + +bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const { + if (IsApiOpenCl()) { + if (channels == 1) { + return data_type == DataType::FLOAT32 ? opencl_info.supports_r_f32_tex2d + : opencl_info.supports_r_f16_tex2d; + } else if (channels == 2) { + return data_type == DataType::FLOAT32 ? opencl_info.supports_rg_f32_tex2d + : opencl_info.supports_rg_f16_tex2d; + } else if (channels == 3) { + return data_type == DataType::FLOAT32 + ? opencl_info.supports_rgb_f32_tex2d + : opencl_info.supports_rgb_f16_tex2d; + } else if (channels == 4) { + return data_type == DataType::FLOAT32 + ? opencl_info.supports_rgba_f32_tex2d + : opencl_info.supports_rgba_f16_tex2d; + } else { + return false; + } + } + return false; +} + int GpuInfo::GetComputeUnitsCount() const { + if (IsApiOpenCl()) { + return opencl_info.compute_units_count; + } if (IsApple()) { return apple_info.GetComputeUnitsCount(); - } else { - return 1; } + return 1; } int GpuInfo::GetMaxWorkGroupSizeForX() const { @@ -400,6 +509,9 @@ int GpuInfo::GetMaxWorkGroupSizeForX() const { if (IsApiVulkan()) { return vulkan_info.max_compute_work_group_size_x; } + if (IsApiOpenCl()) { + return opencl_info.max_work_group_size_x; + } return 256; } @@ -410,6 +522,9 @@ int GpuInfo::GetMaxWorkGroupSizeForY() const { if (IsApiVulkan()) { return vulkan_info.max_compute_work_group_size_y; } + if (IsApiOpenCl()) { + return opencl_info.max_work_group_size_y; + } return 256; } @@ -420,6 +535,9 @@ int GpuInfo::GetMaxWorkGroupSizeForZ() const { if (IsApiVulkan()) { return vulkan_info.max_compute_work_group_size_z; } + if (IsApiOpenCl()) { + return opencl_info.max_work_group_size_z; + } return 64; } @@ -430,6 +548,9 @@ int GpuInfo::GetMaxWorkGroupTotalSize() const { if (IsApiVulkan()) { return vulkan_info.max_compute_work_group_invocations; } + if (IsApiOpenCl()) { + return opencl_info.max_work_group_total_size; + } return 256; } @@ -440,6 +561,9 @@ uint64_t GpuInfo::GetMaxImage2DWidth() const { if (IsApiVulkan()) { return vulkan_info.max_image_dimension_2d; } + if (IsApiOpenCl()) { + return opencl_info.image2d_max_width; + } return 2048; } @@ -450,6 +574,9 @@ uint64_t GpuInfo::GetMaxImage2DHeight() const { if (IsApiVulkan()) { return vulkan_info.max_image_dimension_2d; } + if (IsApiOpenCl()) { + return opencl_info.image2d_max_height; + } return 2048; } @@ -460,9 +587,47 @@ uint64_t GpuInfo::GetMaxImage2DArrayLayers() const { if (IsApiVulkan()) { return vulkan_info.max_image_array_layers; } + if (IsApiOpenCl()) { + return opencl_info.image_array_max_layers; + } return 256; } +uint64_t GpuInfo::GetMaxImage3DWidth() const { + if (IsApiOpenCl()) { + return opencl_info.image3d_max_width; + } + return 256; +} + +uint64_t GpuInfo::GetMaxImage3DHeight() const { + if (IsApiOpenCl()) { + return opencl_info.image3d_max_height; + } + return 256; +} + +uint64_t GpuInfo::GetMaxImage3DDepth() const { + if (IsApiOpenCl()) { + return opencl_info.image3d_max_depth; + } + return 256; +} + +uint64_t GpuInfo::GetMaxBufferSize() const { + if (IsApiOpenCl()) { + return opencl_info.buffer_max_size; + } + return 128 * 1024 * 1024; +} + +uint64_t GpuInfo::GetMaxImageBufferWidth() const { + if (IsApiOpenCl()) { + return opencl_info.image_buffer_max_size; + } + return 64 * 1024; +} + int GpuInfo::GetMaxImageArguments() const { if (IsApiOpenGl()) { return opengl_info.max_image_units; @@ -481,12 +646,6 @@ int GpuInfo::GetMaxImageArguments() const { bool GpuInfo::IsApiOpenGl() const { return gpu_api == GpuApi::kOpenGl; } -bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; } - -bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; } - -bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; } - bool GpuInfo::IsApiOpenGl31OrAbove() const { if (!IsApiOpenGl()) { return false; @@ -495,5 +654,29 @@ bool GpuInfo::IsApiOpenGl31OrAbove() const { opengl_info.major_version > 3; } +bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; } + +bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; } + +bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; } + +bool GpuInfo::IsCL20OrHigher() const { + if (!IsApiOpenCl()) { + return false; + } + return opencl_info.cl_version != OpenClVersion::kCl1_0 && + opencl_info.cl_version != OpenClVersion::kCl1_1 && + opencl_info.cl_version != OpenClVersion::kCl1_2; +} + +bool GpuInfo::IsCL30OrHigher() const { + if (!IsApiOpenCl()) { + return false; + } + return IsCL20OrHigher() && opencl_info.cl_version != OpenClVersion::kCl2_0 && + opencl_info.cl_version != OpenClVersion::kCl2_1 && + opencl_info.cl_version != OpenClVersion::kCl2_2; +} + } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.h b/tensorflow/lite/delegates/gpu/common/gpu_info.h index 3a519c7a877..cd61887fa83 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.h +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h @@ -19,6 +19,8 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/gpu/common/data_type.h" + namespace tflite { namespace gpu { @@ -236,6 +238,57 @@ struct VulkanInfo { int max_compute_work_group_size_z; }; +enum class OpenClVersion { + kCl1_0, + kCl1_1, + kCl1_2, + kCl2_0, + kCl2_1, + kCl2_2, + kCl3_0, + kUnknown, +}; +std::string OpenClVersionToString(OpenClVersion version); + +struct OpenClInfo { + OpenClVersion cl_version; + + std::vector extensions; + bool supports_fp16; + bool supports_image3d_writes; + int compute_units_count; + uint64_t buffer_max_size; + uint64_t image2d_max_width; + uint64_t image2d_max_height; + uint64_t image_buffer_max_size; + uint64_t image_array_max_layers; + uint64_t image3d_max_width; + uint64_t image3d_max_height; + uint64_t image3d_max_depth; + int max_work_group_size_x; + int max_work_group_size_y; + int max_work_group_size_z; + int max_work_group_total_size; + + // rtn is ROUND_TO_NEAREST + // with rtn precision is much better then with rtz (ROUND_TO_ZERO) + // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn + // Mali from T6xx supports rtn + // PowerVR supports only rtz + bool supports_fp32_rtn; + bool supports_fp16_rtn; + + bool supports_r_f16_tex2d = false; + bool supports_rg_f16_tex2d = false; + bool supports_rgb_f16_tex2d = false; + bool supports_rgba_f16_tex2d = false; + + bool supports_r_f32_tex2d = false; + bool supports_rg_f32_tex2d = false; + bool supports_rgb_f32_tex2d = false; + bool supports_rgba_f32_tex2d = false; +}; + struct GpuInfo { bool IsAdreno() const; bool IsApple() const; @@ -248,8 +301,18 @@ struct GpuInfo { // floating point rounding mode bool IsRoundToNearestSupported() const; + bool SupportsFP16() const; + + bool SupportsTextureArray() const; + bool SupportsImageBuffer() const; + bool SupportsImage3D() const; + // returns true if device have fixed wave size equal to 32 bool IsWaveSizeEqualTo32() const; + bool SupportsSubGroupWithSize(int sub_group_size) const; + + bool SupportsFloatImage2D(DataType data_type, int channels) const; + bool SupportsExtension(const std::string& extension) const; int GetComputeUnitsCount() const; @@ -263,6 +326,11 @@ struct GpuInfo { uint64_t GetMaxImage2DWidth() const; uint64_t GetMaxImage2DHeight() const; uint64_t GetMaxImage2DArrayLayers() const; + uint64_t GetMaxImage3DWidth() const; + uint64_t GetMaxImage3DHeight() const; + uint64_t GetMaxImage3DDepth() const; + uint64_t GetMaxBufferSize() const; + uint64_t GetMaxImageBufferWidth() const; GpuVendor vendor = GpuVendor::kUnknown; GpuApi gpu_api = GpuApi::kUnknown; @@ -287,7 +355,10 @@ struct GpuInfo { bool IsApiMetal() const; + OpenClInfo opencl_info; bool IsApiOpenCl() const; + bool IsCL20OrHigher() const; + bool IsCL30OrHigher() const; }; inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {