Merged gpu/cl/device_info into gpu/common/gpu_info.

PiperOrigin-RevId: 343573157
Change-Id: I3173ac6fedd723c8a625a4a7b249e2b58c9d9f34
This commit is contained in:
Raman Sarokin 2020-11-20 15:10:23 -08:00 committed by TensorFlower Gardener
parent a5dbf466e2
commit 964d71a2c2
25 changed files with 285 additions and 761 deletions

View File

@ -99,13 +99,13 @@ cc_library(
deps = [
":buffer",
":cl_context",
":device_info",
":gpu_object",
":linear_storage",
":tensor",
":texture2d",
"//tensorflow/lite/delegates/gpu/common:access_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common:util",
@ -126,8 +126,8 @@ cc_test(
deps = [
":buffer",
":cl_arguments",
":device_info",
":gpu_object",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
],
@ -171,9 +171,9 @@ cc_library(
srcs = ["cl_device.cc"],
hdrs = ["cl_device.h"],
deps = [
":device_info",
":opencl_wrapper",
":util",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:types",
"@com_google_absl//absl/strings",
@ -276,16 +276,6 @@ flatbuffer_cc_library(
],
)
# Library target built from device_info.cc and device_info.h.
cc_library(
name = "device_info",
srcs = ["device_info.cc"],
hdrs = ["device_info.h"],
deps = [
"//tensorflow/lite/delegates/gpu/common:data_type",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "egl_sync",
srcs = ["egl_sync.cc"],
@ -307,11 +297,11 @@ cc_library(
":cl_command_queue",
":cl_context",
":cl_device",
":device_info",
":program_cache",
":tensor",
":util",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:status",
@ -509,8 +499,8 @@ cc_library(
srcs = ["storage_type_util.cc"],
hdrs = ["storage_type_util.h"],
deps = [
":device_info",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:util",
"//tensorflow/lite/delegates/gpu/common/task:tensor_desc",

View File

@ -21,8 +21,8 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/arguments.h"

View File

@ -21,8 +21,8 @@ limitations under the License.
#include <gtest/gtest.h>
#include "absl/strings/match.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
namespace tflite {
namespace gpu {

View File

@ -162,7 +162,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
const auto opencl_c_version =
GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
info.gpu_vendor = ParseVendor(device_name, vendor_name);
info.gpu_api = GpuApi::kOpenCl;
info.vendor = ParseVendor(device_name, vendor_name);
if (info.IsAdreno()) {
info.adreno_info = AdrenoInfo(opencl_c_version);
} else if (info.IsMali()) {
@ -243,6 +244,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
info.opencl_info.max_work_group_total_size =
GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
if (info.IsIntel()) {
if (info.SupportsExtension("cl_intel_required_subgroup_size")) {

View File

@ -19,9 +19,9 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

View File

@ -1,438 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include <algorithm>
#include <string>
#include <vector>
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
namespace tflite {
namespace gpu {
namespace cl {
namespace {
// Maps a device description string to the Adreno model it mentions.
//
// The string is searched for every known three-digit model number (for
// example "640"). Entries are visited in ascending key order (std::map
// iteration order) and the first key that occurs as a substring decides the
// result. Returns AdrenoGpu::kUnknown when no known model number occurs.
AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) {
  const std::map<std::string, AdrenoGpu> kMapping = {
      // Adreno 6xx series
      {"685", AdrenoGpu::kAdreno685},
      {"680", AdrenoGpu::kAdreno680},
      {"675", AdrenoGpu::kAdreno675},
      {"650", AdrenoGpu::kAdreno650},
      {"640", AdrenoGpu::kAdreno640},
      {"630", AdrenoGpu::kAdreno630},
      {"620", AdrenoGpu::kAdreno620},
      // Each key must be unique: a repeated key is silently dropped by
      // std::map, which previously made Adreno 618 undetectable and reported
      // Adreno 616 devices as 618.
      {"618", AdrenoGpu::kAdreno618},
      {"616", AdrenoGpu::kAdreno616},
      {"615", AdrenoGpu::kAdreno615},
      {"612", AdrenoGpu::kAdreno612},
      {"610", AdrenoGpu::kAdreno610},
      {"605", AdrenoGpu::kAdreno605},
      // Adreno 5xx series
      {"540", AdrenoGpu::kAdreno540},
      {"530", AdrenoGpu::kAdreno530},
      {"512", AdrenoGpu::kAdreno512},
      {"510", AdrenoGpu::kAdreno510},
      {"509", AdrenoGpu::kAdreno509},
      {"508", AdrenoGpu::kAdreno508},
      {"506", AdrenoGpu::kAdreno506},
      {"505", AdrenoGpu::kAdreno505},
      {"504", AdrenoGpu::kAdreno504},
      // Adreno 4xx series
      {"430", AdrenoGpu::kAdreno430},
      {"420", AdrenoGpu::kAdreno420},
      {"418", AdrenoGpu::kAdreno418},
      {"405", AdrenoGpu::kAdreno405},
      // Adreno 3xx series
      {"330", AdrenoGpu::kAdreno330},
      {"320", AdrenoGpu::kAdreno320},
      {"308", AdrenoGpu::kAdreno308},
      {"306", AdrenoGpu::kAdreno306},
      {"305", AdrenoGpu::kAdreno305},
      {"304", AdrenoGpu::kAdreno304},
      // Adreno 2xx series
      {"225", AdrenoGpu::kAdreno225},
      {"220", AdrenoGpu::kAdreno220},
      {"205", AdrenoGpu::kAdreno205},
      {"203", AdrenoGpu::kAdreno203},
      {"200", AdrenoGpu::kAdreno200},
      // Adreno 1xx series
      {"130", AdrenoGpu::kAdreno130},
      {"120", AdrenoGpu::kAdreno120},
  };

  for (const auto& v : kMapping) {
    if (device_name.find(v.first) != std::string::npos) {
      return v.second;
    }
  }
  return AdrenoGpu::kUnknown;
}
// Finds the Mali model whose lowercase tag (for example "g76") occurs as a
// substring of gpu_description. The comparison is case-sensitive and tags are
// tried in ascending key order. Returns MaliGpu::kUnknown if no tag occurs.
MaliGpu GetMaliGpuVersion(const std::string& gpu_description) {
  const std::map<std::string, MaliGpu> kTagToModel = {
      {"t604", MaliGpu::kT604},
      {"t622", MaliGpu::kT622},
      {"t624", MaliGpu::kT624},
      {"t628", MaliGpu::kT628},
      {"t658", MaliGpu::kT658},
      {"t678", MaliGpu::kT678},
      {"t720", MaliGpu::kT720},
      {"t760", MaliGpu::kT760},
      {"t820", MaliGpu::kT820},
      {"t830", MaliGpu::kT830},
      {"t860", MaliGpu::kT860},
      {"t880", MaliGpu::kT880},
      {"g31", MaliGpu::kG31},
      {"g51", MaliGpu::kG51},
      {"g71", MaliGpu::kG71},
      {"g52", MaliGpu::kG52},
      {"g72", MaliGpu::kG72},
      {"g76", MaliGpu::kG76},
      {"g57", MaliGpu::kG57},
      {"g77", MaliGpu::kG77},
      {"g68", MaliGpu::kG68},
      {"g78", MaliGpu::kG78},
  };

  for (auto it = kTagToModel.begin(); it != kTagToModel.end(); ++it) {
    const bool mentioned = gpu_description.find(it->first) != std::string::npos;
    if (mentioned) {
      return it->second;
    }
  }
  return MaliGpu::kUnknown;
}
} // namespace
// Returns a human-readable name for the vendor `v`.
//
// Every enumerator is handled explicitly (no `default:` label, so the
// compiler can still warn when a new enumerator is added). A value outside
// the enumeration falls through to the final return instead of flowing off
// the end of the function, which would be undefined behaviour.
std::string GpuVendorToString(GpuVendor v) {
  switch (v) {
    case GpuVendor::kApple:
      return "Apple";
    case GpuVendor::kQualcomm:
      return "Qualcomm";
    case GpuVendor::kMali:
      return "Mali";
    case GpuVendor::kPowerVR:
      return "PowerVR";
    case GpuVendor::kNvidia:
      return "NVIDIA";
    case GpuVendor::kAMD:
      return "AMD";
    case GpuVendor::kIntel:
      return "Intel";
    case GpuVendor::kUnknown:
      return "unknown vendor";
  }
  return "unknown vendor";
}
// Formats an OpenCL version as "major.minor". Any value without a dedicated
// case (including kUnknown) yields "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version) {
  const char* text = "Unknown OpenCL version";
  switch (version) {
    case OpenClVersion::kCl1_0:
      text = "1.0";
      break;
    case OpenClVersion::kCl1_1:
      text = "1.1";
      break;
    case OpenClVersion::kCl1_2:
      text = "1.2";
      break;
    case OpenClVersion::kCl2_0:
      text = "2.0";
      break;
    case OpenClVersion::kCl2_1:
      text = "2.1";
      break;
    case OpenClVersion::kCl2_2:
      text = "2.2";
      break;
    case OpenClVersion::kCl3_0:
      text = "3.0";
      break;
    default:
      break;
  }
  return text;
}
AdrenoInfo::AdrenoInfo(const std::string& device_version)
: adreno_gpu(GetAdrenoGpuVersion(device_version)) {}
// True for the Adreno 1xx models (120, 130).
bool AdrenoInfo::IsAdreno1xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno120:
    case AdrenoGpu::kAdreno130:
      return true;
    default:
      return false;
  }
}
// True for the Adreno 2xx models (200, 203, 205, 220, 225).
bool AdrenoInfo::IsAdreno2xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno200:
    case AdrenoGpu::kAdreno203:
    case AdrenoGpu::kAdreno205:
    case AdrenoGpu::kAdreno220:
    case AdrenoGpu::kAdreno225:
      return true;
    default:
      return false;
  }
}
// True for the Adreno 3xx models (304, 305, 306, 308, 320, 330).
bool AdrenoInfo::IsAdreno3xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno304:
    case AdrenoGpu::kAdreno305:
    case AdrenoGpu::kAdreno306:
    case AdrenoGpu::kAdreno308:
    case AdrenoGpu::kAdreno320:
    case AdrenoGpu::kAdreno330:
      return true;
    default:
      return false;
  }
}
// True for the Adreno 4xx models (405, 418, 420, 430).
bool AdrenoInfo::IsAdreno4xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno405:
    case AdrenoGpu::kAdreno418:
    case AdrenoGpu::kAdreno420:
    case AdrenoGpu::kAdreno430:
      return true;
    default:
      return false;
  }
}
// True for the Adreno 5xx models (504 through 540).
bool AdrenoInfo::IsAdreno5xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno504:
    case AdrenoGpu::kAdreno505:
    case AdrenoGpu::kAdreno506:
    case AdrenoGpu::kAdreno508:
    case AdrenoGpu::kAdreno509:
    case AdrenoGpu::kAdreno510:
    case AdrenoGpu::kAdreno512:
    case AdrenoGpu::kAdreno530:
    case AdrenoGpu::kAdreno540:
      return true;
    default:
      return false;
  }
}
// True for the Adreno 6xx models (605 through 685).
bool AdrenoInfo::IsAdreno6xx() const {
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno605:
    case AdrenoGpu::kAdreno610:
    case AdrenoGpu::kAdreno612:
    case AdrenoGpu::kAdreno615:
    case AdrenoGpu::kAdreno616:
    case AdrenoGpu::kAdreno618:
    case AdrenoGpu::kAdreno620:
    case AdrenoGpu::kAdreno630:
    case AdrenoGpu::kAdreno640:
    case AdrenoGpu::kAdreno650:
    case AdrenoGpu::kAdreno675:
    case AdrenoGpu::kAdreno680:
    case AdrenoGpu::kAdreno685:
      return true;
    default:
      return false;
  }
}
// Currently equivalent to IsAdreno6xx(): no model newer than the 6xx series
// is checked here.
bool AdrenoInfo::IsAdreno6xxOrHigher() const {
  const bool is_6xx = IsAdreno6xx();
  return is_6xx;
}
// Returns the maximum wave count for this model: 30 on Adreno 640, 16 on the
// other 6xx models, and 1 for every model outside the 6xx series.
int AdrenoInfo::GetMaximumWavesCount() const {
  if (!IsAdreno6xx()) {
    // all other versions not supported
    return 1;
  }
  return adreno_gpu == AdrenoGpu::kAdreno640 ? 30 : 16;
}
// Returns the register memory size per compute unit for 6xx models, with
// dedicated values for Adreno 640 and 650. Returns 1 for every model outside
// the 6xx series.
int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
  if (!IsAdreno6xx()) {
    // all other versions not supported
    return 1;
  }
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno640:
      return 128 * 144 * 16;
    case AdrenoGpu::kAdreno650:
      return 128 * 64 * 16;
    default:
      return 128 * 96 * 16;
  }
}
// Returns how many waves can be resident given the register usage of a
// kernel.
//
// register_footprint_per_tread: register footprint of a single thread.
// full_wave: selects the full or half wave size (see GetWaveSize).
//
// The result is the register memory per compute unit divided by the register
// usage of one wave, capped by GetMaximumWavesCount(). A non-positive
// footprint puts no register pressure on the device, so the cap itself is
// returned; this also avoids a division by zero.
int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
                                     bool full_wave) const {
  const int register_usage_per_wave =
      GetWaveSize(full_wave) * register_footprint_per_tread;
  if (register_usage_per_wave <= 0) {
    return GetMaximumWavesCount();
  }
  const int possible_waves_count =
      GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
  return std::min(possible_waves_count, GetMaximumWavesCount());
}
// Returns the wave size: 128/64 (full/half) on 6xx, 64/32 on 5xx and 4xx,
// and 1 for every other model.
int AdrenoInfo::GetWaveSize(bool full_wave) const {
  if (IsAdreno6xx()) {
    return full_wave ? 128 : 64;
  }
  if (IsAdreno5xx() || IsAdreno4xx()) {
    return full_wave ? 64 : 32;
  }
  // all other versions not supported
  return 1;
}
MaliInfo::MaliInfo(const std::string& gpu_description)
: gpu_version(GetMaliGpuVersion(gpu_description)) {}
// True for the Mali T6xx models (T604, T622, T624, T628, T658, T678).
bool MaliInfo::IsMaliT6xx() const {
  switch (gpu_version) {
    case MaliGpu::kT604:
    case MaliGpu::kT622:
    case MaliGpu::kT624:
    case MaliGpu::kT628:
    case MaliGpu::kT658:
    case MaliGpu::kT678:
      return true;
    default:
      return false;
  }
}
// True for the Mali T7xx models (T720, T760).
bool MaliInfo::IsMaliT7xx() const {
  switch (gpu_version) {
    case MaliGpu::kT720:
    case MaliGpu::kT760:
      return true;
    default:
      return false;
  }
}
// True for the Mali T8xx models (T820, T830, T860, T880).
bool MaliInfo::IsMaliT8xx() const {
  switch (gpu_version) {
    case MaliGpu::kT820:
    case MaliGpu::kT830:
    case MaliGpu::kT860:
    case MaliGpu::kT880:
      return true;
    default:
      return false;
  }
}
// True when the GPU belongs to any of the T6xx, T7xx or T8xx families.
bool MaliInfo::IsMidgard() const {
  if (IsMaliT6xx()) {
    return true;
  }
  if (IsMaliT7xx()) {
    return true;
  }
  return IsMaliT8xx();
}
// True for the first-generation Bifrost models (G31, G51, G71).
bool MaliInfo::IsBifrostGen1() const {
  switch (gpu_version) {
    case MaliGpu::kG31:
    case MaliGpu::kG51:
    case MaliGpu::kG71:
      return true;
    default:
      return false;
  }
}
// True for the second-generation Bifrost models (G52, G72).
bool MaliInfo::IsBifrostGen2() const {
  switch (gpu_version) {
    case MaliGpu::kG52:
    case MaliGpu::kG72:
      return true;
    default:
      return false;
  }
}
// True for the third-generation Bifrost model (G76).
bool MaliInfo::IsBifrostGen3() const {
  return MaliGpu::kG76 == gpu_version;
}
// True when the GPU belongs to any of the three Bifrost generations.
bool MaliInfo::IsBifrost() const {
  if (IsBifrostGen1()) {
    return true;
  }
  if (IsBifrostGen2()) {
    return true;
  }
  return IsBifrostGen3();
}
// True for the Valhall models (G57, G77, G68, G78).
bool MaliInfo::IsValhall() const {
  switch (gpu_version) {
    case MaliGpu::kG57:
    case MaliGpu::kG77:
    case MaliGpu::kG68:
    case MaliGpu::kG78:
      return true;
    default:
      return false;
  }
}
// Reports the fp16 support flag recorded in opencl_info.
bool GpuInfo::SupportsFP16() const {
  const bool fp16_available = opencl_info.supports_fp16;
  return fp16_available;
}
// Texture arrays are reported as supported when the OpenCL version is not
// below 1.2 in enumerator order.
bool GpuInfo::SupportsTextureArray() const {
  const bool older_than_1_2 = opencl_info.cl_version < OpenClVersion::kCl1_2;
  return !older_than_1_2;
}
// Image buffers are reported as supported when the OpenCL version is not
// below 1.2 in enumerator order.
bool GpuInfo::SupportsImageBuffer() const {
  const bool older_than_1_2 = opencl_info.cl_version < OpenClVersion::kCl1_2;
  return !older_than_1_2;
}
// Reports whether 3D images can be used: never on Midgard Mali GPUs,
// otherwise according to the image3d-writes flag in opencl_info.
bool GpuInfo::SupportsImage3D() const {
  // On Mali T880 read_imageh doesn't compile with image3d_t
  const bool midgard_mali = IsMali() && mali_info.IsMidgard();
  return !midgard_mali && opencl_info.supports_image3d_writes;
}
// Reports whether a 2D float image with `channels` channels (1-4) is
// supported. FLOAT32 selects the f32 flags; any other data type selects the
// f16 flags. Channel counts outside 1-4 are never supported.
bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  const bool is_f32 = data_type == DataType::FLOAT32;
  switch (channels) {
    case 1:
      return is_f32 ? opencl_info.supports_r_f32_tex2d
                    : opencl_info.supports_r_f16_tex2d;
    case 2:
      return is_f32 ? opencl_info.supports_rg_f32_tex2d
                    : opencl_info.supports_rg_f16_tex2d;
    case 3:
      return is_f32 ? opencl_info.supports_rgb_f32_tex2d
                    : opencl_info.supports_rgb_f16_tex2d;
    case 4:
      return is_f32 ? opencl_info.supports_rgba_f32_tex2d
                    : opencl_info.supports_rgba_f16_tex2d;
    default:
      return false;
  }
}
// Returns true when `extension` exactly matches one of the entries in
// opencl_info.extensions.
bool GpuInfo::SupportsExtension(const std::string& extension) const {
  const auto& known = opencl_info.extensions;
  for (auto it = known.begin(); it != known.end(); ++it) {
    if (*it == extension) {
      return true;
    }
  }
  return false;
}
// True unless the recorded OpenCL version is 1.0, 1.1 or 1.2.
bool GpuInfo::IsCL20OrHigher() const {
  switch (opencl_info.cl_version) {
    case OpenClVersion::kCl1_0:
    case OpenClVersion::kCl1_1:
    case OpenClVersion::kCl1_2:
      return false;
    default:
      return true;
  }
}
// True when IsCL20OrHigher() holds and the recorded version is none of 2.0,
// 2.1 or 2.2.
bool GpuInfo::IsCL30OrHigher() const {
  if (!IsCL20OrHigher()) {
    return false;
  }
  switch (opencl_info.cl_version) {
    case OpenClVersion::kCl2_0:
    case OpenClVersion::kCl2_1:
    case OpenClVersion::kCl2_2:
      return false;
    default:
      return true;
  }
}
// Returns true when sub_group_size is listed in supported_subgroup_sizes.
bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
  for (auto it = supported_subgroup_sizes.begin();
       it != supported_subgroup_sizes.end(); ++it) {
    if (*it == sub_group_size) {
      return true;
    }
  }
  return false;
}
// Returns the compute unit count recorded in opencl_info.
int GpuInfo::GetComputeUnitsCount() const {
return opencl_info.compute_units_count;
}
// True when round-to-nearest is available for fp16 or for fp32.
bool GpuInfo::IsRoundToNearestSupported() const {
  if (opencl_info.supports_fp16_rtn) {
    return true;
  }
  return opencl_info.supports_fp32_rtn;
}
// Returns the maximum work group size along X recorded in opencl_info.
int GpuInfo::GetMaxWorkGroupSizeForX() const {
return opencl_info.max_work_group_size_x;
}
// Returns the maximum work group size along Y recorded in opencl_info.
int GpuInfo::GetMaxWorkGroupSizeForY() const {
return opencl_info.max_work_group_size_y;
}
// Returns the maximum work group size along Z recorded in opencl_info.
int GpuInfo::GetMaxWorkGroupSizeForZ() const {
return opencl_info.max_work_group_size_z;
}
// Returns opencl_info.image2d_max_width.
uint64_t GpuInfo::GetMaxImage2DWidth() const {
return opencl_info.image2d_max_width;
}
// Returns opencl_info.image2d_max_height.
uint64_t GpuInfo::GetMaxImage2DHeight() const {
return opencl_info.image2d_max_height;
}
// Returns opencl_info.image3d_max_width.
uint64_t GpuInfo::GetMaxImage3DWidth() const {
return opencl_info.image3d_max_width;
}
// Returns opencl_info.image3d_max_height.
uint64_t GpuInfo::GetMaxImage3DHeight() const {
return opencl_info.image3d_max_height;
}
// Returns opencl_info.image3d_max_depth.
uint64_t GpuInfo::GetMaxImage3DDepth() const {
return opencl_info.image3d_max_depth;
}
// Returns opencl_info.buffer_max_size.
uint64_t GpuInfo::GetMaxBufferSize() const {
return opencl_info.buffer_max_size;
}
// Returns opencl_info.image_buffer_max_size.
uint64_t GpuInfo::GetMaxImageBufferWidth() const {
return opencl_info.image_buffer_max_size;
}
// Returns opencl_info.image_array_max_layers.
uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
return opencl_info.image_array_max_layers;
}
// Vendor predicates: each compares gpu_vendor against a single GpuVendor
// value. Note that IsAdreno() and IsMali() test for the kQualcomm and kMali
// vendors respectively.
bool GpuInfo::IsAdreno() const { return gpu_vendor == GpuVendor::kQualcomm; }
bool GpuInfo::IsApple() const { return gpu_vendor == GpuVendor::kApple; }
bool GpuInfo::IsMali() const { return gpu_vendor == GpuVendor::kMali; }
bool GpuInfo::IsPowerVR() const { return gpu_vendor == GpuVendor::kPowerVR; }
bool GpuInfo::IsNvidia() const { return gpu_vendor == GpuVendor::kNvidia; }
bool GpuInfo::IsAMD() const { return gpu_vendor == GpuVendor::kAMD; }
bool GpuInfo::IsIntel() const { return gpu_vendor == GpuVendor::kIntel; }
} // namespace cl
} // namespace gpu
} // namespace tflite

View File

@ -1,275 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
// Intended for use only by device_info.cc; declared in this header so that
// tests can reference it.
// NOTE(review): this is declared in the global namespace and returns int, but
// device_info.cc defines no function with this name — confirm whether the
// declaration is still needed.
int GetAdrenoGPUVersion(const std::string& gpu_version);
namespace tflite {
namespace gpu {
namespace cl {
// Vendor of the GPU. kUnknown is the value GpuVendorToString() reports as
// "unknown vendor".
enum class GpuVendor {
kApple,
kQualcomm,
kMali,
kPowerVR,
kNvidia,
kAMD,
kIntel,
kUnknown
};
// Returns a human-readable name for the vendor `v`.
std::string GpuVendorToString(GpuVendor v);
// Known Adreno GPU models, grouped by series from newest to oldest.
// kUnknown is the last enumerator.
enum class AdrenoGpu {
// Adreno 6xx series
kAdreno685,
kAdreno680,
kAdreno675,
kAdreno650,
kAdreno640,
kAdreno630,
kAdreno620,
kAdreno618,
kAdreno616,
kAdreno615,
kAdreno612,
kAdreno610,
kAdreno605,
// Adreno 5xx series
kAdreno540,
kAdreno530,
kAdreno512,
kAdreno510,
kAdreno509,
kAdreno508,
kAdreno506,
kAdreno505,
kAdreno504,
// Adreno 4xx series
kAdreno430,
kAdreno420,
kAdreno418,
kAdreno405,
// Adreno 3xx series
kAdreno330,
kAdreno320,
kAdreno308,
kAdreno306,
kAdreno305,
kAdreno304,
// Adreno 2xx series
kAdreno225,
kAdreno220,
kAdreno205,
kAdreno203,
kAdreno200,
// Adreno 1xx series
kAdreno130,
kAdreno120,
kUnknown
};
// Describes a Qualcomm Adreno GPU: the detected model plus queries about its
// series and wave/register characteristics.
struct AdrenoInfo {
  AdrenoInfo() = default;
  // Detects the model from the model number mentioned in device_version.
  explicit AdrenoInfo(const std::string& device_version);

  // Detected model. Defaults to kUnknown so that a default-constructed
  // instance never holds an indeterminate value.
  AdrenoGpu adreno_gpu = AdrenoGpu::kUnknown;

  // Series predicates.
  bool IsAdreno1xx() const;
  bool IsAdreno2xx() const;
  bool IsAdreno3xx() const;
  bool IsAdreno4xx() const;
  bool IsAdreno5xx() const;
  bool IsAdreno6xx() const;
  bool IsAdreno6xxOrHigher() const;

  // Returns a sparsely documented physical parameter of Adreno 6xx GPUs.
  // The values were obtained using Snapdragon Profiler.
  int GetMaximumWavesCount() const;

  // Returns the amount of register memory per CU (Compute Unit) in bytes.
  int GetRegisterMemorySizePerComputeUnit() const;

  // Returns the maximum possible number of waves based on register usage.
  int GetMaximumWavesCount(int register_footprint_per_tread,
                           bool full_wave = true) const;

  // Returns the wave size; full_wave selects the full or half wave size.
  int GetWaveSize(bool full_wave) const;

  // Not supported on some Adreno devices with specific driver version.
  // b/131099086
  bool support_one_layer_texture_array = true;
};
// Known Mali GPU models. kUnknown is the first enumerator.
enum class MaliGpu {
kUnknown,
kT604,
kT622,
kT624,
kT628,
kT658,
kT678,
kT720,
kT760,
kT820,
kT830,
kT860,
kT880,
kG31,
kG51,
kG71,
kG52,
kG72,
kG76,
kG57,
kG77,
kG68,
kG78,
};
// Describes an ARM Mali GPU: the detected model plus queries about its
// family and architecture generation.
struct MaliInfo {
  MaliInfo() = default;
  // Detects the model from the model tag mentioned in gpu_description.
  explicit MaliInfo(const std::string& gpu_description);

  // Detected model. Defaults to kUnknown so that a default-constructed
  // instance never holds an indeterminate value.
  MaliGpu gpu_version = MaliGpu::kUnknown;

  // Family predicates for the T-series models.
  bool IsMaliT6xx() const;
  bool IsMaliT7xx() const;
  bool IsMaliT8xx() const;
  // True for any of the T6xx, T7xx or T8xx families.
  bool IsMidgard() const;

  // Bifrost generation predicates.
  bool IsBifrostGen1() const;
  bool IsBifrostGen2() const;
  bool IsBifrostGen3() const;
  // True for any Bifrost generation.
  bool IsBifrost() const;

  bool IsValhall() const;
};
// OpenCL versions in ascending order. Enumerator order matters: versions are
// compared with relational operators (for example `>= kCl1_2`), and kUnknown,
// being last, compares greater than every concrete version.
enum class OpenClVersion {
kCl1_0,
kCl1_1,
kCl1_2,
kCl2_0,
kCl2_1,
kCl2_2,
kCl3_0,
kUnknown,
};
// Returns `version` formatted as "major.minor", or "Unknown OpenCL version"
// when it is not one of the concrete versions.
std::string OpenClVersionToString(OpenClVersion version);
// OpenCL-specific capabilities and limits of a device.
//
// Every member has a default value so that a default-constructed instance
// never exposes indeterminate values through the GpuInfo accessors.
struct OpenClInfo {
  OpenClVersion cl_version = OpenClVersion::kUnknown;

  std::vector<std::string> extensions;
  bool supports_fp16 = false;
  bool supports_image3d_writes = false;
  int compute_units_count = 0;

  // Size limits for buffers and images.
  uint64_t buffer_max_size = 0;
  uint64_t image2d_max_width = 0;
  uint64_t image2d_max_height = 0;
  uint64_t image_buffer_max_size = 0;
  uint64_t image_array_max_layers = 0;
  uint64_t image3d_max_width = 0;
  uint64_t image3d_max_height = 0;
  uint64_t image3d_max_depth = 0;

  // Maximum work group size along each dimension.
  int max_work_group_size_x = 0;
  int max_work_group_size_y = 0;
  int max_work_group_size_z = 0;

  // rtn is ROUND_TO_NEAREST
  // with rtn precision is much better than with rtz (ROUND_TO_ZERO)
  // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
  // Mali from T6xx supports rtn
  // PowerVR supports only rtz
  bool supports_fp32_rtn = false;
  bool supports_fp16_rtn = false;

  // Support flags for 2D float textures, by channel layout and precision.
  bool supports_r_f16_tex2d = false;
  bool supports_rg_f16_tex2d = false;
  bool supports_rgb_f16_tex2d = false;
  bool supports_rgba_f16_tex2d = false;

  bool supports_r_f32_tex2d = false;
  bool supports_rg_f32_tex2d = false;
  bool supports_rgb_f32_tex2d = false;
  bool supports_rgba_f32_tex2d = false;
};
// Aggregated description of a GPU: vendor, vendor-specific details and
// OpenCL capabilities, with convenience queries over them.
struct GpuInfo {
  GpuInfo() = default;

  // Vendor predicates, based on gpu_vendor.
  bool IsAdreno() const;
  bool IsApple() const;
  bool IsMali() const;
  bool IsPowerVR() const;
  bool IsNvidia() const;
  bool IsAMD() const;
  bool IsIntel() const;

  // Capability queries.
  bool SupportsFP16() const;
  bool SupportsTextureArray() const;
  bool SupportsImageBuffer() const;
  bool SupportsImage3D() const;
  bool SupportsFloatImage2D(DataType data_type, int channels) const;

  // Returns true when `extension` is listed in opencl_info.extensions.
  bool SupportsExtension(const std::string& extension) const;
  bool IsCL20OrHigher() const;
  bool IsCL30OrHigher() const;
  // Returns true when sub_group_size is in supported_subgroup_sizes.
  bool SupportsSubGroupWithSize(int sub_group_size) const;

  int GetComputeUnitsCount() const;

  // floating point rounding mode
  bool IsRoundToNearestSupported() const;

  // Work group and image/buffer limits, read from opencl_info.
  int GetMaxWorkGroupSizeForX() const;
  int GetMaxWorkGroupSizeForY() const;
  int GetMaxWorkGroupSizeForZ() const;
  uint64_t GetMaxImage2DWidth() const;
  uint64_t GetMaxImage2DHeight() const;
  uint64_t GetMaxImage3DWidth() const;
  uint64_t GetMaxImage3DHeight() const;
  uint64_t GetMaxImage3DDepth() const;
  uint64_t GetMaxBufferSize() const;
  uint64_t GetMaxImageBufferWidth() const;
  uint64_t GetMaxImage2DArrayLayers() const;

  std::vector<int> supported_subgroup_sizes;

  // Defaults to kUnknown so that the vendor predicates are well defined on a
  // default-constructed instance.
  GpuVendor gpu_vendor = GpuVendor::kUnknown;

  AdrenoInfo adreno_info;
  MaliInfo mali_info;
  OpenClInfo opencl_info;
};
} // namespace cl
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_

View File

@ -19,9 +19,9 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"

View File

@ -578,7 +578,6 @@ cc_library(
":util",
"//tensorflow/lite/delegates/gpu/cl:buffer",
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/cl:linear_storage",
"//tensorflow/lite/delegates/gpu/cl:tensor",
"//tensorflow/lite/delegates/gpu/cl:texture2d",
@ -620,10 +619,10 @@ cc_library(
deps = [
":util",
":work_group_picking",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
"//tensorflow/lite/delegates/gpu/common:access_type",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:kernel_info",
"//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:status",
@ -728,7 +727,6 @@ cc_library(
":util",
":work_group_picking",
"//tensorflow/lite/delegates/gpu/cl:cl_program",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/common:operations",
"//tensorflow/lite/delegates/gpu/common:status",
"//tensorflow/lite/delegates/gpu/common:types",
@ -1206,8 +1204,8 @@ cc_library(
srcs = ["util.cc"],
hdrs = ["util.h"],
deps = [
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/common:data_type",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:precision",
"//tensorflow/lite/delegates/gpu/common:shape",
"//tensorflow/lite/delegates/gpu/common:tensor",
@ -1264,7 +1262,7 @@ cc_library(
hdrs = ["work_group_picking.h"],
deps = [
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/common:gpu_info",
"//tensorflow/lite/delegates/gpu/common:kernel_info",
"//tensorflow/lite/delegates/gpu/common:types",
"//tensorflow/lite/delegates/gpu/common:util",

View File

@ -20,7 +20,6 @@ limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"

View File

@ -25,7 +25,6 @@ limitations under the License.
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"

View File

@ -19,9 +19,9 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

View File

@ -18,7 +18,6 @@ limitations under the License.
#include <string>
#include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"

View File

@ -16,7 +16,6 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

View File

@ -31,7 +31,6 @@ cc_library(
deps = [
"//tensorflow/lite/delegates/gpu/cl:buffer",
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/cl:linear_storage",
"//tensorflow/lite/delegates/gpu/cl:tensor",
"//tensorflow/lite/delegates/gpu/cl:texture2d",

View File

@ -20,7 +20,6 @@ limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"

View File

@ -25,7 +25,6 @@ limitations under the License.
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"

View File

@ -20,8 +20,8 @@ limitations under the License.
#include <vector>
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

View File

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

View File

@ -49,7 +49,6 @@ cc_library(
hdrs = ["default_selector.h"],
deps = [
":subgraph",
"//tensorflow/lite/delegates/gpu/cl:device_info",
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
"//tensorflow/lite/delegates/gpu/cl/selectors/default:default_selector",
"//tensorflow/lite/delegates/gpu/common:model",

View File

@ -18,7 +18,6 @@ limitations under the License.
#include <memory>
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
#include "tensorflow/lite/delegates/gpu/cl/selectors/subgraph.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"

View File

@ -16,8 +16,8 @@ limitations under the License.
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"

View File

@ -40,6 +40,7 @@ cc_library(
srcs = ["gpu_info.cc"],
hdrs = ["gpu_info.h"],
deps = [
":data_type",
"@com_google_absl//absl/strings",
],
)

View File

@ -358,6 +358,27 @@ void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
}
}
// Formats an OpenCL version as "major.minor". Any value without a dedicated
// case (including kUnknown) yields "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version) {
  const char* label = "Unknown OpenCL version";
  switch (version) {
    case OpenClVersion::kCl1_0:
      label = "1.0";
      break;
    case OpenClVersion::kCl1_1:
      label = "1.1";
      break;
    case OpenClVersion::kCl1_2:
      label = "1.2";
      break;
    case OpenClVersion::kCl2_0:
      label = "2.0";
      break;
    case OpenClVersion::kCl2_1:
      label = "2.1";
      break;
    case OpenClVersion::kCl2_2:
      label = "2.2";
      break;
    case OpenClVersion::kCl3_0:
      label = "3.0";
      break;
    default:
      break;
  }
  return label;
}
// Vendor predicates: each compares `vendor` against a single GpuVendor value.
// IsAdreno() tests for the kQualcomm vendor.
bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; }
bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; }
@ -373,11 +394,45 @@ bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; }
// True when the vendor is Intel.
bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; }
// Reports whether round-to-nearest floating point rounding is available.
//
// Under OpenCL the answer comes from the fp16/fp32 rtn flags in opencl_info.
// For other APIs, Apple GPUs defer to apple_info and every other vendor is
// assumed to support it.
bool GpuInfo::IsRoundToNearestSupported() const {
  if (IsApiOpenCl()) {
    return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
  }
  if (IsApple()) {
    return apple_info.IsRoundToNearestSupported();
  }
  // A single fall-through return replaces the former `else { return true; }`
  // that left a second, unreachable `return true;` behind it.
  return true;
}
// Under OpenCL, reports the fp16 flag from opencl_info; for any other API
// fp16 is reported as supported.
bool GpuInfo::SupportsFP16() const {
  return IsApiOpenCl() ? opencl_info.supports_fp16 : true;
}
// Under OpenCL, texture arrays require a version that is not below 1.2 in
// enumerator order; for any other API they are reported as supported.
bool GpuInfo::SupportsTextureArray() const {
  if (!IsApiOpenCl()) {
    return true;
  }
  return !(opencl_info.cl_version < OpenClVersion::kCl1_2);
}
// Image buffers: under OpenCL they require cl_version to compare >= kCl1_2;
// every other API is reported as supporting them.
bool GpuInfo::SupportsImageBuffer() const {
  return IsApiOpenCl() ? opencl_info.cl_version >= OpenClVersion::kCl1_2
                       : true;
}
// 3D image support. Non-OpenCL APIs are reported as supporting it. Under
// OpenCL the answer is opencl_info.supports_image3d_writes, except on Midgard
// Mali devices where it is always false.
bool GpuInfo::SupportsImage3D() const {
  if (!IsApiOpenCl()) {
    return true;
  }
  // On Mali T880 read_imageh doesn't compile with image3d_t
  const bool midgard_mali = IsMali() && mali_info.IsMidgard();
  return midgard_mali ? false : opencl_info.supports_image3d_writes;
}
bool GpuInfo::IsWaveSizeEqualTo32() const {
@ -385,12 +440,66 @@ bool GpuInfo::IsWaveSizeEqualTo32() const {
supported_subgroup_sizes[0] == 32;
}
// Returns true when `extension` exactly matches one of the extension names
// recorded for the active API (OpenGL, Vulkan or OpenCL). Any other API has no
// extension list here, so the answer is false.
bool GpuInfo::SupportsExtension(const std::string& extension) const {
  // Pick the list that belongs to the active API; the APIs are probed in the
  // order OpenGL, Vulkan, OpenCL.
  const std::vector<std::string>* known =
      IsApiOpenGl()   ? &opengl_info.extensions
      : IsApiVulkan() ? &vulkan_info.extensions
      : IsApiOpenCl() ? &opencl_info.extensions
                      : nullptr;
  if (known == nullptr) {
    return false;
  }
  for (auto it = known->begin(); it != known->end(); ++it) {
    if (*it == extension) {
      return true;
    }
  }
  return false;
}
// Returns true when `sub_group_size` is one of the entries of
// supported_subgroup_sizes.
bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
  for (auto it = supported_subgroup_sizes.begin();
       it != supported_subgroup_sizes.end(); ++it) {
    if (*it == sub_group_size) {
      return true;
    }
  }
  return false;
}
// Reports whether a 2D float image with `channels` channels of `data_type` is
// supported. Only OpenCL carries this information; for any other API, and for
// channel counts outside 1..4, the answer is false. DataType::FLOAT32 selects
// the f32 flag for the channel count; every other data type selects the f16
// flag.
bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  if (!IsApiOpenCl()) {
    return false;
  }
  const bool is_fp32 = data_type == DataType::FLOAT32;
  switch (channels) {
    case 1:
      return is_fp32 ? opencl_info.supports_r_f32_tex2d
                     : opencl_info.supports_r_f16_tex2d;
    case 2:
      return is_fp32 ? opencl_info.supports_rg_f32_tex2d
                     : opencl_info.supports_rg_f16_tex2d;
    case 3:
      return is_fp32 ? opencl_info.supports_rgb_f32_tex2d
                     : opencl_info.supports_rgb_f16_tex2d;
    case 4:
      return is_fp32 ? opencl_info.supports_rgba_f32_tex2d
                     : opencl_info.supports_rgba_f16_tex2d;
    default:
      return false;
  }
}
// Returns the number of compute units of the device.
//  - OpenCL: the value stored in opencl_info.
//  - Apple GPUs (when the API is not OpenCL): answered by apple_info.
//  - Anything else: 1.
int GpuInfo::GetComputeUnitsCount() const {
  if (IsApiOpenCl()) {
    return opencl_info.compute_units_count;
  }
  if (IsApple()) {
    return apple_info.GetComputeUnitsCount();
  }
  // Single fallback; the former `else { return 1; }` arm made a second,
  // unreachable `return 1;` follow it.
  return 1;
}
int GpuInfo::GetMaxWorkGroupSizeForX() const {
@ -400,6 +509,9 @@ int GpuInfo::GetMaxWorkGroupSizeForX() const {
if (IsApiVulkan()) {
return vulkan_info.max_compute_work_group_size_x;
}
if (IsApiOpenCl()) {
return opencl_info.max_work_group_size_x;
}
return 256;
}
@ -410,6 +522,9 @@ int GpuInfo::GetMaxWorkGroupSizeForY() const {
if (IsApiVulkan()) {
return vulkan_info.max_compute_work_group_size_y;
}
if (IsApiOpenCl()) {
return opencl_info.max_work_group_size_y;
}
return 256;
}
@ -420,6 +535,9 @@ int GpuInfo::GetMaxWorkGroupSizeForZ() const {
if (IsApiVulkan()) {
return vulkan_info.max_compute_work_group_size_z;
}
if (IsApiOpenCl()) {
return opencl_info.max_work_group_size_z;
}
return 64;
}
@ -430,6 +548,9 @@ int GpuInfo::GetMaxWorkGroupTotalSize() const {
if (IsApiVulkan()) {
return vulkan_info.max_compute_work_group_invocations;
}
if (IsApiOpenCl()) {
return opencl_info.max_work_group_total_size;
}
return 256;
}
@ -440,6 +561,9 @@ uint64_t GpuInfo::GetMaxImage2DWidth() const {
if (IsApiVulkan()) {
return vulkan_info.max_image_dimension_2d;
}
if (IsApiOpenCl()) {
return opencl_info.image2d_max_width;
}
return 2048;
}
@ -450,6 +574,9 @@ uint64_t GpuInfo::GetMaxImage2DHeight() const {
if (IsApiVulkan()) {
return vulkan_info.max_image_dimension_2d;
}
if (IsApiOpenCl()) {
return opencl_info.image2d_max_height;
}
return 2048;
}
@ -460,9 +587,47 @@ uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
if (IsApiVulkan()) {
return vulkan_info.max_image_array_layers;
}
if (IsApiOpenCl()) {
return opencl_info.image_array_max_layers;
}
return 256;
}
// Maximum 3D image width: the OpenCL-reported limit when the API is OpenCL,
// otherwise a fixed fallback of 256.
uint64_t GpuInfo::GetMaxImage3DWidth() const {
  if (!IsApiOpenCl()) {
    return 256;
  }
  return opencl_info.image3d_max_width;
}
// Maximum 3D image height: the OpenCL-reported limit when the API is OpenCL,
// otherwise a fixed fallback of 256.
uint64_t GpuInfo::GetMaxImage3DHeight() const {
  if (!IsApiOpenCl()) {
    return 256;
  }
  return opencl_info.image3d_max_height;
}
// Maximum 3D image depth: the OpenCL-reported limit when the API is OpenCL,
// otherwise a fixed fallback of 256.
uint64_t GpuInfo::GetMaxImage3DDepth() const {
  if (!IsApiOpenCl()) {
    return 256;
  }
  return opencl_info.image3d_max_depth;
}
// Maximum buffer size: the OpenCL-reported limit when the API is OpenCL,
// otherwise a fixed fallback of 128 * 1024 * 1024.
uint64_t GpuInfo::GetMaxBufferSize() const {
  if (!IsApiOpenCl()) {
    return 128 * 1024 * 1024;
  }
  return opencl_info.buffer_max_size;
}
// Maximum image-buffer width: opencl_info.image_buffer_max_size when the API
// is OpenCL, otherwise a fixed fallback of 64 * 1024.
uint64_t GpuInfo::GetMaxImageBufferWidth() const {
  if (!IsApiOpenCl()) {
    return 64 * 1024;
  }
  return opencl_info.image_buffer_max_size;
}
int GpuInfo::GetMaxImageArguments() const {
if (IsApiOpenGl()) {
return opengl_info.max_image_units;
@ -481,12 +646,6 @@ int GpuInfo::GetMaxImageArguments() const {
// True when gpu_api is GpuApi::kOpenGl.
bool GpuInfo::IsApiOpenGl() const {
  return GpuApi::kOpenGl == gpu_api;
}
// True when gpu_api is GpuApi::kVulkan.
bool GpuInfo::IsApiVulkan() const {
  return GpuApi::kVulkan == gpu_api;
}
// True when gpu_api is GpuApi::kMetal.
bool GpuInfo::IsApiMetal() const {
  return GpuApi::kMetal == gpu_api;
}
// True when gpu_api is GpuApi::kOpenCl.
bool GpuInfo::IsApiOpenCl() const {
  return GpuApi::kOpenCl == gpu_api;
}
bool GpuInfo::IsApiOpenGl31OrAbove() const {
if (!IsApiOpenGl()) {
return false;
@ -495,5 +654,29 @@ bool GpuInfo::IsApiOpenGl31OrAbove() const {
opengl_info.major_version > 3;
}
// True when gpu_api is GpuApi::kVulkan.
bool GpuInfo::IsApiVulkan() const {
  return GpuApi::kVulkan == gpu_api;
}
// True when gpu_api is GpuApi::kMetal.
bool GpuInfo::IsApiMetal() const {
  return GpuApi::kMetal == gpu_api;
}
// True when gpu_api is GpuApi::kOpenCl.
bool GpuInfo::IsApiOpenCl() const {
  return GpuApi::kOpenCl == gpu_api;
}
// True when the API is OpenCL and cl_version is none of the 1.x versions.
// Every other enumerator, kUnknown included, counts as 2.0 or higher.
bool GpuInfo::IsCL20OrHigher() const {
  if (!IsApiOpenCl()) {
    return false;
  }
  switch (opencl_info.cl_version) {
    case OpenClVersion::kCl1_0:
    case OpenClVersion::kCl1_1:
    case OpenClVersion::kCl1_2:
      return false;
    default:
      return true;
  }
}
// True when the API is OpenCL, IsCL20OrHigher() holds and cl_version is none
// of the 2.x versions. Every remaining enumerator, kUnknown included, counts
// as 3.0 or higher.
bool GpuInfo::IsCL30OrHigher() const {
  if (!IsApiOpenCl()) {
    return false;
  }
  if (!IsCL20OrHigher()) {
    return false;
  }
  switch (opencl_info.cl_version) {
    case OpenClVersion::kCl2_0:
    case OpenClVersion::kCl2_1:
    case OpenClVersion::kCl2_2:
      return false;
    default:
      return true;
  }
}
} // namespace gpu
} // namespace tflite

View File

@ -19,6 +19,8 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
namespace tflite {
namespace gpu {
@ -236,6 +238,57 @@ struct VulkanInfo {
int max_compute_work_group_size_z;
};
// OpenCL versions that GpuInfo can distinguish.
//
// The declaration order is significant: GpuInfo::SupportsTextureArray() and
// GpuInfo::SupportsImageBuffer() compare values with `>=`, so enumerators must
// stay sorted from oldest to newest. Note that kUnknown is declared last and
// therefore compares greater than every concrete version.
enum class OpenClVersion {
kCl1_0,
kCl1_1,
kCl1_2,
kCl2_0,
kCl2_1,
kCl2_2,
kCl3_0,
// Version could not be determined; see the ordering note above.
kUnknown,
};
// Returns the "major.minor" text for `version` (for example "1.2"). Values
// without a dedicated name yield "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version);
// OpenCL-specific device properties consulted by GpuInfo when the active API
// is OpenCL.
//
// Every scalar member carries a default initializer so that a
// default-constructed OpenClInfo (and therefore a default-constructed GpuInfo)
// never exposes indeterminate values; whoever queries the device is expected
// to overwrite them with the real numbers.
struct OpenClInfo {
  // Detected OpenCL version; kUnknown until it has been filled in.
  OpenClVersion cl_version = OpenClVersion::kUnknown;

  // Extension names searched by GpuInfo::SupportsExtension().
  std::vector<std::string> extensions;
  bool supports_fp16 = false;
  bool supports_image3d_writes = false;
  int compute_units_count = 0;

  // Resource limits returned by the GpuInfo::GetMax*() accessors.
  uint64_t buffer_max_size = 0;
  uint64_t image2d_max_width = 0;
  uint64_t image2d_max_height = 0;
  uint64_t image_buffer_max_size = 0;
  uint64_t image_array_max_layers = 0;
  uint64_t image3d_max_width = 0;
  uint64_t image3d_max_height = 0;
  uint64_t image3d_max_depth = 0;

  // Work group limits returned by the GpuInfo::GetMaxWorkGroup*() accessors.
  int max_work_group_size_x = 0;
  int max_work_group_size_y = 0;
  int max_work_group_size_z = 0;
  int max_work_group_total_size = 0;

  // rtn is ROUND_TO_NEAREST
  // with rtn precision is much better than with rtz (ROUND_TO_ZERO)
  // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
  // Mali from T6xx supports rtn
  // PowerVR supports only rtz
  bool supports_fp32_rtn = false;
  bool supports_fp16_rtn = false;

  // Per-format 2D texture support, read by GpuInfo::SupportsFloatImage2D().
  bool supports_r_f16_tex2d = false;
  bool supports_rg_f16_tex2d = false;
  bool supports_rgb_f16_tex2d = false;
  bool supports_rgba_f16_tex2d = false;

  bool supports_r_f32_tex2d = false;
  bool supports_rg_f32_tex2d = false;
  bool supports_rgb_f32_tex2d = false;
  bool supports_rgba_f32_tex2d = false;
};
struct GpuInfo {
bool IsAdreno() const;
bool IsApple() const;
@ -248,8 +301,18 @@ struct GpuInfo {
// floating point rounding mode
bool IsRoundToNearestSupported() const;
bool SupportsFP16() const;
bool SupportsTextureArray() const;
bool SupportsImageBuffer() const;
bool SupportsImage3D() const;
// returns true if device have fixed wave size equal to 32
bool IsWaveSizeEqualTo32() const;
bool SupportsSubGroupWithSize(int sub_group_size) const;
bool SupportsFloatImage2D(DataType data_type, int channels) const;
bool SupportsExtension(const std::string& extension) const;
int GetComputeUnitsCount() const;
@ -263,6 +326,11 @@ struct GpuInfo {
uint64_t GetMaxImage2DWidth() const;
uint64_t GetMaxImage2DHeight() const;
uint64_t GetMaxImage2DArrayLayers() const;
uint64_t GetMaxImage3DWidth() const;
uint64_t GetMaxImage3DHeight() const;
uint64_t GetMaxImage3DDepth() const;
uint64_t GetMaxBufferSize() const;
uint64_t GetMaxImageBufferWidth() const;
GpuVendor vendor = GpuVendor::kUnknown;
GpuApi gpu_api = GpuApi::kUnknown;
@ -287,7 +355,10 @@ struct GpuInfo {
bool IsApiMetal() const;
OpenClInfo opencl_info;
bool IsApiOpenCl() const;
bool IsCL20OrHigher() const;
bool IsCL30OrHigher() const;
};
inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {