Merged gpu/cl/device_info into gpu/common/gpu_info.
PiperOrigin-RevId: 343573157 Change-Id: I3173ac6fedd723c8a625a4a7b249e2b58c9d9f34
This commit is contained in:
parent
a5dbf466e2
commit
964d71a2c2
@ -99,13 +99,13 @@ cc_library(
|
||||
deps = [
|
||||
":buffer",
|
||||
":cl_context",
|
||||
":device_info",
|
||||
":gpu_object",
|
||||
":linear_storage",
|
||||
":tensor",
|
||||
":texture2d",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
@ -126,8 +126,8 @@ cc_test(
|
||||
deps = [
|
||||
":buffer",
|
||||
":cl_arguments",
|
||||
":device_info",
|
||||
":gpu_object",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
@ -171,9 +171,9 @@ cc_library(
|
||||
srcs = ["cl_device.cc"],
|
||||
hdrs = ["cl_device.h"],
|
||||
deps = [
|
||||
":device_info",
|
||||
":opencl_wrapper",
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"@com_google_absl//absl/strings",
|
||||
@ -276,16 +276,6 @@ flatbuffer_cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "device_info",
|
||||
srcs = ["device_info.cc"],
|
||||
hdrs = ["device_info.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "egl_sync",
|
||||
srcs = ["egl_sync.cc"],
|
||||
@ -307,11 +297,11 @@ cc_library(
|
||||
":cl_command_queue",
|
||||
":cl_context",
|
||||
":cl_device",
|
||||
":device_info",
|
||||
":program_cache",
|
||||
":tensor",
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
@ -509,8 +499,8 @@ cc_library(
|
||||
srcs = ["storage_type_util.cc"],
|
||||
hdrs = ["storage_type_util.h"],
|
||||
deps = [
|
||||
":device_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
"//tensorflow/lite/delegates/gpu/common/task:tensor_desc",
|
||||
|
||||
@ -21,8 +21,8 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
|
||||
|
||||
|
||||
@ -21,8 +21,8 @@ limitations under the License.
|
||||
#include <gtest/gtest.h>
|
||||
#include "absl/strings/match.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
@ -162,7 +162,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
|
||||
const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
|
||||
const auto opencl_c_version =
|
||||
GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
|
||||
info.gpu_vendor = ParseVendor(device_name, vendor_name);
|
||||
info.gpu_api = GpuApi::kOpenCl;
|
||||
info.vendor = ParseVendor(device_name, vendor_name);
|
||||
if (info.IsAdreno()) {
|
||||
info.adreno_info = AdrenoInfo(opencl_c_version);
|
||||
} else if (info.IsMali()) {
|
||||
@ -243,6 +244,8 @@ GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
|
||||
info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
|
||||
info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
|
||||
info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
|
||||
info.opencl_info.max_work_group_total_size =
|
||||
GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
|
||||
|
||||
if (info.IsIntel()) {
|
||||
if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
|
||||
|
||||
@ -19,9 +19,9 @@ limitations under the License.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
|
||||
|
||||
@ -1,438 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "absl/strings/str_split.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
namespace {
|
||||
// Maps a device description string to an AdrenoGpu model.
//
// Every table key is a three-digit model number. Keys are tried in the
// std::map's ascending key order, and the first key that occurs anywhere in
// `device_name` as a substring selects the result. Returns
// AdrenoGpu::kUnknown when no key is contained in `device_name`.
AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) {
  // Keys must be unique: std::map keeps only the first entry of a duplicated
  // key, which would silently drop the second model.
  const std::map<std::string, AdrenoGpu> kMapping = {
      // Adreno 6xx series
      {"685", AdrenoGpu::kAdreno685},
      {"680", AdrenoGpu::kAdreno680},
      {"675", AdrenoGpu::kAdreno675},
      {"650", AdrenoGpu::kAdreno650},
      {"640", AdrenoGpu::kAdreno640},
      {"630", AdrenoGpu::kAdreno630},
      {"620", AdrenoGpu::kAdreno620},
      {"618", AdrenoGpu::kAdreno618},
      {"616", AdrenoGpu::kAdreno616},
      {"615", AdrenoGpu::kAdreno615},
      {"612", AdrenoGpu::kAdreno612},
      {"610", AdrenoGpu::kAdreno610},
      {"605", AdrenoGpu::kAdreno605},
      // Adreno 5xx series
      {"540", AdrenoGpu::kAdreno540},
      {"530", AdrenoGpu::kAdreno530},
      {"512", AdrenoGpu::kAdreno512},
      {"510", AdrenoGpu::kAdreno510},
      {"509", AdrenoGpu::kAdreno509},
      {"508", AdrenoGpu::kAdreno508},
      {"506", AdrenoGpu::kAdreno506},
      {"505", AdrenoGpu::kAdreno505},
      {"504", AdrenoGpu::kAdreno504},
      // Adreno 4xx series
      {"430", AdrenoGpu::kAdreno430},
      {"420", AdrenoGpu::kAdreno420},
      {"418", AdrenoGpu::kAdreno418},
      {"405", AdrenoGpu::kAdreno405},
      // Adreno 3xx series
      {"330", AdrenoGpu::kAdreno330},
      {"320", AdrenoGpu::kAdreno320},
      {"308", AdrenoGpu::kAdreno308},
      {"306", AdrenoGpu::kAdreno306},
      {"305", AdrenoGpu::kAdreno305},
      {"304", AdrenoGpu::kAdreno304},
      // Adreno 2xx series
      {"225", AdrenoGpu::kAdreno225},
      {"220", AdrenoGpu::kAdreno220},
      {"205", AdrenoGpu::kAdreno205},
      {"203", AdrenoGpu::kAdreno203},
      {"200", AdrenoGpu::kAdreno200},
      // Adreno 1xx series
      {"130", AdrenoGpu::kAdreno130},
      {"120", AdrenoGpu::kAdreno120},
  };

  for (const auto& v : kMapping) {
    if (device_name.find(v.first) != std::string::npos) {
      return v.second;
    }
  }
  return AdrenoGpu::kUnknown;
}
|
||||
|
||||
// Resolves a GPU description string to a MaliGpu model.
//
// The lower-case model tags below are looked up as plain substrings of
// `gpu_description`, in the std::map's ascending key order; the first tag
// found decides the result. MaliGpu::kUnknown is returned when none is found.
MaliGpu GetMaliGpuVersion(const std::string& gpu_description) {
  const std::map<std::string, MaliGpu> kMapping = {
      {"t604", MaliGpu::kT604},
      {"t622", MaliGpu::kT622},
      {"t624", MaliGpu::kT624},
      {"t628", MaliGpu::kT628},
      {"t658", MaliGpu::kT658},
      {"t678", MaliGpu::kT678},
      {"t720", MaliGpu::kT720},
      {"t760", MaliGpu::kT760},
      {"t820", MaliGpu::kT820},
      {"t830", MaliGpu::kT830},
      {"t860", MaliGpu::kT860},
      {"t880", MaliGpu::kT880},
      {"g31", MaliGpu::kG31},
      {"g51", MaliGpu::kG51},
      {"g71", MaliGpu::kG71},
      {"g52", MaliGpu::kG52},
      {"g72", MaliGpu::kG72},
      {"g76", MaliGpu::kG76},
      {"g57", MaliGpu::kG57},
      {"g77", MaliGpu::kG77},
      {"g68", MaliGpu::kG68},
      {"g78", MaliGpu::kG78},
  };
  for (auto entry = kMapping.begin(); entry != kMapping.end(); ++entry) {
    const bool tag_present =
        gpu_description.find(entry->first) != std::string::npos;
    if (tag_present) {
      return entry->second;
    }
  }
  return MaliGpu::kUnknown;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns a human-readable name for `v`.
//
// Every GpuVendor enumerator has its own case. A value that matches none of
// them (for example one produced by a cast) yields "unknown vendor" instead
// of flowing off the end of the function.
std::string GpuVendorToString(GpuVendor v) {
  switch (v) {
    case GpuVendor::kApple:
      return "Apple";
    case GpuVendor::kQualcomm:
      return "Qualcomm";
    case GpuVendor::kMali:
      return "Mali";
    case GpuVendor::kPowerVR:
      return "PowerVR";
    case GpuVendor::kNvidia:
      return "NVIDIA";
    case GpuVendor::kAMD:
      return "AMD";
    case GpuVendor::kIntel:
      return "Intel";
    case GpuVendor::kUnknown:
      return "unknown vendor";
  }
  // Reached only for values outside the enumerator list; no `default:` label
  // is used so compilers can still warn when a new enumerator lacks a case.
  return "unknown vendor";
}
|
||||
|
||||
// Formats an OpenClVersion as "<major>.<minor>". Any value without a known
// version string (including OpenClVersion::kUnknown) produces
// "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version) {
  if (version == OpenClVersion::kCl1_0) return "1.0";
  if (version == OpenClVersion::kCl1_1) return "1.1";
  if (version == OpenClVersion::kCl1_2) return "1.2";
  if (version == OpenClVersion::kCl2_0) return "2.0";
  if (version == OpenClVersion::kCl2_1) return "2.1";
  if (version == OpenClVersion::kCl2_2) return "2.2";
  if (version == OpenClVersion::kCl3_0) return "3.0";
  return "Unknown OpenCL version";
}
|
||||
|
||||
AdrenoInfo::AdrenoInfo(const std::string& device_version)
|
||||
: adreno_gpu(GetAdrenoGpuVersion(device_version)) {}
|
||||
|
||||
bool AdrenoInfo::IsAdreno1xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno120 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno130;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno2xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno200 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno203 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno205 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno220 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno225;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno3xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno304 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno305 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno306 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno308 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno320 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno330;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno4xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno405 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno418 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno420 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno430;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno5xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno504 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno505 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno506 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno508 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno509 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno510 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno512 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno530 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno540;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno6xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno605 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno610 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno612 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno615 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno616 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno618 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno620 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno630 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno640 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno650 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno675 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno680 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno685;
|
||||
}
|
||||
|
||||
// Currently the same answer as IsAdreno6xx(): no model above the 6xx series
// is listed in AdrenoGpu.
bool AdrenoInfo::IsAdreno6xxOrHigher() const {
  const bool is_6xx = IsAdreno6xx();
  return is_6xx;
}
|
||||
|
||||
int AdrenoInfo::GetMaximumWavesCount() const {
|
||||
if (IsAdreno6xx()) {
|
||||
if (adreno_gpu == AdrenoGpu::kAdreno640) {
|
||||
return 30;
|
||||
} else {
|
||||
return 16;
|
||||
}
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
|
||||
if (IsAdreno6xx()) {
|
||||
if (adreno_gpu == AdrenoGpu::kAdreno640) {
|
||||
return 128 * 144 * 16;
|
||||
} else if (adreno_gpu == AdrenoGpu::kAdreno650) {
|
||||
return 128 * 64 * 16;
|
||||
} else {
|
||||
return 128 * 96 * 16;
|
||||
}
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
|
||||
bool full_wave) const {
|
||||
const int register_usage_per_wave =
|
||||
GetWaveSize(full_wave) * register_footprint_per_tread;
|
||||
const int possible_waves_count =
|
||||
GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
|
||||
return std::min(possible_waves_count, GetMaximumWavesCount());
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetWaveSize(bool full_wave) const {
|
||||
if (IsAdreno6xx()) {
|
||||
return full_wave ? 128 : 64;
|
||||
} else if (IsAdreno5xx() || IsAdreno4xx()) {
|
||||
return full_wave ? 64 : 32;
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
MaliInfo::MaliInfo(const std::string& gpu_description)
|
||||
: gpu_version(GetMaliGpuVersion(gpu_description)) {}
|
||||
|
||||
bool MaliInfo::IsMaliT6xx() const {
|
||||
return gpu_version == MaliGpu::kT604 || gpu_version == MaliGpu::kT622 ||
|
||||
gpu_version == MaliGpu::kT624 || gpu_version == MaliGpu::kT628 ||
|
||||
gpu_version == MaliGpu::kT658 || gpu_version == MaliGpu::kT678;
|
||||
}
|
||||
|
||||
bool MaliInfo::IsMaliT7xx() const {
|
||||
return gpu_version == MaliGpu::kT720 || gpu_version == MaliGpu::kT760;
|
||||
}
|
||||
|
||||
bool MaliInfo::IsMaliT8xx() const {
|
||||
return gpu_version == MaliGpu::kT820 || gpu_version == MaliGpu::kT830 ||
|
||||
gpu_version == MaliGpu::kT860 || gpu_version == MaliGpu::kT880;
|
||||
}
|
||||
|
||||
bool MaliInfo::IsMidgard() const {
|
||||
return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx();
|
||||
}
|
||||
|
||||
bool MaliInfo::IsBifrostGen1() const {
|
||||
return gpu_version == MaliGpu::kG31 || gpu_version == MaliGpu::kG51 ||
|
||||
gpu_version == MaliGpu::kG71;
|
||||
}
|
||||
|
||||
bool MaliInfo::IsBifrostGen2() const {
|
||||
return gpu_version == MaliGpu::kG52 || gpu_version == MaliGpu::kG72;
|
||||
}
|
||||
|
||||
// True for the single model grouped as third-generation Bifrost: G76.
bool MaliInfo::IsBifrostGen3() const {
  const bool is_g76 = (gpu_version == MaliGpu::kG76);
  return is_g76;
}
|
||||
|
||||
bool MaliInfo::IsBifrost() const {
|
||||
return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3();
|
||||
}
|
||||
|
||||
bool MaliInfo::IsValhall() const {
|
||||
return gpu_version == MaliGpu::kG57 || gpu_version == MaliGpu::kG77 ||
|
||||
gpu_version == MaliGpu::kG68 || gpu_version == MaliGpu::kG78;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsFP16() const { return opencl_info.supports_fp16; }
|
||||
|
||||
bool GpuInfo::SupportsTextureArray() const {
|
||||
return opencl_info.cl_version >= OpenClVersion::kCl1_2;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsImageBuffer() const {
|
||||
return opencl_info.cl_version >= OpenClVersion::kCl1_2;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsImage3D() const {
|
||||
if (IsMali() && mali_info.IsMidgard()) {
|
||||
// On Mali T880 read_imageh doesn't compile with image3d_t
|
||||
return false;
|
||||
}
|
||||
return opencl_info.supports_image3d_writes;
|
||||
}
|
||||
|
||||
// Looks up the stored 2D-texture support flag for the given channel count.
// DataType::FLOAT32 selects the *_f32_* flag; every other data type selects
// the *_f16_* flag. Channel counts outside 1..4 are never supported.
bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  const bool is_fp32 = data_type == DataType::FLOAT32;
  switch (channels) {
    case 1:
      return is_fp32 ? opencl_info.supports_r_f32_tex2d
                     : opencl_info.supports_r_f16_tex2d;
    case 2:
      return is_fp32 ? opencl_info.supports_rg_f32_tex2d
                     : opencl_info.supports_rg_f16_tex2d;
    case 3:
      return is_fp32 ? opencl_info.supports_rgb_f32_tex2d
                     : opencl_info.supports_rgb_f16_tex2d;
    case 4:
      return is_fp32 ? opencl_info.supports_rgba_f32_tex2d
                     : opencl_info.supports_rgba_f16_tex2d;
    default:
      return false;
  }
}
|
||||
|
||||
bool GpuInfo::SupportsExtension(const std::string& extension) const {
|
||||
for (const auto& ext : opencl_info.extensions) {
|
||||
if (ext == extension) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsCL20OrHigher() const {
|
||||
return opencl_info.cl_version != OpenClVersion::kCl1_0 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl1_1 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl1_2;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsCL30OrHigher() const {
|
||||
return IsCL20OrHigher() && opencl_info.cl_version != OpenClVersion::kCl2_0 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl2_1 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl2_2;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
|
||||
for (auto subgroup_size : supported_subgroup_sizes) {
|
||||
if (sub_group_size == subgroup_size) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int GpuInfo::GetComputeUnitsCount() const {
|
||||
return opencl_info.compute_units_count;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsRoundToNearestSupported() const {
|
||||
return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxWorkGroupSizeForX() const {
|
||||
return opencl_info.max_work_group_size_x;
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxWorkGroupSizeForY() const {
|
||||
return opencl_info.max_work_group_size_y;
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxWorkGroupSizeForZ() const {
|
||||
return opencl_info.max_work_group_size_z;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DWidth() const {
|
||||
return opencl_info.image2d_max_width;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DHeight() const {
|
||||
return opencl_info.image2d_max_height;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DWidth() const {
|
||||
return opencl_info.image3d_max_width;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DHeight() const {
|
||||
return opencl_info.image3d_max_height;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DDepth() const {
|
||||
return opencl_info.image3d_max_depth;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxBufferSize() const {
|
||||
return opencl_info.buffer_max_size;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImageBufferWidth() const {
|
||||
return opencl_info.image_buffer_max_size;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
|
||||
return opencl_info.image_array_max_layers;
|
||||
}
|
||||
|
||||
// Adreno devices are identified by the Qualcomm vendor value.
bool GpuInfo::IsAdreno() const {
  const bool is_qualcomm = (gpu_vendor == GpuVendor::kQualcomm);
  return is_qualcomm;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kApple.
bool GpuInfo::IsApple() const {
  const bool is_apple = (gpu_vendor == GpuVendor::kApple);
  return is_apple;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kMali.
bool GpuInfo::IsMali() const {
  const bool is_mali = (gpu_vendor == GpuVendor::kMali);
  return is_mali;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kPowerVR.
bool GpuInfo::IsPowerVR() const {
  const bool is_powervr = (gpu_vendor == GpuVendor::kPowerVR);
  return is_powervr;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kNvidia.
bool GpuInfo::IsNvidia() const {
  const bool is_nvidia = (gpu_vendor == GpuVendor::kNvidia);
  return is_nvidia;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kAMD.
bool GpuInfo::IsAMD() const {
  const bool is_amd = (gpu_vendor == GpuVendor::kAMD);
  return is_amd;
}
|
||||
|
||||
// True when the stored vendor is GpuVendor::kIntel.
bool GpuInfo::IsIntel() const {
  const bool is_intel = (gpu_vendor == GpuVendor::kIntel);
  return is_intel;
}
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
@ -1,275 +0,0 @@
|
||||
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
|
||||
// For use only in device_info.cc, but kept in the header so tests can call it.
// Declared at global scope, outside the tflite::gpu::cl namespace.
// NOTE(review): device_info.cc as shown defines only a file-local
// `AdrenoGpu GetAdrenoGpuVersion(const std::string&)`; no definition with this
// exact name and `int` return type is visible there - confirm this
// declaration is still needed.
|
||||
int GetAdrenoGPUVersion(const std::string& gpu_version);
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
namespace cl {
|
||||
|
||||
// GPU vendors distinguished by GpuInfo. kUnknown is the last enumerator and
// is the value reported as "unknown vendor" by GpuVendorToString().
enum class GpuVendor {
  kApple,
  kQualcomm,  // Checked by GpuInfo::IsAdreno().
  kMali,
  kPowerVR,
  kNvidia,
  kAMD,
  kIntel,
  kUnknown,
};
|
||||
|
||||
// Returns a human-readable name for `v` (e.g. "Qualcomm", "NVIDIA");
// GpuVendor::kUnknown is reported as "unknown vendor".
std::string GpuVendorToString(GpuVendor v);
|
||||
|
||||
// Adreno models recognised by AdrenoInfo, grouped by series from newest to
// oldest. kUnknown is last and is used when no model could be matched.
enum class AdrenoGpu {
  // Adreno 6xx series
  kAdreno685,
  kAdreno680,
  kAdreno675,
  kAdreno650,
  kAdreno640,
  kAdreno630,
  kAdreno620,
  kAdreno618,
  kAdreno616,
  kAdreno615,
  kAdreno612,
  kAdreno610,
  kAdreno605,

  // Adreno 5xx series
  kAdreno540,
  kAdreno530,
  kAdreno512,
  kAdreno510,
  kAdreno509,
  kAdreno508,
  kAdreno506,
  kAdreno505,
  kAdreno504,

  // Adreno 4xx series
  kAdreno430,
  kAdreno420,
  kAdreno418,
  kAdreno405,

  // Adreno 3xx series
  kAdreno330,
  kAdreno320,
  kAdreno308,
  kAdreno306,
  kAdreno305,
  kAdreno304,

  // Adreno 2xx series
  kAdreno225,
  kAdreno220,
  kAdreno205,
  kAdreno203,
  kAdreno200,

  // Adreno 1xx series
  kAdreno130,
  kAdreno120,

  // No known model matched.
  kUnknown
};
|
||||
|
||||
struct AdrenoInfo {
|
||||
AdrenoInfo() = default;
|
||||
explicit AdrenoInfo(const std::string& device_version);
|
||||
|
||||
AdrenoGpu adreno_gpu;
|
||||
|
||||
bool IsAdreno1xx() const;
|
||||
bool IsAdreno2xx() const;
|
||||
bool IsAdreno3xx() const;
|
||||
bool IsAdreno4xx() const;
|
||||
bool IsAdreno5xx() const;
|
||||
bool IsAdreno6xx() const;
|
||||
bool IsAdreno6xxOrHigher() const;
|
||||
|
||||
// This function returns some not very documented physical parameter of
|
||||
// Adreno6xx GPU.
|
||||
// We obtained it using Snapdragon Profiler.
|
||||
int GetMaximumWavesCount() const;
|
||||
|
||||
// returns amount of register memory per CU(Compute Unit) in bytes.
|
||||
int GetRegisterMemorySizePerComputeUnit() const;
|
||||
|
||||
// returns maximum possible amount of waves based on register usage.
|
||||
int GetMaximumWavesCount(int register_footprint_per_tread,
|
||||
bool full_wave = true) const;
|
||||
|
||||
int GetWaveSize(bool full_wave) const;
|
||||
|
||||
// Not supported on some Adreno devices with specific driver version.
|
||||
// b/131099086
|
||||
bool support_one_layer_texture_array = true;
|
||||
};
|
||||
|
||||
// Mali models recognised by MaliInfo. kUnknown comes first (value 0) and is
// used when no model could be matched.
enum class MaliGpu {
  kUnknown,
  // T6xx
  kT604,
  kT622,
  kT624,
  kT628,
  kT658,
  kT678,
  // T7xx
  kT720,
  kT760,
  // T8xx
  kT820,
  kT830,
  kT860,
  kT880,
  // Gxx
  kG31,
  kG51,
  kG71,
  kG52,
  kG72,
  kG76,
  kG57,
  kG77,
  kG68,
  kG78,
};
|
||||
|
||||
struct MaliInfo {
|
||||
MaliInfo() = default;
|
||||
explicit MaliInfo(const std::string& gpu_description);
|
||||
MaliGpu gpu_version;
|
||||
|
||||
bool IsMaliT6xx() const;
|
||||
bool IsMaliT7xx() const;
|
||||
bool IsMaliT8xx() const;
|
||||
bool IsMidgard() const;
|
||||
bool IsBifrostGen1() const;
|
||||
bool IsBifrostGen2() const;
|
||||
bool IsBifrostGen3() const;
|
||||
bool IsBifrost() const;
|
||||
bool IsValhall() const;
|
||||
};
|
||||
|
||||
// OpenCL versions in ascending order. The declaration order matters: code
// compares these values with relational operators, and kUnknown, being last,
// compares greater than every known version.
enum class OpenClVersion {
  kCl1_0,
  kCl1_1,
  kCl1_2,
  kCl2_0,
  kCl2_1,
  kCl2_2,
  kCl3_0,
  kUnknown,  // Must stay last.
};
|
||||
// Returns `version` formatted as "<major>.<minor>" (e.g. "1.2"); values
// without a known version string yield "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version);
|
||||
|
||||
struct OpenClInfo {
|
||||
OpenClVersion cl_version;
|
||||
|
||||
std::vector<std::string> extensions;
|
||||
bool supports_fp16;
|
||||
bool supports_image3d_writes;
|
||||
int compute_units_count;
|
||||
uint64_t buffer_max_size;
|
||||
uint64_t image2d_max_width;
|
||||
uint64_t image2d_max_height;
|
||||
uint64_t image_buffer_max_size;
|
||||
uint64_t image_array_max_layers;
|
||||
uint64_t image3d_max_width;
|
||||
uint64_t image3d_max_height;
|
||||
uint64_t image3d_max_depth;
|
||||
int max_work_group_size_x;
|
||||
int max_work_group_size_y;
|
||||
int max_work_group_size_z;
|
||||
|
||||
// rtn is ROUND_TO_NEAREST
|
||||
// with rtn precision is much better then with rtz (ROUND_TO_ZERO)
|
||||
// Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
|
||||
// Mali from T6xx supports rtn
|
||||
// PowerVR supports only rtz
|
||||
bool supports_fp32_rtn;
|
||||
bool supports_fp16_rtn;
|
||||
|
||||
bool supports_r_f16_tex2d = false;
|
||||
bool supports_rg_f16_tex2d = false;
|
||||
bool supports_rgb_f16_tex2d = false;
|
||||
bool supports_rgba_f16_tex2d = false;
|
||||
|
||||
bool supports_r_f32_tex2d = false;
|
||||
bool supports_rg_f32_tex2d = false;
|
||||
bool supports_rgb_f32_tex2d = false;
|
||||
bool supports_rgba_f32_tex2d = false;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
GpuInfo() = default;
|
||||
|
||||
bool IsAdreno() const;
|
||||
bool IsApple() const;
|
||||
bool IsMali() const;
|
||||
bool IsPowerVR() const;
|
||||
bool IsNvidia() const;
|
||||
bool IsAMD() const;
|
||||
bool IsIntel() const;
|
||||
|
||||
bool SupportsFP16() const;
|
||||
|
||||
bool SupportsTextureArray() const;
|
||||
bool SupportsImageBuffer() const;
|
||||
bool SupportsImage3D() const;
|
||||
|
||||
bool SupportsFloatImage2D(DataType data_type, int channels) const;
|
||||
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
bool IsCL20OrHigher() const;
|
||||
bool IsCL30OrHigher() const;
|
||||
bool SupportsSubGroupWithSize(int sub_group_size) const;
|
||||
|
||||
int GetComputeUnitsCount() const;
|
||||
|
||||
// floating point rounding mode
|
||||
bool IsRoundToNearestSupported() const;
|
||||
|
||||
int GetMaxWorkGroupSizeForX() const;
|
||||
int GetMaxWorkGroupSizeForY() const;
|
||||
int GetMaxWorkGroupSizeForZ() const;
|
||||
|
||||
uint64_t GetMaxImage2DWidth() const;
|
||||
uint64_t GetMaxImage2DHeight() const;
|
||||
uint64_t GetMaxImage3DWidth() const;
|
||||
uint64_t GetMaxImage3DHeight() const;
|
||||
uint64_t GetMaxImage3DDepth() const;
|
||||
|
||||
uint64_t GetMaxBufferSize() const;
|
||||
uint64_t GetMaxImageBufferWidth() const;
|
||||
uint64_t GetMaxImage2DArrayLayers() const;
|
||||
|
||||
std::vector<int> supported_subgroup_sizes;
|
||||
|
||||
GpuVendor gpu_vendor;
|
||||
|
||||
AdrenoInfo adreno_info;
|
||||
MaliInfo mali_info;
|
||||
|
||||
OpenClInfo opencl_info;
|
||||
};
|
||||
|
||||
} // namespace cl
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_
|
||||
@ -19,9 +19,9 @@ limitations under the License.
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
|
||||
|
||||
@ -578,7 +578,6 @@ cc_library(
|
||||
":util",
|
||||
"//tensorflow/lite/delegates/gpu/cl:buffer",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/cl:linear_storage",
|
||||
"//tensorflow/lite/delegates/gpu/cl:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/cl:texture2d",
|
||||
@ -620,10 +619,10 @@ cc_library(
|
||||
deps = [
|
||||
":util",
|
||||
":work_group_picking",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/cl:serialization_cc_fbs",
|
||||
"//tensorflow/lite/delegates/gpu/common:access_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:kernel_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
@ -728,7 +727,6 @@ cc_library(
|
||||
":util",
|
||||
":work_group_picking",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_program",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:operations",
|
||||
"//tensorflow/lite/delegates/gpu/common:status",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
@ -1206,8 +1204,8 @@ cc_library(
|
||||
srcs = ["util.cc"],
|
||||
hdrs = ["util.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:data_type",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:precision",
|
||||
"//tensorflow/lite/delegates/gpu/common:shape",
|
||||
"//tensorflow/lite/delegates/gpu/common:tensor",
|
||||
@ -1264,7 +1262,7 @@ cc_library(
|
||||
hdrs = ["work_group_picking.h"],
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:gpu_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:kernel_info",
|
||||
"//tensorflow/lite/delegates/gpu/common:types",
|
||||
"//tensorflow/lite/delegates/gpu/common:util",
|
||||
|
||||
@ -20,7 +20,6 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"
|
||||
|
||||
@ -25,7 +25,6 @@ limitations under the License.
|
||||
#include "absl/memory/memory.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
|
||||
@ -19,9 +19,9 @@ limitations under the License.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
|
||||
@ -18,7 +18,6 @@ limitations under the License.
|
||||
#include <string>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h"
|
||||
|
||||
|
||||
@ -16,7 +16,6 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/operations.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/status.h"
|
||||
|
||||
@ -31,7 +31,6 @@ cc_library(
|
||||
deps = [
|
||||
"//tensorflow/lite/delegates/gpu/cl:buffer",
|
||||
"//tensorflow/lite/delegates/gpu/cl:cl_kernel",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/cl:linear_storage",
|
||||
"//tensorflow/lite/delegates/gpu/cl:tensor",
|
||||
"//tensorflow/lite/delegates/gpu/cl:texture2d",
|
||||
|
||||
@ -20,7 +20,6 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/linear_storage.h"
|
||||
|
||||
@ -25,7 +25,6 @@ limitations under the License.
|
||||
#include "absl/memory/memory.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/texture2d.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
|
||||
@ -20,8 +20,8 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/span.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/precision.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
|
||||
|
||||
@ -18,7 +18,7 @@ limitations under the License.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/types.h"
|
||||
|
||||
@ -49,7 +49,6 @@ cc_library(
|
||||
hdrs = ["default_selector.h"],
|
||||
deps = [
|
||||
":subgraph",
|
||||
"//tensorflow/lite/delegates/gpu/cl:device_info",
|
||||
"//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation",
|
||||
"//tensorflow/lite/delegates/gpu/cl/selectors/default:default_selector",
|
||||
"//tensorflow/lite/delegates/gpu/common:model",
|
||||
|
||||
@ -18,7 +18,6 @@ limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
|
||||
#include "tensorflow/lite/delegates/gpu/cl/selectors/subgraph.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/model.h"
|
||||
|
||||
@ -16,8 +16,8 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
|
||||
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/cl/device_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/shape.h"
|
||||
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
|
||||
|
||||
|
||||
@ -40,6 +40,7 @@ cc_library(
|
||||
srcs = ["gpu_info.cc"],
|
||||
hdrs = ["gpu_info.h"],
|
||||
deps = [
|
||||
":data_type",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
||||
@ -358,6 +358,27 @@ void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
|
||||
}
|
||||
}
|
||||
|
||||
// Converts an OpenClVersion into its printable "major.minor" form.
// Any value without a dedicated entry (OpenClVersion::kUnknown included)
// produces the string "Unknown OpenCL version".
std::string OpenClVersionToString(OpenClVersion version) {
  if (version == OpenClVersion::kCl1_0) return "1.0";
  if (version == OpenClVersion::kCl1_1) return "1.1";
  if (version == OpenClVersion::kCl1_2) return "1.2";
  if (version == OpenClVersion::kCl2_0) return "2.0";
  if (version == OpenClVersion::kCl2_1) return "2.1";
  if (version == OpenClVersion::kCl2_2) return "2.2";
  if (version == OpenClVersion::kCl3_0) return "3.0";
  return "Unknown OpenCL version";
}
|
||||
|
||||
// Reports an Adreno GPU by checking that the vendor is GpuVendor::kQualcomm.
bool GpuInfo::IsAdreno() const {
  return GpuVendor::kQualcomm == vendor;
}
|
||||
|
||||
// Reports an Apple GPU by checking that the vendor is GpuVendor::kApple.
bool GpuInfo::IsApple() const {
  return GpuVendor::kApple == vendor;
}
|
||||
@ -373,11 +394,45 @@ bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; }
|
||||
// Reports an Intel GPU by checking that the vendor is GpuVendor::kIntel.
bool GpuInfo::IsIntel() const {
  return GpuVendor::kIntel == vendor;
}
|
||||
|
||||
bool GpuInfo::IsRoundToNearestSupported() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.supports_fp16_rtn || opencl_info.supports_fp32_rtn;
|
||||
}
|
||||
if (IsApple()) {
|
||||
return apple_info.IsRoundToNearestSupported();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsFP16() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.supports_fp16;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsTextureArray() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.cl_version >= OpenClVersion::kCl1_2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsImageBuffer() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.cl_version >= OpenClVersion::kCl1_2;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsImage3D() const {
|
||||
if (IsApiOpenCl()) {
|
||||
if (IsMali() && mali_info.IsMidgard()) {
|
||||
// On Mali T880 read_imageh doesn't compile with image3d_t
|
||||
return false;
|
||||
}
|
||||
return opencl_info.supports_image3d_writes;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsWaveSizeEqualTo32() const {
|
||||
@ -385,12 +440,66 @@ bool GpuInfo::IsWaveSizeEqualTo32() const {
|
||||
supported_subgroup_sizes[0] == 32;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsExtension(const std::string& extension) const {
|
||||
const std::vector<std::string>* extensions = nullptr;
|
||||
if (IsApiOpenGl()) {
|
||||
extensions = &opengl_info.extensions;
|
||||
} else if (IsApiVulkan()) {
|
||||
extensions = &vulkan_info.extensions;
|
||||
} else if (IsApiOpenCl()) {
|
||||
extensions = &opencl_info.extensions;
|
||||
}
|
||||
if (!extensions) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& ext : *extensions) {
|
||||
if (ext == extension) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool GpuInfo::SupportsSubGroupWithSize(int sub_group_size) const {
|
||||
for (auto subgroup_size : supported_subgroup_sizes) {
|
||||
if (sub_group_size == subgroup_size) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether a 2D float image with `channels` channels (1..4) is
// supported for `data_type`. Only OpenCL is handled; every other API yields
// false. DataType::FLOAT32 selects the *_f32_tex2d flags, any other data type
// selects the *_f16_tex2d flags. Channel counts outside 1..4 yield false.
bool GpuInfo::SupportsFloatImage2D(DataType data_type, int channels) const {
  if (!IsApiOpenCl()) {
    return false;
  }
  const bool want_f32 = data_type == DataType::FLOAT32;
  switch (channels) {
    case 1:
      return want_f32 ? opencl_info.supports_r_f32_tex2d
                      : opencl_info.supports_r_f16_tex2d;
    case 2:
      return want_f32 ? opencl_info.supports_rg_f32_tex2d
                      : opencl_info.supports_rg_f16_tex2d;
    case 3:
      return want_f32 ? opencl_info.supports_rgb_f32_tex2d
                      : opencl_info.supports_rgb_f16_tex2d;
    case 4:
      return want_f32 ? opencl_info.supports_rgba_f32_tex2d
                      : opencl_info.supports_rgba_f16_tex2d;
    default:
      return false;
  }
}
|
||||
|
||||
int GpuInfo::GetComputeUnitsCount() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.compute_units_count;
|
||||
}
|
||||
if (IsApple()) {
|
||||
return apple_info.GetComputeUnitsCount();
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxWorkGroupSizeForX() const {
|
||||
@ -400,6 +509,9 @@ int GpuInfo::GetMaxWorkGroupSizeForX() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_compute_work_group_size_x;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.max_work_group_size_x;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
@ -410,6 +522,9 @@ int GpuInfo::GetMaxWorkGroupSizeForY() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_compute_work_group_size_y;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.max_work_group_size_y;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
@ -420,6 +535,9 @@ int GpuInfo::GetMaxWorkGroupSizeForZ() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_compute_work_group_size_z;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.max_work_group_size_z;
|
||||
}
|
||||
return 64;
|
||||
}
|
||||
|
||||
@ -430,6 +548,9 @@ int GpuInfo::GetMaxWorkGroupTotalSize() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_compute_work_group_invocations;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.max_work_group_total_size;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
@ -440,6 +561,9 @@ uint64_t GpuInfo::GetMaxImage2DWidth() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_dimension_2d;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image2d_max_width;
|
||||
}
|
||||
return 2048;
|
||||
}
|
||||
|
||||
@ -450,6 +574,9 @@ uint64_t GpuInfo::GetMaxImage2DHeight() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_dimension_2d;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image2d_max_height;
|
||||
}
|
||||
return 2048;
|
||||
}
|
||||
|
||||
@ -460,9 +587,47 @@ uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
|
||||
if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_array_layers;
|
||||
}
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image_array_max_layers;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DWidth() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image3d_max_width;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DHeight() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image3d_max_height;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage3DDepth() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image3d_max_depth;
|
||||
}
|
||||
return 256;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxBufferSize() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.buffer_max_size;
|
||||
}
|
||||
return 128 * 1024 * 1024;
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImageBufferWidth() const {
|
||||
if (IsApiOpenCl()) {
|
||||
return opencl_info.image_buffer_max_size;
|
||||
}
|
||||
return 64 * 1024;
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxImageArguments() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_image_units;
|
||||
@ -481,12 +646,6 @@ int GpuInfo::GetMaxImageArguments() const {
|
||||
|
||||
bool GpuInfo::IsApiOpenGl() const { return gpu_api == GpuApi::kOpenGl; }
|
||||
|
||||
bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; }
|
||||
|
||||
bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; }
|
||||
|
||||
bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; }
|
||||
|
||||
bool GpuInfo::IsApiOpenGl31OrAbove() const {
|
||||
if (!IsApiOpenGl()) {
|
||||
return false;
|
||||
@ -495,5 +654,29 @@ bool GpuInfo::IsApiOpenGl31OrAbove() const {
|
||||
opengl_info.major_version > 3;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; }
|
||||
|
||||
bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; }
|
||||
|
||||
bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; }
|
||||
|
||||
bool GpuInfo::IsCL20OrHigher() const {
|
||||
if (!IsApiOpenCl()) {
|
||||
return false;
|
||||
}
|
||||
return opencl_info.cl_version != OpenClVersion::kCl1_0 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl1_1 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl1_2;
|
||||
}
|
||||
|
||||
bool GpuInfo::IsCL30OrHigher() const {
|
||||
if (!IsApiOpenCl()) {
|
||||
return false;
|
||||
}
|
||||
return IsCL20OrHigher() && opencl_info.cl_version != OpenClVersion::kCl2_0 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl2_1 &&
|
||||
opencl_info.cl_version != OpenClVersion::kCl2_2;
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
||||
@ -19,6 +19,8 @@ limitations under the License.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
|
||||
|
||||
namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
@ -236,6 +238,57 @@ struct VulkanInfo {
|
||||
int max_compute_work_group_size_z;
|
||||
};
|
||||
|
||||
// OpenCL versions distinguished by GpuInfo.
//
// Declaration order matters: the values are compared with relational
// operators (e.g. `cl_version >= OpenClVersion::kCl1_2`), so the enumerators
// must stay in ascending version order. kUnknown is declared last and thus
// compares greater than every known version.
enum class OpenClVersion {
  // OpenCL 1.x
  kCl1_0,
  kCl1_1,
  kCl1_2,
  // OpenCL 2.x
  kCl2_0,
  kCl2_1,
  kCl2_2,
  // OpenCL 3.x
  kCl3_0,
  // Version could not be determined.
  kUnknown,
};

// Returns a printable representation of `version`.
std::string OpenClVersionToString(OpenClVersion version);
|
||||
|
||||
struct OpenClInfo {
|
||||
OpenClVersion cl_version;
|
||||
|
||||
std::vector<std::string> extensions;
|
||||
bool supports_fp16;
|
||||
bool supports_image3d_writes;
|
||||
int compute_units_count;
|
||||
uint64_t buffer_max_size;
|
||||
uint64_t image2d_max_width;
|
||||
uint64_t image2d_max_height;
|
||||
uint64_t image_buffer_max_size;
|
||||
uint64_t image_array_max_layers;
|
||||
uint64_t image3d_max_width;
|
||||
uint64_t image3d_max_height;
|
||||
uint64_t image3d_max_depth;
|
||||
int max_work_group_size_x;
|
||||
int max_work_group_size_y;
|
||||
int max_work_group_size_z;
|
||||
int max_work_group_total_size;
|
||||
|
||||
// rtn is ROUND_TO_NEAREST
|
||||
// with rtn precision is much better then with rtz (ROUND_TO_ZERO)
|
||||
// Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
|
||||
// Mali from T6xx supports rtn
|
||||
// PowerVR supports only rtz
|
||||
bool supports_fp32_rtn;
|
||||
bool supports_fp16_rtn;
|
||||
|
||||
bool supports_r_f16_tex2d = false;
|
||||
bool supports_rg_f16_tex2d = false;
|
||||
bool supports_rgb_f16_tex2d = false;
|
||||
bool supports_rgba_f16_tex2d = false;
|
||||
|
||||
bool supports_r_f32_tex2d = false;
|
||||
bool supports_rg_f32_tex2d = false;
|
||||
bool supports_rgb_f32_tex2d = false;
|
||||
bool supports_rgba_f32_tex2d = false;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
bool IsAdreno() const;
|
||||
bool IsApple() const;
|
||||
@ -248,8 +301,18 @@ struct GpuInfo {
|
||||
// floating point rounding mode
|
||||
bool IsRoundToNearestSupported() const;
|
||||
|
||||
bool SupportsFP16() const;
|
||||
|
||||
bool SupportsTextureArray() const;
|
||||
bool SupportsImageBuffer() const;
|
||||
bool SupportsImage3D() const;
|
||||
|
||||
// returns true if device have fixed wave size equal to 32
|
||||
bool IsWaveSizeEqualTo32() const;
|
||||
bool SupportsSubGroupWithSize(int sub_group_size) const;
|
||||
|
||||
bool SupportsFloatImage2D(DataType data_type, int channels) const;
|
||||
bool SupportsExtension(const std::string& extension) const;
|
||||
|
||||
int GetComputeUnitsCount() const;
|
||||
|
||||
@ -263,6 +326,11 @@ struct GpuInfo {
|
||||
uint64_t GetMaxImage2DWidth() const;
|
||||
uint64_t GetMaxImage2DHeight() const;
|
||||
uint64_t GetMaxImage2DArrayLayers() const;
|
||||
uint64_t GetMaxImage3DWidth() const;
|
||||
uint64_t GetMaxImage3DHeight() const;
|
||||
uint64_t GetMaxImage3DDepth() const;
|
||||
uint64_t GetMaxBufferSize() const;
|
||||
uint64_t GetMaxImageBufferWidth() const;
|
||||
|
||||
GpuVendor vendor = GpuVendor::kUnknown;
|
||||
GpuApi gpu_api = GpuApi::kUnknown;
|
||||
@ -287,7 +355,10 @@ struct GpuInfo {
|
||||
|
||||
bool IsApiMetal() const;
|
||||
|
||||
OpenClInfo opencl_info;
|
||||
bool IsApiOpenCl() const;
|
||||
bool IsCL20OrHigher() const;
|
||||
bool IsCL30OrHigher() const;
|
||||
};
|
||||
|
||||
inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user