Raman Sarokin bae911298b Additional check for Intel specialization.
PiperOrigin-RevId: 316997503
Change-Id: Ibc97cd77c5e15bb021546aac754e2953b050f0f3
2020-06-17 17:21:38 -07:00

602 lines
19 KiB
C++

/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
#include <algorithm>
#include <string>
#include <vector>
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
#include "tensorflow/lite/delegates/gpu/cl/util.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
namespace gpu {
namespace cl {
// Specialization of GetDeviceInfo for string-valued device properties.
//
// Asks OpenCL for the size of the property first and then reads the value
// into a std::string. Returns an empty string if either query fails or if the
// device reports a zero-sized value.
template <>
std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) {
  size_t size = 0;
  cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
  // A reported size of 0 must be rejected: `size - 1` below would otherwise
  // wrap around to SIZE_MAX.
  if (error != CL_SUCCESS || size == 0) {
    return "";
  }
  // The string is created one character shorter than the reported size; the
  // last byte written by OpenCL lands on the string's own terminator slot.
  std::string result(size - 1, 0);
  error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
  if (error != CL_SUCCESS) {
    return "";
  }
  return result;
}
namespace {
// Reads a fixed-size platform property of type T from platform `id`.
// Returns -1 (converted to T) when the OpenCL query does not succeed.
template <typename T>
T GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
  T value;
  const cl_int status =
      clGetPlatformInfo(id, info, sizeof(T), &value, nullptr);
  if (status == CL_SUCCESS) {
    return value;
  }
  return -1;
}
// Reads a string-valued platform property from platform `id`.
//
// Asks OpenCL for the size of the property first and then reads the value
// into a std::string. Returns an empty string if either query fails or if the
// platform reports a zero-sized value.
std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
  size_t size = 0;
  cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
  // A reported size of 0 must be rejected: `size - 1` below would otherwise
  // wrap around to SIZE_MAX.
  if (error != CL_SUCCESS || size == 0) {
    return "";
  }
  // The string is created one character shorter than the reported size; the
  // last byte written by OpenCL lands on the string's own terminator slot.
  std::string result(size - 1, 0);
  error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
  if (error != CL_SUCCESS) {
    return "";
  }
  return result;
}
// Stores the first three entries of CL_DEVICE_MAX_WORK_ITEM_SIZES for device
// `id` into result->x / y / z.
//
// *result is left untouched when the device reports fewer than three work-item
// dimensions or when the size query fails.
void GetDeviceWorkDimsSizes(cl_device_id id, int3* result) {
  const int dimension_count =
      GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
  // According to the spec the dimension count must be at least 3.
  if (dimension_count >= 3) {
    std::vector<size_t> max_sizes(dimension_count);
    const cl_int status = clGetDeviceInfo(
        id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * dimension_count,
        max_sizes.data(), nullptr);
    if (status == CL_SUCCESS) {
      result->x = max_sizes[0];
      result->y = max_sizes[1];
      result->z = max_sizes[2];
    }
  }
}
// Maps an OpenCL version string to an OpenCLVersion value.
//
// The single characters immediately before and after the first '.' in
// `version` are interpreted as the major and minor version digits.
// Anything that cannot be recognised falls back to OpenCLVersion::CL_1_0;
// an unrecognised minor digit falls back to the ".0" release of its major
// version.
OpenCLVersion ParseCLVersion(const std::string& version) {
  const auto first_dot_pos = version.find_first_of('.');
  // A dot at position 0 has no character in front of it; indexing
  // version[first_dot_pos - 1] would wrap around and read out of bounds.
  if (first_dot_pos == std::string::npos || first_dot_pos == 0) {
    return OpenCLVersion::CL_1_0;
  }
  const int major_digit = version[first_dot_pos - 1] - '0';
  // A trailing dot has no minor digit; treat it like an unrecognised minor.
  const int minor_digit = first_dot_pos + 1 < version.size()
                              ? version[first_dot_pos + 1] - '0'
                              : -1;
  switch (major_digit) {
    case 1:
      if (minor_digit == 2) return OpenCLVersion::CL_1_2;
      if (minor_digit == 1) return OpenCLVersion::CL_1_1;
      return OpenCLVersion::CL_1_0;
    case 2:
      if (minor_digit == 2) return OpenCLVersion::CL_2_2;
      if (minor_digit == 1) return OpenCLVersion::CL_2_1;
      return OpenCLVersion::CL_2_0;
    case 3:
      return OpenCLVersion::CL_3_0;
    default:
      return OpenCLVersion::CL_1_0;
  }
}
// Determines the GPU vendor from the device name and vendor name reported by
// OpenCL.
//
// Both strings are lower-cased and searched for vendor keywords. The keywords
// are tried in a fixed order and the first hit wins; Vendor::UNKNOWN is
// returned when no keyword is found in either string.
Vendor ParseVendor(const std::string& device_name,
                   const std::string& vendor_name) {
  // ::tolower has undefined behaviour for negative char values, so every
  // character is passed through unsigned char first.
  const auto to_lower = [](std::string text) {
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) {
                     return static_cast<char>(::tolower(c));
                   });
    return text;
  };
  const std::string d_name = to_lower(device_name);
  const std::string v_name = to_lower(vendor_name);
  // True when `keyword` occurs in either the device name or the vendor name.
  const auto mentions = [&d_name, &v_name](const char* keyword) {
    return d_name.find(keyword) != std::string::npos ||
           v_name.find(keyword) != std::string::npos;
  };
  if (mentions("qualcomm")) {
    return Vendor::QUALCOMM;
  }
  if (mentions("mali")) {
    return Vendor::MALI;
  }
  if (mentions("power")) {
    return Vendor::POWERVR;
  }
  if (mentions("nvidia")) {
    return Vendor::NVIDIA;
  }
  if (mentions("advanced micro devices")) {
    return Vendor::AMD;
  }
  if (mentions("intel")) {
    return Vendor::INTEL;
  }
  return Vendor::UNKNOWN;
}
// Returns true when gpu_version lies in the half-open interval
// [min_version, max_version): min_version is included, max_version is not.
bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
  if (gpu_version < min_version) {
    return false;
  }
  return gpu_version < max_version;
}
} // namespace
// There is no rule for gpu version encoding, but we found these samples:
// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2
// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2
// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3
// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8
// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9
// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge
// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4)
// After the number string ends.
// It is assumed that the <vendor-specific information> for Adreno GPUs has
// the following format:
// <text?><space?>Adreno(TM)<space><text?><version>
// Returns -1 if vendor-specific information cannot be parsed
int GetAdrenoGPUVersion(const std::string& gpu_version) {
const std::string gpu = absl::AsciiStrToLower(gpu_version);
const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' ');
int i = 0;
for (; i < words.size(); ++i) {
if (words[i].find("adreno") != words[i].npos) {
break;
}
}
i += 1;
for (; i < words.size(); ++i) {
int number;
bool is_number = absl::SimpleAtoi(words[i], &number);
// Adreno GPUs starts from 2xx, but opencl support should be only from 3xx
if (is_number && number >= 300) {
return number;
}
}
return -1;
}
// Determines the Mali GPU model from the OpenCL device name.
//
// The device name is searched (case-sensitively) for each known model
// identifier; the first identifier found as a substring decides the result.
// Returns MaliGPU::UNKNOWN when no identifier matches.
MaliGPU GetMaliGPUVersion(const std::string& device_name) {
  struct ModelEntry {
    const char* name;
    MaliGPU gpu;
  };
  // Static table so that nothing is allocated per call. Entries are listed in
  // lexicographic order of `name`; keep them sorted, because the first
  // substring match wins.
  static const ModelEntry kMapping[] = {
      {"G31", MaliGPU::G31},   {"G51", MaliGPU::G51},
      {"G52", MaliGPU::G52},   {"G57", MaliGPU::G57},
      {"G71", MaliGPU::G71},   {"G72", MaliGPU::G72},
      {"G76", MaliGPU::G76},   {"G77", MaliGPU::G77},
      {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622},
      {"T624", MaliGPU::T624}, {"T628", MaliGPU::T628},
      {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678},
      {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760},
      {"T820", MaliGPU::T820}, {"T830", MaliGPU::T830},
      {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880},
  };
  for (const ModelEntry& entry : kMapping) {
    if (device_name.find(entry.name) != std::string::npos) {
      return entry.gpu;
    }
  }
  return MaliGPU::UNKNOWN;
}
// Returns a human-readable name for vendor `v`.
std::string VendorToString(Vendor v) {
  switch (v) {
    case Vendor::QUALCOMM:
      return "Qualcomm";
    case Vendor::MALI:
      return "Mali";
    case Vendor::POWERVR:
      return "PowerVR";
    case Vendor::NVIDIA:
      return "NVIDIA";
    case Vendor::AMD:
      return "AMD";
    case Vendor::INTEL:
      return "Intel";
    case Vendor::UNKNOWN:
      return "unknown vendor";
  }
  // Reached only for a value outside the enumerators handled above. Without
  // this return, control would fall off the end of a non-void function. It is
  // placed after the switch rather than in a `default:` label so compilers can
  // still warn about unhandled enumerators.
  return "unknown vendor";
}
// Returns the "major.minor" text for an OpenCLVersion value.
std::string OpenCLVersionToString(OpenCLVersion version) {
  switch (version) {
    case OpenCLVersion::CL_1_0:
      return "1.0";
    case OpenCLVersion::CL_1_1:
      return "1.1";
    case OpenCLVersion::CL_1_2:
      return "1.2";
    case OpenCLVersion::CL_2_0:
      return "2.0";
    case OpenCLVersion::CL_2_1:
      return "2.1";
    case OpenCLVersion::CL_2_2:
      return "2.2";
    case OpenCLVersion::CL_3_0:
      return "3.0";
  }
  // Reached only for a value outside the enumerators handled above. Without
  // this return, control would fall off the end of a non-void function. It is
  // placed after the switch rather than in a `default:` label so compilers can
  // still warn about unhandled enumerators.
  return "unknown version";
}
// Builds an AdrenoInfo by parsing the Adreno GPU version number out of
// `device_version` with GetAdrenoGPUVersion; gpu_version is -1 when the
// string cannot be parsed.
AdrenoInfo::AdrenoInfo(const std::string& device_version)
: gpu_version(GetAdrenoGPUVersion(device_version)) {}
// Returns the maximum number of waves for this GPU, or -1 when the value is
// not available.
//
// Only Adreno 6xx is currently supported: 30 for Adreno 640, 16 for every
// other 6xx part. Adreno 3xx/4xx/5xx do not support this query currently, and
// Adreno 7xx and higher did not exist when this was written.
int AdrenoInfo::GetMaximumWavesCount() const {
  const bool is_adreno_6xx = gpu_version >= 600 && gpu_version < 700;
  if (!is_adreno_6xx) {
    return -1;
  }
  return gpu_version == 640 ? 30 : 16;
}
// Returns the register memory size per compute unit for this GPU, or -1 when
// the value is not available.
//
// Only Adreno 6xx is currently supported: 128 * 144 * 16 for Adreno 640 and
// 128 * 96 * 16 for every other 6xx part. Adreno 3xx/4xx/5xx do not support
// this query currently, and Adreno 7xx and higher did not exist when this was
// written.
int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
  const bool is_adreno_6xx = gpu_version >= 600 && gpu_version < 700;
  if (!is_adreno_6xx) {
    return -1;
  }
  return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16;
}
// Returns the number of waves that can be resident given a per-thread register
// footprint.
//
// The result is the smaller of
//   * GetRegisterMemorySizePerComputeUnit() divided by the registers one wave
//     needs (wave size times `register_footprint_per_tread`), and
//   * the parameterless GetMaximumWavesCount().
// `full_wave` selects which wave size GetWaveSize() reports.
int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
                                     bool full_wave) const {
  const int register_usage_per_wave =
      GetWaveSize(full_wave) * register_footprint_per_tread;
  // A zero footprint would divide by zero below. With no register usage the
  // register file imposes no limit, so only the hardware limit applies.
  if (register_usage_per_wave == 0) {
    return GetMaximumWavesCount();
  }
  const int possible_waves_count =
      GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
  return std::min(possible_waves_count, GetMaximumWavesCount());
}
// Returns the wave size for this GPU: the full wave when `full_wave` is true,
// otherwise half of it. Returns -1 for GPU versions below 400 (Adreno 3xx
// does not support this currently).
//
// Versions below 600 use 64 / 32; versions 600 and above use 128 / 64.
int AdrenoInfo::GetWaveSize(bool full_wave) const {
  if (gpu_version < 400) {
    return -1;
  }
  const int half_wave_size = gpu_version < 600 ? 32 : 64;
  return full_wave ? 2 * half_wave_size : half_wave_size;
}
// Builds a MaliInfo by deriving the Mali GPU model from `device_name` with
// GetMaliGPUVersion.
MaliInfo::MaliInfo(const std::string& device_name)
: gpu_version(GetMaliGPUVersion(device_name)) {}
// True for the T6xx models: T604, T622, T624, T628, T658 and T678.
bool MaliInfo::IsMaliT6xx() const {
  switch (gpu_version) {
    case MaliGPU::T604:
    case MaliGPU::T622:
    case MaliGPU::T624:
    case MaliGPU::T628:
    case MaliGPU::T658:
    case MaliGPU::T678:
      return true;
    default:
      return false;
  }
}
// True for the T7xx models: T720 and T760.
bool MaliInfo::IsMaliT7xx() const {
  switch (gpu_version) {
    case MaliGPU::T720:
    case MaliGPU::T760:
      return true;
    default:
      return false;
  }
}
// True for the T8xx models: T820, T830, T860 and T880.
bool MaliInfo::IsMaliT8xx() const {
  switch (gpu_version) {
    case MaliGPU::T820:
    case MaliGPU::T830:
    case MaliGPU::T860:
    case MaliGPU::T880:
      return true;
    default:
      return false;
  }
}
// True when the GPU is any T6xx, T7xx or T8xx model.
bool MaliInfo::IsMidgard() const {
  if (IsMaliT6xx()) {
    return true;
  }
  if (IsMaliT7xx()) {
    return true;
  }
  return IsMaliT8xx();
}
// True for the first-generation Bifrost models: G31, G51 and G71.
bool MaliInfo::IsBifrostGen1() const {
  switch (gpu_version) {
    case MaliGPU::G31:
    case MaliGPU::G51:
    case MaliGPU::G71:
      return true;
    default:
      return false;
  }
}
// True for the second-generation Bifrost models: G52 and G72.
bool MaliInfo::IsBifrostGen2() const {
  switch (gpu_version) {
    case MaliGPU::G52:
    case MaliGPU::G72:
      return true;
    default:
      return false;
  }
}
// True for the third-generation Bifrost model: G76.
bool MaliInfo::IsBifrostGen3() const {
  switch (gpu_version) {
    case MaliGPU::G76:
      return true;
    default:
      return false;
  }
}
// True when the GPU belongs to any of the three Bifrost generations.
bool MaliInfo::IsBifrost() const {
  if (IsBifrostGen1()) {
    return true;
  }
  if (IsBifrostGen2()) {
    return true;
  }
  return IsBifrostGen3();
}
// True for the Valhall models: G57 and G77.
bool MaliInfo::IsValhall() const {
  switch (gpu_version) {
    case MaliGPU::G57:
    case MaliGPU::G77:
      return true;
    default:
      return false;
  }
}
// Queries OpenCL device `id` and fills in this DeviceInfo.
//
// Collected, in order: vendor (plus Adreno / Mali details for those vendors),
// OpenCL version, the extension list, fp16 / fp32 floating-point
// configuration, 3D image write support, and the device size limits
// (compute units, image and buffer dimensions, work-group sizes).
DeviceInfo::DeviceInfo(cl_device_id id) {
  const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
  const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
  const auto opencl_c_version =
      GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
  vendor = ParseVendor(device_name, vendor_name);
  if (vendor == Vendor::QUALCOMM) {
    adreno_info = AdrenoInfo(opencl_c_version);
  } else if (vendor == Vendor::MALI) {
    mali_info = MaliInfo(device_name);
  }
  cl_version = ParseCLVersion(opencl_c_version);

  // The extension list is a single space-separated string.
  extensions =
      absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');

  supports_fp16 = false;
  supports_image3d_writes = false;
  for (const auto& ext : extensions) {
    if (ext == "cl_khr_fp16") {
      supports_fp16 = true;
    }
    if (ext == "cl_khr_3d_image_writes") {
      supports_image3d_writes = true;
    }
  }

  f32_config =
      GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
  supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;

  if (supports_fp16) {
    auto status = GetDeviceInfo<cl_device_fp_config>(
        id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
    // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
    if (status.ok() && vendor != Vendor::AMD) {
      supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
    } else {  // happens on PowerVR
      f16_config = f32_config;
      supports_fp16_rtn = supports_fp32_rtn;
    }
  } else {
    f16_config = 0;
    supports_fp16_rtn = false;
  }

  if (vendor == Vendor::POWERVR && !supports_fp16) {
    // PowerVR doesn't have full support of fp16 and so doesn't list this
    // extension. But it can support fp16 in MADs and as buffers/textures types,
    // so we will use it.
    supports_fp16 = true;
    f16_config = f32_config;
    supports_fp16_rtn = supports_fp32_rtn;
  }

  if (!supports_image3d_writes &&
      ((vendor == Vendor::QUALCOMM &&
        IsGPUVersionInRange(adreno_info.gpu_version, 400, 500)) ||
       vendor == Vendor::NVIDIA)) {
    // In local tests Adreno 430 can write to 3D images, at least at small
    // sizes, but it doesn't list cl_khr_3d_image_writes among its available
    // extensions. The same holds for NVIDIA.
    supports_image3d_writes = true;
  }

  compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
  image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
  image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
  buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
  // These two limits are only queried for OpenCL 1.2 and newer; on older
  // versions the members are not assigned here.
  if (cl_version >= OpenCLVersion::CL_1_2) {
    image_buffer_max_size =
        GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
    image_array_max_layers =
        GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
  }
  image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
  // The 3D height limit must come from the IMAGE3D query, not the IMAGE2D one.
  image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT);
  image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
  GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
}
// Texture arrays are reported as supported when the device's OpenCL version
// is 1.2 or newer.
bool DeviceInfo::SupportsTextureArray() const {
  const bool is_cl_1_2_or_newer = cl_version >= OpenCLVersion::CL_1_2;
  return is_cl_1_2_or_newer;
}
// Image buffers are reported as supported when the device's OpenCL version
// is 1.2 or newer.
bool DeviceInfo::SupportsImageBuffer() const {
  const bool is_cl_1_2_or_newer = cl_version >= OpenCLVersion::CL_1_2;
  return is_cl_1_2_or_newer;
}
// 3D images are reported as supported when the device can write to them,
// except on Mali where they are always disabled: on Mali T880 read_imageh
// doesn't compile with image3d_t.
bool DeviceInfo::SupportsImage3D() const {
  const bool is_mali = vendor == Vendor::MALI;
  return is_mali ? false : supports_image3d_writes;
}
// Wraps an OpenCL device: stores the device and platform handles and
// initializes info_ from the device id.
CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
: id_(id), platform_id_(platform_id), info_(id) {}
// Copy constructor: copies the device handle, the platform handle and info_
// from `device`.
CLDevice::CLDevice(const CLDevice& device)
: id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {}
// Copy assignment: copies the device handle, the platform handle and info_
// from `device`. Self-assignment is a no-op.
CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this == &device) {
    return *this;
  }
  id_ = device.id_;
  platform_id_ = device.platform_id_;
  info_ = device.info_;
  return *this;
}
// Move constructor: takes over the handles and info_ of `device`, then resets
// the source's device and platform handles to nullptr.
CLDevice::CLDevice(CLDevice&& device)
: id_(device.id_),
platform_id_(device.platform_id_),
info_(std::move(device.info_)) {
device.id_ = nullptr;
device.platform_id_ = nullptr;
}
// Move assignment: takes over the handles and info_ of `device` and leaves the
// source's device and platform handles set to nullptr. Self-assignment is a
// no-op.
CLDevice& CLDevice::operator=(CLDevice&& device) {
  if (this == &device) {
    return *this;
  }
  id_ = device.id_;
  device.id_ = nullptr;
  platform_id_ = device.platform_id_;
  device.platform_id_ = nullptr;
  info_ = std::move(device.info_);
  return *this;
}
// Returns the supports_fp16 flag recorded in info_.
bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
// Returns true when `extension` exactly matches one of the entries in
// info_.extensions.
bool CLDevice::SupportsExtension(const std::string& extension) const {
  const auto& known_extensions = info_.extensions;
  return std::find(known_extensions.begin(), known_extensions.end(),
                   extension) != known_extensions.end();
}
// Forwards to info_.SupportsTextureArray().
bool CLDevice::SupportsTextureArray() const {
return info_.SupportsTextureArray();
}
// Forwards to info_.SupportsImageBuffer().
bool CLDevice::SupportsImageBuffer() const {
return info_.SupportsImageBuffer();
}
// Forwards to info_.SupportsImage3D().
bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
// Returns the supports_fp32_rtn flag recorded in info_ (round-to-nearest
// support for fp32).
bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
// Returns the supports_fp16_rtn flag recorded in info_ (round-to-nearest
// support for fp16).
bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
// Returns the CL_PLATFORM_VERSION string of this device's platform; empty if
// the platform query fails.
std::string CLDevice::GetPlatformVersion() const {
return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
}
// Returns true unless the detected OpenCL version is one of the 1.x releases
// (1.0, 1.1 or 1.2).
bool CLDevice::IsCL20OrHigher() const {
  switch (info_.cl_version) {
    case OpenCLVersion::CL_1_0:
    case OpenCLVersion::CL_1_1:
    case OpenCLVersion::CL_1_2:
      return false;
    default:
      return true;
  }
}
bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const {
if (IsIntel()) {
if (SupportsExtension("cl_intel_required_subgroup_size")) {
size_t sub_groups_count;
cl_int error =
clGetDeviceInfo(id_, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0,
nullptr, &sub_groups_count);
if (error != CL_SUCCESS) {
return false;
}
std::vector<size_t> sub_group_sizes(sub_groups_count);
error = clGetDeviceInfo(id_, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
sizeof(size_t) * sub_groups_count,
sub_group_sizes.data(), nullptr);
if (error != CL_SUCCESS) {
return false;
}
for (int i = 0; i < sub_groups_count; ++i) {
if (sub_group_sizes[i] == sub_group_size) {
return true;
}
}
}
}
return false;
}
// True when the detected vendor is Vendor::QUALCOMM.
bool CLDevice::IsAdreno() const { return info_.vendor == Vendor::QUALCOMM; }
// True for an Adreno device whose GPU version is in [300, 400).
bool CLDevice::IsAdreno3xx() const {
  if (!IsAdreno()) {
    return false;
  }
  return IsGPUVersionInRange(info_.adreno_info.gpu_version, 300, 400);
}
// True for an Adreno device whose GPU version is in [400, 500).
bool CLDevice::IsAdreno4xx() const {
  if (!IsAdreno()) {
    return false;
  }
  return IsGPUVersionInRange(info_.adreno_info.gpu_version, 400, 500);
}
// True for an Adreno device whose GPU version is in [500, 600).
bool CLDevice::IsAdreno5xx() const {
  if (!IsAdreno()) {
    return false;
  }
  return IsGPUVersionInRange(info_.adreno_info.gpu_version, 500, 600);
}
// True for an Adreno device whose GPU version is in [600, 700).
bool CLDevice::IsAdreno6xx() const {
  if (!IsAdreno()) {
    return false;
  }
  return IsGPUVersionInRange(info_.adreno_info.gpu_version, 600, 700);
}
// True for an Adreno device whose GPU version is 600 or greater.
bool CLDevice::IsAdreno6xxOrHigher() const {
  if (!IsAdreno()) {
    return false;
  }
  return info_.adreno_info.gpu_version >= 600;
}
// True when the detected vendor is Vendor::POWERVR.
bool CLDevice::IsPowerVR() const { return info_.vendor == Vendor::POWERVR; }
// True when the detected vendor is Vendor::NVIDIA.
bool CLDevice::IsNvidia() const { return info_.vendor == Vendor::NVIDIA; }
// True when the detected vendor is Vendor::MALI.
bool CLDevice::IsMali() const { return info_.vendor == Vendor::MALI; }
// True when the detected vendor is Vendor::AMD.
bool CLDevice::IsAMD() const { return info_.vendor == Vendor::AMD; }
// True when the detected vendor is Vendor::INTEL.
bool CLDevice::IsIntel() const { return info_.vendor == Vendor::INTEL; }
// Non-Adreno devices always report support for one-layer texture arrays. For
// Adreno the answer comes from adreno_info.support_one_layer_texture_array.
bool CLDevice::SupportsOneLayerTextureArray() const {
  if (!IsAdreno()) {
    return true;
  }
  return info_.adreno_info.support_one_layer_texture_array;
}
// Clears adreno_info.support_one_layer_texture_array, the flag that
// SupportsOneLayerTextureArray() reports for Adreno devices.
void CLDevice::DisableOneLayerTextureArray() {
info_.adreno_info.support_one_layer_texture_array = false;
}
// Creates a CLDevice for the first GPU device of the first OpenCL platform
// and stores it in *result.
//
// Returns absl::UnknownError when no platform or no GPU device can be
// enumerated (including when an OpenCL enumeration call itself fails);
// *result is left untouched in that case.
absl::Status CreateDefaultGPUDevice(CLDevice* result) {
  // The counts are zero-initialized so that a failing OpenCL call can never
  // leave them holding an indeterminate value.
  cl_uint num_platforms = 0;
  cl_int error = clGetPlatformIDs(0, nullptr, &num_platforms);
  if (error != CL_SUCCESS || num_platforms == 0) {
    return absl::UnknownError("No supported OpenCL platform.");
  }
  std::vector<cl_platform_id> platforms(num_platforms);
  error = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
  if (error != CL_SUCCESS) {
    return absl::UnknownError("No supported OpenCL platform.");
  }
  cl_platform_id platform_id = platforms[0];

  cl_uint num_devices = 0;
  error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr,
                         &num_devices);
  if (error != CL_SUCCESS || num_devices == 0) {
    return absl::UnknownError("No GPU on current platform.");
  }
  std::vector<cl_device_id> devices(num_devices);
  error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices,
                         devices.data(), nullptr);
  if (error != CL_SUCCESS) {
    return absl::UnknownError("No GPU on current platform.");
  }

  *result = CLDevice(devices[0], platform_id);
  return absl::OkStatus();
}
} // namespace cl
} // namespace gpu
} // namespace tflite