Extended GpuInfo with Adreno-specific information, making it similar to the OpenCL backend's GpuInfo.
PiperOrigin-RevId: 341140554 Change-Id: I46b4476f28046fd4e16f1cc1d6d918032c860446
This commit is contained in:
parent
f903439d8f
commit
430dbcc5de
tensorflow/lite/delegates/gpu
@ -15,6 +15,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/ascii.h"
|
||||
@ -23,79 +24,217 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
namespace {
|
||||
|
||||
GpuType GetGpuType(const std::string& renderer) {
|
||||
GpuVendor GetGpuVendor(const std::string& renderer) {
|
||||
if (renderer.find("mali") != renderer.npos) {
|
||||
return GpuType::MALI;
|
||||
return GpuVendor::kMali;
|
||||
}
|
||||
if (renderer.find("adreno") != renderer.npos) {
|
||||
return GpuType::ADRENO;
|
||||
return GpuVendor::kQualcomm;
|
||||
}
|
||||
if (renderer.find("powervr") != renderer.npos) {
|
||||
return GpuType::POWERVR;
|
||||
return GpuVendor::kPowerVR;
|
||||
}
|
||||
if (renderer.find("intel") != renderer.npos) {
|
||||
return GpuType::INTEL;
|
||||
return GpuVendor::kIntel;
|
||||
}
|
||||
if (renderer.find("nvidia") != renderer.npos) {
|
||||
return GpuType::NVIDIA;
|
||||
return GpuVendor::kNvidia;
|
||||
}
|
||||
return GpuType::UNKNOWN;
|
||||
return GpuVendor::kUnknown;
|
||||
}
|
||||
|
||||
GpuModel GetGpuModel(const std::string& renderer) {
|
||||
auto found_model = [&](std::string model) -> bool {
|
||||
return renderer.find(model) != renderer.npos;
|
||||
AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) {
|
||||
const std::map<std::string, AdrenoGpu> kMapping = {
|
||||
// Adreno 6xx series
|
||||
{"685", AdrenoGpu::kAdreno685},
|
||||
{"680", AdrenoGpu::kAdreno680},
|
||||
{"675", AdrenoGpu::kAdreno675},
|
||||
{"650", AdrenoGpu::kAdreno650},
|
||||
{"640", AdrenoGpu::kAdreno640},
|
||||
{"630", AdrenoGpu::kAdreno630},
|
||||
{"620", AdrenoGpu::kAdreno620},
|
||||
{"616", AdrenoGpu::kAdreno618},
|
||||
{"616", AdrenoGpu::kAdreno616},
|
||||
{"615", AdrenoGpu::kAdreno615},
|
||||
{"612", AdrenoGpu::kAdreno612},
|
||||
{"610", AdrenoGpu::kAdreno610},
|
||||
{"605", AdrenoGpu::kAdreno605},
|
||||
// Adreno 5xx series
|
||||
{"540", AdrenoGpu::kAdreno540},
|
||||
{"530", AdrenoGpu::kAdreno530},
|
||||
{"512", AdrenoGpu::kAdreno512},
|
||||
{"510", AdrenoGpu::kAdreno510},
|
||||
{"509", AdrenoGpu::kAdreno509},
|
||||
{"508", AdrenoGpu::kAdreno508},
|
||||
{"506", AdrenoGpu::kAdreno506},
|
||||
{"505", AdrenoGpu::kAdreno505},
|
||||
{"504", AdrenoGpu::kAdreno504},
|
||||
// Adreno 4xx series
|
||||
{"430", AdrenoGpu::kAdreno430},
|
||||
{"420", AdrenoGpu::kAdreno420},
|
||||
{"418", AdrenoGpu::kAdreno418},
|
||||
{"405", AdrenoGpu::kAdreno405},
|
||||
// Adreno 3xx series
|
||||
{"330", AdrenoGpu::kAdreno330},
|
||||
{"320", AdrenoGpu::kAdreno320},
|
||||
{"308", AdrenoGpu::kAdreno308},
|
||||
{"306", AdrenoGpu::kAdreno306},
|
||||
{"305", AdrenoGpu::kAdreno305},
|
||||
{"304", AdrenoGpu::kAdreno304},
|
||||
// Adreno 2xx series
|
||||
{"225", AdrenoGpu::kAdreno225},
|
||||
{"220", AdrenoGpu::kAdreno220},
|
||||
{"205", AdrenoGpu::kAdreno205},
|
||||
{"203", AdrenoGpu::kAdreno203},
|
||||
{"200", AdrenoGpu::kAdreno200},
|
||||
// Adreno 1xx series
|
||||
{"130", AdrenoGpu::kAdreno130},
|
||||
{"120", AdrenoGpu::kAdreno120},
|
||||
};
|
||||
// Adreno 6xx series
|
||||
if (found_model("640")) return GpuModel::ADRENO640;
|
||||
if (found_model("630")) return GpuModel::ADRENO630;
|
||||
if (found_model("616")) return GpuModel::ADRENO616;
|
||||
if (found_model("615")) return GpuModel::ADRENO615;
|
||||
if (found_model("612")) return GpuModel::ADRENO612;
|
||||
if (found_model("605")) return GpuModel::ADRENO605;
|
||||
// Adreno 5xx series
|
||||
if (found_model("540")) return GpuModel::ADRENO540;
|
||||
if (found_model("530")) return GpuModel::ADRENO530;
|
||||
if (found_model("512")) return GpuModel::ADRENO512;
|
||||
if (found_model("510")) return GpuModel::ADRENO510;
|
||||
if (found_model("509")) return GpuModel::ADRENO509;
|
||||
if (found_model("508")) return GpuModel::ADRENO508;
|
||||
if (found_model("506")) return GpuModel::ADRENO506;
|
||||
if (found_model("505")) return GpuModel::ADRENO505;
|
||||
if (found_model("504")) return GpuModel::ADRENO504;
|
||||
// Adreno 4xx series
|
||||
if (found_model("430")) return GpuModel::ADRENO430;
|
||||
if (found_model("420")) return GpuModel::ADRENO420;
|
||||
if (found_model("418")) return GpuModel::ADRENO418;
|
||||
if (found_model("405")) return GpuModel::ADRENO405;
|
||||
// Adreno 3xx series
|
||||
if (found_model("330")) return GpuModel::ADRENO330;
|
||||
if (found_model("320")) return GpuModel::ADRENO320;
|
||||
if (found_model("308")) return GpuModel::ADRENO308;
|
||||
if (found_model("306")) return GpuModel::ADRENO306;
|
||||
if (found_model("305")) return GpuModel::ADRENO305;
|
||||
if (found_model("304")) return GpuModel::ADRENO304;
|
||||
// Adreno 2xx series
|
||||
if (found_model("225")) return GpuModel::ADRENO225;
|
||||
if (found_model("220")) return GpuModel::ADRENO220;
|
||||
if (found_model("205")) return GpuModel::ADRENO205;
|
||||
if (found_model("203")) return GpuModel::ADRENO203;
|
||||
if (found_model("200")) return GpuModel::ADRENO200;
|
||||
// Adreno 1xx series
|
||||
if (found_model("130")) return GpuModel::ADRENO130;
|
||||
return GpuModel::UNKNOWN;
|
||||
|
||||
for (const auto& v : kMapping) {
|
||||
if (device_name.find(v.first) != std::string::npos) {
|
||||
return v.second;
|
||||
}
|
||||
}
|
||||
return AdrenoGpu::kUnknown;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model,
|
||||
GpuType* gpu_type) {
|
||||
std::string lowered = renderer;
|
||||
absl::AsciiStrToLower(&lowered);
|
||||
*gpu_type = GetGpuType(lowered);
|
||||
*gpu_model =
|
||||
*gpu_type == GpuType::ADRENO ? GetGpuModel(lowered) : GpuModel::UNKNOWN;
|
||||
AdrenoInfo::AdrenoInfo(const std::string& device_version)
|
||||
: adreno_gpu(GetAdrenoGpuVersion(device_version)) {}
|
||||
|
||||
bool AdrenoInfo::IsAdreno1xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno120 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno130;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno2xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno200 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno203 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno205 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno220 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno225;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno3xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno304 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno305 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno306 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno308 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno320 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno330;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno4xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno405 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno418 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno420 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno430;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno5xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno504 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno505 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno506 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno508 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno509 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno510 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno512 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno530 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno540;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno6xx() const {
|
||||
return adreno_gpu == AdrenoGpu::kAdreno605 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno610 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno612 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno615 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno616 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno618 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno620 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno630 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno640 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno650 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno675 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno680 ||
|
||||
adreno_gpu == AdrenoGpu::kAdreno685;
|
||||
}
|
||||
|
||||
bool AdrenoInfo::IsAdreno6xxOrHigher() const { return IsAdreno6xx(); }
|
||||
|
||||
int AdrenoInfo::GetMaximumWavesCount() const {
|
||||
if (IsAdreno6xx()) {
|
||||
if (adreno_gpu == AdrenoGpu::kAdreno640) {
|
||||
return 30;
|
||||
} else {
|
||||
return 16;
|
||||
}
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
|
||||
if (IsAdreno6xx()) {
|
||||
if (adreno_gpu == AdrenoGpu::kAdreno640) {
|
||||
return 128 * 144 * 16;
|
||||
} else if (adreno_gpu == AdrenoGpu::kAdreno650) {
|
||||
return 128 * 64 * 16;
|
||||
} else {
|
||||
return 128 * 96 * 16;
|
||||
}
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
|
||||
bool full_wave) const {
|
||||
const int register_usage_per_wave =
|
||||
GetWaveSize(full_wave) * register_footprint_per_tread;
|
||||
const int possible_waves_count =
|
||||
GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
|
||||
return std::min(possible_waves_count, GetMaximumWavesCount());
|
||||
}
|
||||
|
||||
int AdrenoInfo::GetWaveSize(bool full_wave) const {
|
||||
if (IsAdreno6xx()) {
|
||||
return full_wave ? 128 : 64;
|
||||
} else if (IsAdreno5xx() || IsAdreno4xx()) {
|
||||
return full_wave ? 64 : 32;
|
||||
} else {
|
||||
// all other versions not supported
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
|
||||
GpuInfo* gpu_info) {
|
||||
std::string lowered = gpu_description;
|
||||
absl::AsciiStrToLower(&lowered);
|
||||
gpu_info->vendor = GetGpuVendor(lowered);
|
||||
if (gpu_info->IsAdreno()) {
|
||||
gpu_info->adreno_info = AdrenoInfo(lowered);
|
||||
}
|
||||
}
|
||||
|
||||
bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; }
|
||||
|
||||
bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; }
|
||||
|
||||
bool GpuInfo::IsMali() const { return vendor == GpuVendor::kMali; }
|
||||
|
||||
bool GpuInfo::IsPowerVR() const { return vendor == GpuVendor::kPowerVR; }
|
||||
|
||||
bool GpuInfo::IsNvidia() const { return vendor == GpuVendor::kNvidia; }
|
||||
|
||||
bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; }
|
||||
|
||||
bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; }
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -23,63 +23,113 @@ namespace tflite {
|
||||
namespace gpu {
|
||||
|
||||
// GPU vendor, detected from the device/renderer description string
// (see GetGpuInfoFromDeviceDescription).
enum class GpuVendor {
  kApple,
  kQualcomm,
  kMali,
  kPowerVR,
  kNvidia,
  kAMD,
  kIntel,
  kUnknown
};
|
||||
// Known Qualcomm Adreno GPU models, newest series first.
enum class AdrenoGpu {
  // Adreno 6xx series
  kAdreno685,
  kAdreno680,
  kAdreno675,
  kAdreno650,
  kAdreno640,
  kAdreno630,
  kAdreno620,
  kAdreno618,
  kAdreno616,
  kAdreno615,
  kAdreno612,
  kAdreno610,
  kAdreno605,
  // Adreno 5xx series
  kAdreno540,
  kAdreno530,
  kAdreno512,
  kAdreno510,
  kAdreno509,
  kAdreno508,
  kAdreno506,
  kAdreno505,
  kAdreno504,
  // Adreno 4xx series
  kAdreno430,
  kAdreno420,
  kAdreno418,
  kAdreno405,
  // Adreno 3xx series
  kAdreno330,
  kAdreno320,
  kAdreno308,
  kAdreno306,
  kAdreno305,
  kAdreno304,
  // Adreno 2xx series
  kAdreno225,
  kAdreno220,
  kAdreno205,
  kAdreno203,
  kAdreno200,
  // Adreno 1xx series
  kAdreno130,
  kAdreno120,
  kUnknown
};
|
||||
|
||||
struct AdrenoInfo {
|
||||
AdrenoInfo() = default;
|
||||
explicit AdrenoInfo(const std::string& device_version);
|
||||
|
||||
AdrenoGpu adreno_gpu;
|
||||
|
||||
bool IsAdreno1xx() const;
|
||||
bool IsAdreno2xx() const;
|
||||
bool IsAdreno3xx() const;
|
||||
bool IsAdreno4xx() const;
|
||||
bool IsAdreno5xx() const;
|
||||
bool IsAdreno6xx() const;
|
||||
bool IsAdreno6xxOrHigher() const;
|
||||
|
||||
// This function returns some not very documented physical parameter of
|
||||
// Adreno6xx GPU.
|
||||
// We obtained it using Snapdragon Profiler.
|
||||
int GetMaximumWavesCount() const;
|
||||
|
||||
// returns amount of register memory per CU(Compute Unit) in bytes.
|
||||
int GetRegisterMemorySizePerComputeUnit() const;
|
||||
|
||||
// returns maximum possible amount of waves based on register usage.
|
||||
int GetMaximumWavesCount(int register_footprint_per_tread,
|
||||
bool full_wave = true) const;
|
||||
|
||||
int GetWaveSize(bool full_wave) const;
|
||||
|
||||
// Not supported on some Adreno devices with specific driver version.
|
||||
// b/131099086
|
||||
bool support_one_layer_texture_array = true;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
GpuType type = GpuType::UNKNOWN;
|
||||
bool IsAdreno() const;
|
||||
bool IsApple() const;
|
||||
bool IsMali() const;
|
||||
bool IsPowerVR() const;
|
||||
bool IsNvidia() const;
|
||||
bool IsAMD() const;
|
||||
bool IsIntel() const;
|
||||
|
||||
GpuVendor vendor = GpuVendor::kUnknown;
|
||||
|
||||
std::string renderer_name;
|
||||
std::string vendor_name;
|
||||
std::string version;
|
||||
GpuModel gpu_model;
|
||||
int major_version = -1;
|
||||
int minor_version = -1;
|
||||
std::vector<std::string> extensions;
|
||||
@ -90,6 +140,8 @@ struct GpuInfo {
|
||||
int max_texture_size = 0;
|
||||
int max_image_units = 0;
|
||||
int max_array_texture_layers = 0;
|
||||
|
||||
AdrenoInfo adreno_info;
|
||||
};
|
||||
|
||||
inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
|
||||
@ -97,9 +149,10 @@ inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
|
||||
gpu_info.major_version > 3;
|
||||
}
|
||||
|
||||
// Analyzes `renderer` and returns matching `GpuType` and `GpuModel`.
|
||||
void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model,
|
||||
GpuType* gpu_type);
|
||||
// Currently it initializes vendor and AdrenoInfo if
|
||||
// vendor is kQualcomm
|
||||
void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
|
||||
GpuInfo* gpu_info);
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -86,12 +86,12 @@ class AdrenoCommandQueue : public DefaultCommandQueue {
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<CommandQueue> NewCommandQueue(const GpuInfo& gpu_info) {
|
||||
if (gpu_info.type == GpuType::ADRENO) {
|
||||
if (gpu_info.IsAdreno()) {
|
||||
int flush_every_n = 1;
|
||||
// On Adreno 630 and Adreno 505 there is up to 2x performance boost when
|
||||
// glFlush happens not so often.
|
||||
if (gpu_info.gpu_model == GpuModel::ADRENO630 ||
|
||||
gpu_info.gpu_model == GpuModel::ADRENO505) {
|
||||
if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630 ||
|
||||
gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno505) {
|
||||
flush_every_n = 10;
|
||||
}
|
||||
return absl::make_unique<AdrenoCommandQueue>(flush_every_n);
|
||||
|
@ -65,21 +65,19 @@ bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
|
||||
}
|
||||
|
||||
// Textures are the fastest object storage on Adreno; buffers elsewhere.
ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
  if (gpu_info.IsAdreno()) {
    return ObjectType::TEXTURE;
  }
  return ObjectType::BUFFER;
}
|
||||
|
||||
// Picks the storage for reference objects. Adreno 630 prefers textures
// unconditionally; other Adreno GPUs use textures only when precision loss
// (fp16) is allowed. Non-Adreno GPUs always use buffers.
ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info,
                                      const CompilationOptions& options) {
  if (!gpu_info.IsAdreno()) {
    return ObjectType::BUFFER;
  }
  const bool is_adreno_630 =
      gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630;
  if (is_adreno_630 || options.allow_precision_loss) {
    return ObjectType::TEXTURE;
  }
  return ObjectType::BUFFER;
}
|
||||
|
||||
|
@ -31,13 +31,13 @@ namespace gl {
|
||||
|
||||
ShaderCodegen::ShaderCodegen(const CompilationOptions& options,
|
||||
const GpuInfo& gpu_info)
|
||||
: options_(options), gpu_type_(gpu_info.type) {}
|
||||
: options_(options), gpu_type_(gpu_info.vendor) {}
|
||||
|
||||
absl::Status ShaderCodegen::Build(CompiledNodeAttributes attr,
|
||||
ShaderCode* shader_code) const {
|
||||
VariableAccessor variable_accessor(options_.inline_parameters,
|
||||
options_.vulkan_support);
|
||||
ObjectAccessor object_accessor(gpu_type_ == GpuType::MALI,
|
||||
ObjectAccessor object_accessor(gpu_type_ == GpuVendor::kMali,
|
||||
options_.sampler_textures, &variable_accessor);
|
||||
|
||||
const auto add_object = [&](const std::string& name, Object&& object) {
|
||||
|
@ -44,7 +44,7 @@ class ShaderCodegen {
|
||||
|
||||
private:
|
||||
const CompilationOptions options_;
|
||||
const GpuType gpu_type_;
|
||||
const GpuVendor gpu_type_;
|
||||
};
|
||||
|
||||
} // namespace gl
|
||||
|
@ -89,7 +89,7 @@ absl::Status EglEnvironment::Init() {
|
||||
}
|
||||
}
|
||||
|
||||
if (gpu_info_.type == GpuType::UNKNOWN) {
|
||||
if (gpu_info_.vendor == GpuVendor::kUnknown) {
|
||||
RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
|
||||
}
|
||||
// TODO(akulik): when do we need ForceSyncTurning?
|
||||
@ -110,7 +110,7 @@ absl::Status EglEnvironment::InitSurfacelessContext() {
|
||||
// PowerVR support EGL_KHR_surfaceless_context, but glFenceSync crashes on
|
||||
// PowerVR when it is surface-less.
|
||||
RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
|
||||
if (gpu_info_.type == GpuType::POWERVR) {
|
||||
if (gpu_info_.IsPowerVR()) {
|
||||
return absl::UnavailableError(
|
||||
"Surface-less context is not properly supported on powervr.");
|
||||
}
|
||||
|
@ -134,7 +134,7 @@ class Convolution : public NodeShader {
|
||||
/*workload=*/uint3(),
|
||||
/*workgroup=*/
|
||||
GetIdealWorkgroupIfPossible(
|
||||
ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D,
|
||||
*ctx.gpu_info, OperationType::CONVOLUTION_2D,
|
||||
HW(weights.h, weights.w), attr.strides, uint3(0, 0, 0),
|
||||
OHWI(weights.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2],
|
||||
ctx.input_shapes[0][3])),
|
||||
@ -149,8 +149,7 @@ class Convolution : public NodeShader {
|
||||
int SelectMultiplier(int32_t input_width,
|
||||
const NodeShader::GenerationContext& ctx) {
|
||||
std::vector<int> multipliers = {4, 2};
|
||||
if (!ctx.compiler_options.allow_precision_loss &&
|
||||
ctx.gpu_info->type == GpuType::MALI) {
|
||||
if (!ctx.compiler_options.allow_precision_loss && ctx.gpu_info->IsMali()) {
|
||||
multipliers = {2};
|
||||
}
|
||||
for (int i : multipliers) {
|
||||
@ -234,7 +233,7 @@ class Convolution1x1 : public NodeShader {
|
||||
|
||||
auto dst_depth = DivideRoundUp(ctx.output_shapes[0][3], 4);
|
||||
uint3 workgroup = uint3(16, 16, 1);
|
||||
if (ctx.gpu_info->type == GpuType::ADRENO) {
|
||||
if (ctx.gpu_info->IsAdreno()) {
|
||||
if (dst_depth >= 2) {
|
||||
workgroup = uint3(8, 8, 2);
|
||||
}
|
||||
@ -276,7 +275,7 @@ class Convolution1x1 : public NodeShader {
|
||||
DivideRoundUp(ctx.output_shapes[0][3], 4)),
|
||||
/*workgroup=*/
|
||||
GetIdealWorkgroupIfPossible(
|
||||
ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D,
|
||||
*ctx.gpu_info, OperationType::CONVOLUTION_2D,
|
||||
HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
|
||||
workgroup,
|
||||
OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],
|
||||
|
@ -141,7 +141,7 @@ class DepthwiseConvolution : public NodeShader {
|
||||
/*workload=*/uint3(),
|
||||
/*workgroup=*/
|
||||
GetIdealWorkgroupIfPossible(
|
||||
ctx.gpu_info->gpu_model, OperationType::DEPTHWISE_CONVOLUTION,
|
||||
*ctx.gpu_info, OperationType::DEPTHWISE_CONVOLUTION,
|
||||
HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
|
||||
OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],
|
||||
ctx.input_shapes[0][2], ctx.input_shapes[0][3])),
|
||||
|
@ -34,7 +34,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
|
||||
const GLubyte* renderer_name = glGetString(GL_RENDERER);
|
||||
if (renderer_name) {
|
||||
info.renderer_name = reinterpret_cast<const char*>(renderer_name);
|
||||
GetGpuModelAndType(info.renderer_name, &info.gpu_model, &info.type);
|
||||
GetGpuInfoFromDeviceDescription(info.renderer_name, &info);
|
||||
}
|
||||
|
||||
const GLubyte* vendor_name = glGetString(GL_VENDOR);
|
||||
|
@ -81,7 +81,7 @@ class WorkgroupsCalculatorForMali : public WorkgroupsCalculator {
|
||||
|
||||
std::unique_ptr<WorkgroupsCalculator> NewDefaultWorkgroupsCalculator(
|
||||
const GpuInfo& gpu_info) {
|
||||
if (gpu_info.type == GpuType::MALI) {
|
||||
if (gpu_info.IsMali()) {
|
||||
return absl::make_unique<WorkgroupsCalculatorForMali>(gpu_info);
|
||||
} else {
|
||||
return absl::make_unique<DefaultWorkgroupsCalculator>(gpu_info);
|
||||
|
@ -137,40 +137,45 @@ std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
|
||||
std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;
|
||||
|
||||
// Put all ideal workgroups from the list together.
|
||||
const std::map<GpuModel, IdealWorkgroups>* kIdealWorkgroupsInfoPtr =
|
||||
new std::map<GpuModel, IdealWorkgroups>{
|
||||
{GpuModel::ADRENO630,
|
||||
const std::map<AdrenoGpu, IdealWorkgroups>* kIdealAdrenoWorkgroupsInfoPtr =
|
||||
new std::map<AdrenoGpu, IdealWorkgroups>{
|
||||
{AdrenoGpu::kAdreno630,
|
||||
{*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
|
||||
{GpuModel::ADRENO540, {*kIdealByTypeAdreno540Ptr, {}}},
|
||||
{GpuModel::ADRENO510,
|
||||
{AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno510,
|
||||
{*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
|
||||
{GpuModel::ADRENO509, {*kIdealByTypeAdreno509Ptr, {}}},
|
||||
{GpuModel::ADRENO508, {*kIdealByTypeAdreno508Ptr, {}}},
|
||||
{GpuModel::ADRENO506, {*kIdealByTypeAdreno506Ptr, {}}},
|
||||
{GpuModel::ADRENO505, {*kIdealByTypeAdreno505Ptr, {}}},
|
||||
{GpuModel::ADRENO418, {*kIdealByTypeAdreno418Ptr, {}}},
|
||||
{GpuModel::ADRENO405, {*kIdealByTypeAdreno405Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}},
|
||||
{AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}},
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
HW kernel, HW strides, uint3 default_wg,
|
||||
OHWI workload) {
|
||||
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
|
||||
OperationType op_type, HW kernel, HW strides,
|
||||
uint3 default_wg, OHWI workload) {
|
||||
// Research showed that ideal workgroup approach doesn't work well with
|
||||
// convolutions, which have small amount of output channels or output
|
||||
// height/width dimensions
|
||||
if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;
|
||||
|
||||
if (!gpu_info.IsAdreno()) {
|
||||
return default_wg;
|
||||
}
|
||||
auto adreno_gpu_version = gpu_info.adreno_info.adreno_gpu;
|
||||
|
||||
// If GPU was investigated
|
||||
if (!kIdealWorkgroupsInfoPtr->count(gpu_model)) {
|
||||
if (!kIdealAdrenoWorkgroupsInfoPtr->count(adreno_gpu_version)) {
|
||||
return default_wg;
|
||||
}
|
||||
|
||||
// Try to find the ideal workgroup by the specific operation case, cause they
|
||||
// are expected to be better tuned than default "by type" cases
|
||||
for (const auto& specific_case :
|
||||
kIdealWorkgroupsInfoPtr->at(gpu_model).by_case) {
|
||||
kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_case) {
|
||||
if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
|
||||
return specific_case.ideal_workgroup;
|
||||
}
|
||||
@ -178,7 +183,7 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
|
||||
// Try to find the ideal workgroup by the operation type
|
||||
for (const auto& default_case :
|
||||
kIdealWorkgroupsInfoPtr->at(gpu_model).by_type) {
|
||||
kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_type) {
|
||||
if (default_case.ParamsAccepted(op_type)) {
|
||||
return default_case.ideal_workgroup;
|
||||
}
|
||||
@ -189,9 +194,10 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
return default_wg;
|
||||
}
|
||||
|
||||
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
HW kernel, HW strides, OHWI workload) {
|
||||
return GetIdealWorkgroupIfPossible(gpu_model, op_type, kernel, strides,
|
||||
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
|
||||
OperationType op_type, HW kernel, HW strides,
|
||||
OHWI workload) {
|
||||
return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides,
|
||||
kEmptyWorkgroupSize, workload);
|
||||
}
|
||||
|
||||
|
@ -28,15 +28,16 @@ namespace gl {
|
||||
// Picks up the ideal workgroup size for the given convolution case.
|
||||
// Ideal workgroup gives top 10% of the possible performance for the given case.
|
||||
// They are received after the workgroup performance research (b/117291356).
|
||||
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
HW kernel, HW strides, OHWI workload);
|
||||
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
|
||||
OperationType op_type, HW kernel, HW strides,
|
||||
OHWI workload);
|
||||
|
||||
// Does the same as the function above. Use this one if your operation can
|
||||
// suggest some reasonable workgroup size. It's expected to give better
|
||||
// performance than the default workgroup calculator.
|
||||
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
|
||||
HW kernel, HW strides, uint3 default_wg,
|
||||
OHWI workload);
|
||||
uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
|
||||
OperationType op_type, HW kernel, HW strides,
|
||||
uint3 default_wg, OHWI workload);
|
||||
|
||||
} // namespace gl
|
||||
} // namespace gpu
|
||||
|
Loading…
Reference in New Issue
Block a user