GpuInfo extended, added Adreno info, making it similar to the OpenCL backend.

PiperOrigin-RevId: 341140554
Change-Id: I46b4476f28046fd4e16f1cc1d6d918032c860446

parent f903439d8f, commit 430dbcc5de
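For orientation, a minimal usage sketch of the reworked API (illustrative only, not part of the diff below; the wrapper function `IsAdreno630Renderer` is hypothetical). Vendor checks move from `gpu_info.type == GpuType::ADRENO` to `gpu_info.IsAdreno()`, and model checks move from `gpu_info.gpu_model` to `gpu_info.adreno_info.adreno_gpu`:

#include <string>

#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"

// Hypothetical caller, assuming the API introduced in this commit.
bool IsAdreno630Renderer(const std::string& renderer) {
  tflite::gpu::GpuInfo info;
  tflite::gpu::GetGpuInfoFromDeviceDescription(renderer, &info);
  return info.IsAdreno() &&
         info.adreno_info.adreno_gpu == tflite::gpu::AdrenoGpu::kAdreno630;
}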
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
 
+#include <map>
 #include <string>
 
 #include "absl/strings/ascii.h"
@@ -23,79 +24,217 @@ namespace tflite {
 namespace gpu {
 namespace {
 
-GpuType GetGpuType(const std::string& renderer) {
+GpuVendor GetGpuVendor(const std::string& renderer) {
   if (renderer.find("mali") != renderer.npos) {
-    return GpuType::MALI;
+    return GpuVendor::kMali;
   }
   if (renderer.find("adreno") != renderer.npos) {
-    return GpuType::ADRENO;
+    return GpuVendor::kQualcomm;
   }
   if (renderer.find("powervr") != renderer.npos) {
-    return GpuType::POWERVR;
+    return GpuVendor::kPowerVR;
   }
   if (renderer.find("intel") != renderer.npos) {
-    return GpuType::INTEL;
+    return GpuVendor::kIntel;
   }
   if (renderer.find("nvidia") != renderer.npos) {
-    return GpuType::NVIDIA;
+    return GpuVendor::kNvidia;
   }
-  return GpuType::UNKNOWN;
+  return GpuVendor::kUnknown;
 }
 
-GpuModel GetGpuModel(const std::string& renderer) {
-  auto found_model = [&](std::string model) -> bool {
-    return renderer.find(model) != renderer.npos;
+AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) {
+  const std::map<std::string, AdrenoGpu> kMapping = {
+      // Adreno 6xx series
+      {"685", AdrenoGpu::kAdreno685},
+      {"680", AdrenoGpu::kAdreno680},
+      {"675", AdrenoGpu::kAdreno675},
+      {"650", AdrenoGpu::kAdreno650},
+      {"640", AdrenoGpu::kAdreno640},
+      {"630", AdrenoGpu::kAdreno630},
+      {"620", AdrenoGpu::kAdreno620},
+      {"618", AdrenoGpu::kAdreno618},
+      {"616", AdrenoGpu::kAdreno616},
+      {"615", AdrenoGpu::kAdreno615},
+      {"612", AdrenoGpu::kAdreno612},
+      {"610", AdrenoGpu::kAdreno610},
+      {"605", AdrenoGpu::kAdreno605},
+      // Adreno 5xx series
+      {"540", AdrenoGpu::kAdreno540},
+      {"530", AdrenoGpu::kAdreno530},
+      {"512", AdrenoGpu::kAdreno512},
+      {"510", AdrenoGpu::kAdreno510},
+      {"509", AdrenoGpu::kAdreno509},
+      {"508", AdrenoGpu::kAdreno508},
+      {"506", AdrenoGpu::kAdreno506},
+      {"505", AdrenoGpu::kAdreno505},
+      {"504", AdrenoGpu::kAdreno504},
+      // Adreno 4xx series
+      {"430", AdrenoGpu::kAdreno430},
+      {"420", AdrenoGpu::kAdreno420},
+      {"418", AdrenoGpu::kAdreno418},
+      {"405", AdrenoGpu::kAdreno405},
+      // Adreno 3xx series
+      {"330", AdrenoGpu::kAdreno330},
+      {"320", AdrenoGpu::kAdreno320},
+      {"308", AdrenoGpu::kAdreno308},
+      {"306", AdrenoGpu::kAdreno306},
+      {"305", AdrenoGpu::kAdreno305},
+      {"304", AdrenoGpu::kAdreno304},
+      // Adreno 2xx series
+      {"225", AdrenoGpu::kAdreno225},
+      {"220", AdrenoGpu::kAdreno220},
+      {"205", AdrenoGpu::kAdreno205},
+      {"203", AdrenoGpu::kAdreno203},
+      {"200", AdrenoGpu::kAdreno200},
+      // Adreno 1xx series
+      {"130", AdrenoGpu::kAdreno130},
+      {"120", AdrenoGpu::kAdreno120},
   };
-  // Adreno 6xx series
-  if (found_model("640")) return GpuModel::ADRENO640;
-  if (found_model("630")) return GpuModel::ADRENO630;
-  if (found_model("616")) return GpuModel::ADRENO616;
-  if (found_model("615")) return GpuModel::ADRENO615;
-  if (found_model("612")) return GpuModel::ADRENO612;
-  if (found_model("605")) return GpuModel::ADRENO605;
-  // Adreno 5xx series
-  if (found_model("540")) return GpuModel::ADRENO540;
-  if (found_model("530")) return GpuModel::ADRENO530;
-  if (found_model("512")) return GpuModel::ADRENO512;
-  if (found_model("510")) return GpuModel::ADRENO510;
-  if (found_model("509")) return GpuModel::ADRENO509;
-  if (found_model("508")) return GpuModel::ADRENO508;
-  if (found_model("506")) return GpuModel::ADRENO506;
-  if (found_model("505")) return GpuModel::ADRENO505;
-  if (found_model("504")) return GpuModel::ADRENO504;
-  // Adreno 4xx series
-  if (found_model("430")) return GpuModel::ADRENO430;
-  if (found_model("420")) return GpuModel::ADRENO420;
-  if (found_model("418")) return GpuModel::ADRENO418;
-  if (found_model("405")) return GpuModel::ADRENO405;
-  // Adreno 3xx series
-  if (found_model("330")) return GpuModel::ADRENO330;
-  if (found_model("320")) return GpuModel::ADRENO320;
-  if (found_model("308")) return GpuModel::ADRENO308;
-  if (found_model("306")) return GpuModel::ADRENO306;
-  if (found_model("305")) return GpuModel::ADRENO305;
-  if (found_model("304")) return GpuModel::ADRENO304;
-  // Adreno 2xx series
-  if (found_model("225")) return GpuModel::ADRENO225;
-  if (found_model("220")) return GpuModel::ADRENO220;
-  if (found_model("205")) return GpuModel::ADRENO205;
-  if (found_model("203")) return GpuModel::ADRENO203;
-  if (found_model("200")) return GpuModel::ADRENO200;
-  // Adreno 1xx series
-  if (found_model("130")) return GpuModel::ADRENO130;
-  return GpuModel::UNKNOWN;
+  for (const auto& v : kMapping) {
+    if (device_name.find(v.first) != std::string::npos) {
+      return v.second;
+    }
+  }
+  return AdrenoGpu::kUnknown;
 }
 
 }  // namespace
 
-void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model,
-                        GpuType* gpu_type) {
-  std::string lowered = renderer;
-  absl::AsciiStrToLower(&lowered);
-  *gpu_type = GetGpuType(lowered);
-  *gpu_model =
-      *gpu_type == GpuType::ADRENO ? GetGpuModel(lowered) : GpuModel::UNKNOWN;
+AdrenoInfo::AdrenoInfo(const std::string& device_version)
+    : adreno_gpu(GetAdrenoGpuVersion(device_version)) {}
+
+bool AdrenoInfo::IsAdreno1xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno120 ||
+         adreno_gpu == AdrenoGpu::kAdreno130;
 }
 
+bool AdrenoInfo::IsAdreno2xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno200 ||
+         adreno_gpu == AdrenoGpu::kAdreno203 ||
+         adreno_gpu == AdrenoGpu::kAdreno205 ||
+         adreno_gpu == AdrenoGpu::kAdreno220 ||
+         adreno_gpu == AdrenoGpu::kAdreno225;
+}
+
+bool AdrenoInfo::IsAdreno3xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno304 ||
+         adreno_gpu == AdrenoGpu::kAdreno305 ||
+         adreno_gpu == AdrenoGpu::kAdreno306 ||
+         adreno_gpu == AdrenoGpu::kAdreno308 ||
+         adreno_gpu == AdrenoGpu::kAdreno320 ||
+         adreno_gpu == AdrenoGpu::kAdreno330;
+}
+
+bool AdrenoInfo::IsAdreno4xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno405 ||
+         adreno_gpu == AdrenoGpu::kAdreno418 ||
+         adreno_gpu == AdrenoGpu::kAdreno420 ||
+         adreno_gpu == AdrenoGpu::kAdreno430;
+}
+
+bool AdrenoInfo::IsAdreno5xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno504 ||
+         adreno_gpu == AdrenoGpu::kAdreno505 ||
+         adreno_gpu == AdrenoGpu::kAdreno506 ||
+         adreno_gpu == AdrenoGpu::kAdreno508 ||
+         adreno_gpu == AdrenoGpu::kAdreno509 ||
+         adreno_gpu == AdrenoGpu::kAdreno510 ||
+         adreno_gpu == AdrenoGpu::kAdreno512 ||
+         adreno_gpu == AdrenoGpu::kAdreno530 ||
+         adreno_gpu == AdrenoGpu::kAdreno540;
+}
+
+bool AdrenoInfo::IsAdreno6xx() const {
+  return adreno_gpu == AdrenoGpu::kAdreno605 ||
+         adreno_gpu == AdrenoGpu::kAdreno610 ||
+         adreno_gpu == AdrenoGpu::kAdreno612 ||
+         adreno_gpu == AdrenoGpu::kAdreno615 ||
+         adreno_gpu == AdrenoGpu::kAdreno616 ||
+         adreno_gpu == AdrenoGpu::kAdreno618 ||
+         adreno_gpu == AdrenoGpu::kAdreno620 ||
+         adreno_gpu == AdrenoGpu::kAdreno630 ||
+         adreno_gpu == AdrenoGpu::kAdreno640 ||
+         adreno_gpu == AdrenoGpu::kAdreno650 ||
+         adreno_gpu == AdrenoGpu::kAdreno675 ||
+         adreno_gpu == AdrenoGpu::kAdreno680 ||
+         adreno_gpu == AdrenoGpu::kAdreno685;
+}
+
+bool AdrenoInfo::IsAdreno6xxOrHigher() const { return IsAdreno6xx(); }
+
+int AdrenoInfo::GetMaximumWavesCount() const {
+  if (IsAdreno6xx()) {
+    if (adreno_gpu == AdrenoGpu::kAdreno640) {
+      return 30;
+    } else {
+      return 16;
+    }
+  } else {
+    // all other versions not supported
+    return 1;
+  }
+}
+
+int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
+  if (IsAdreno6xx()) {
+    if (adreno_gpu == AdrenoGpu::kAdreno640) {
+      return 128 * 144 * 16;
+    } else if (adreno_gpu == AdrenoGpu::kAdreno650) {
+      return 128 * 64 * 16;
+    } else {
+      return 128 * 96 * 16;
+    }
+  } else {
+    // all other versions not supported
+    return 1;
+  }
+}
+
+int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
+                                     bool full_wave) const {
+  const int register_usage_per_wave =
+      GetWaveSize(full_wave) * register_footprint_per_tread;
+  const int possible_waves_count =
+      GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
+  return std::min(possible_waves_count, GetMaximumWavesCount());
+}
+
+int AdrenoInfo::GetWaveSize(bool full_wave) const {
+  if (IsAdreno6xx()) {
+    return full_wave ? 128 : 64;
+  } else if (IsAdreno5xx() || IsAdreno4xx()) {
+    return full_wave ? 64 : 32;
+  } else {
+    // all other versions not supported
+    return 1;
+  }
+}
+
+void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
+                                     GpuInfo* gpu_info) {
+  std::string lowered = gpu_description;
+  absl::AsciiStrToLower(&lowered);
+  gpu_info->vendor = GetGpuVendor(lowered);
+  if (gpu_info->IsAdreno()) {
+    gpu_info->adreno_info = AdrenoInfo(lowered);
+  }
+}
+
+bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; }
+
+bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; }
+
+bool GpuInfo::IsMali() const { return vendor == GpuVendor::kMali; }
+
+bool GpuInfo::IsPowerVR() const { return vendor == GpuVendor::kPowerVR; }
+
+bool GpuInfo::IsNvidia() const { return vendor == GpuVendor::kNvidia; }
+
+bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; }
+
+bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; }
+
 }  // namespace gpu
 }  // namespace tflite
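A worked example of the wave-count arithmetic above, assuming a hypothetical register footprint of 64 bytes per thread on an Adreno 640 (all other values come from the functions in this hunk):

// Adreno 640, full_wave = true, register_footprint_per_tread = 64 (assumed):
//   GetWaveSize(true)                     = 128
//   GetRegisterMemorySizePerComputeUnit() = 128 * 144 * 16 = 294912
//   register_usage_per_wave               = 128 * 64       = 8192
//   possible_waves_count                  = 294912 / 8192  = 36
//   GetMaximumWavesCount(64, true)        = std::min(36, /*GetMaximumWavesCount()=*/30) = 30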
@@ -23,63 +23,113 @@ namespace tflite {
 namespace gpu {
 
 // The VendorID returned by the GPU driver.
-enum class GpuType {
-  UNKNOWN,
-  APPLE,
-  MALI,
-  ADRENO,
-  POWERVR,
-  INTEL,
-  AMD,
-  NVIDIA,
+enum class GpuVendor {
+  kApple,
+  kQualcomm,
+  kMali,
+  kPowerVR,
+  kNvidia,
+  kAMD,
+  kIntel,
+  kUnknown
 };
-enum class GpuModel {
-  UNKNOWN,
+
+enum class AdrenoGpu {
   // Adreno 6xx series
-  ADRENO640,
-  ADRENO630,
-  ADRENO616,
-  ADRENO615,
-  ADRENO612,
-  ADRENO605,
+  kAdreno685,
+  kAdreno680,
+  kAdreno675,
+  kAdreno650,
+  kAdreno640,
+  kAdreno630,
+  kAdreno620,
+  kAdreno618,
+  kAdreno616,
+  kAdreno615,
+  kAdreno612,
+  kAdreno610,
+  kAdreno605,
   // Adreno 5xx series
-  ADRENO540,
-  ADRENO530,
-  ADRENO512,
-  ADRENO510,
-  ADRENO509,
-  ADRENO508,
-  ADRENO506,
-  ADRENO505,
-  ADRENO504,
+  kAdreno540,
+  kAdreno530,
+  kAdreno512,
+  kAdreno510,
+  kAdreno509,
+  kAdreno508,
+  kAdreno506,
+  kAdreno505,
+  kAdreno504,
   // Adreno 4xx series
-  ADRENO430,
-  ADRENO420,
-  ADRENO418,
-  ADRENO405,
+  kAdreno430,
+  kAdreno420,
+  kAdreno418,
+  kAdreno405,
   // Adreno 3xx series
-  ADRENO330,
-  ADRENO320,
-  ADRENO308,
-  ADRENO306,
-  ADRENO305,
-  ADRENO304,
+  kAdreno330,
+  kAdreno320,
+  kAdreno308,
+  kAdreno306,
+  kAdreno305,
+  kAdreno304,
   // Adreno 2xx series
-  ADRENO225,
-  ADRENO220,
-  ADRENO205,
-  ADRENO203,
-  ADRENO200,
+  kAdreno225,
+  kAdreno220,
+  kAdreno205,
+  kAdreno203,
+  kAdreno200,
   // Adreno 1xx series
-  ADRENO130,
+  kAdreno130,
+  kAdreno120,
+  kUnknown
+};
+
+struct AdrenoInfo {
+  AdrenoInfo() = default;
+  explicit AdrenoInfo(const std::string& device_version);
+
+  AdrenoGpu adreno_gpu;
+
+  bool IsAdreno1xx() const;
+  bool IsAdreno2xx() const;
+  bool IsAdreno3xx() const;
+  bool IsAdreno4xx() const;
+  bool IsAdreno5xx() const;
+  bool IsAdreno6xx() const;
+  bool IsAdreno6xxOrHigher() const;
+
+  // This function returns some not very documented physical parameter of
+  // Adreno6xx GPU.
+  // We obtained it using Snapdragon Profiler.
+  int GetMaximumWavesCount() const;
+
+  // returns amount of register memory per CU(Compute Unit) in bytes.
+  int GetRegisterMemorySizePerComputeUnit() const;
+
+  // returns maximum possible amount of waves based on register usage.
+  int GetMaximumWavesCount(int register_footprint_per_tread,
+                           bool full_wave = true) const;
+
+  int GetWaveSize(bool full_wave) const;
+
+  // Not supported on some Adreno devices with specific driver version.
+  // b/131099086
+  bool support_one_layer_texture_array = true;
 };
 
 struct GpuInfo {
-  GpuType type = GpuType::UNKNOWN;
+  bool IsAdreno() const;
+  bool IsApple() const;
+  bool IsMali() const;
+  bool IsPowerVR() const;
+  bool IsNvidia() const;
+  bool IsAMD() const;
+  bool IsIntel() const;
+
+  GpuVendor vendor = GpuVendor::kUnknown;
+
   std::string renderer_name;
   std::string vendor_name;
   std::string version;
-  GpuModel gpu_model;
   int major_version = -1;
   int minor_version = -1;
   std::vector<std::string> extensions;
@@ -90,6 +140,8 @@ struct GpuInfo {
   int max_texture_size = 0;
   int max_image_units = 0;
   int max_array_texture_layers = 0;
+
+  AdrenoInfo adreno_info;
 };
 
 inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
@@ -97,9 +149,10 @@ inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
          gpu_info.major_version > 3;
 }
 
-// Analyzes `renderer` and returns matching `GpuType` and `GpuModel`.
-void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model,
-                        GpuType* gpu_type);
+// Currently it initializes vendor and AdrenoInfo if
+// vendor is kQualcomm
+void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
+                                     GpuInfo* gpu_info);
 
 }  // namespace gpu
 }  // namespace tflite
@@ -86,12 +86,12 @@ class AdrenoCommandQueue : public DefaultCommandQueue {
 }  // namespace
 
 std::unique_ptr<CommandQueue> NewCommandQueue(const GpuInfo& gpu_info) {
-  if (gpu_info.type == GpuType::ADRENO) {
+  if (gpu_info.IsAdreno()) {
     int flush_every_n = 1;
     // On Adreno 630 and Adreno 505 there is up to 2x performance boost when
     // glFlush happens not so often.
-    if (gpu_info.gpu_model == GpuModel::ADRENO630 ||
-        gpu_info.gpu_model == GpuModel::ADRENO505) {
+    if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630 ||
+        gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno505) {
       flush_every_n = 10;
     }
     return absl::make_unique<AdrenoCommandQueue>(flush_every_n);
@@ -65,21 +65,19 @@ bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
 }
 
 ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
-  return gpu_info.type == GpuType::ADRENO ? ObjectType::TEXTURE
-                                          : ObjectType::BUFFER;
+  return gpu_info.IsAdreno() ? ObjectType::TEXTURE : ObjectType::BUFFER;
 }
 
 ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info,
                                       const CompilationOptions& options) {
-  if (gpu_info.type != GpuType::ADRENO) {
+  if (!gpu_info.IsAdreno()) {
     return ObjectType::BUFFER;
   }
-  switch (gpu_info.gpu_model) {
-    case GpuModel::ADRENO630:
-      return ObjectType::TEXTURE;
-    default:
-      return options.allow_precision_loss ? ObjectType::TEXTURE
-                                           : ObjectType::BUFFER;
+  if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
+    return ObjectType::TEXTURE;
+  } else {
+    return options.allow_precision_loss ? ObjectType::TEXTURE
+                                        : ObjectType::BUFFER;
   }
 }
 
@@ -31,13 +31,13 @@ namespace gl {
 
 ShaderCodegen::ShaderCodegen(const CompilationOptions& options,
                              const GpuInfo& gpu_info)
-    : options_(options), gpu_type_(gpu_info.type) {}
+    : options_(options), gpu_type_(gpu_info.vendor) {}
 
 absl::Status ShaderCodegen::Build(CompiledNodeAttributes attr,
                                   ShaderCode* shader_code) const {
   VariableAccessor variable_accessor(options_.inline_parameters,
                                      options_.vulkan_support);
-  ObjectAccessor object_accessor(gpu_type_ == GpuType::MALI,
+  ObjectAccessor object_accessor(gpu_type_ == GpuVendor::kMali,
                                  options_.sampler_textures, &variable_accessor);
 
   const auto add_object = [&](const std::string& name, Object&& object) {
@@ -44,7 +44,7 @@ class ShaderCodegen {
 
  private:
   const CompilationOptions options_;
-  const GpuType gpu_type_;
+  const GpuVendor gpu_type_;
 };
 
 }  // namespace gl
@@ -89,7 +89,7 @@ absl::Status EglEnvironment::Init() {
     }
   }
 
-  if (gpu_info_.type == GpuType::UNKNOWN) {
+  if (gpu_info_.vendor == GpuVendor::kUnknown) {
     RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
   }
   // TODO(akulik): when do we need ForceSyncTurning?
@@ -110,7 +110,7 @@ absl::Status EglEnvironment::InitSurfacelessContext() {
   // PowerVR support EGL_KHR_surfaceless_context, but glFenceSync crashes on
   // PowerVR when it is surface-less.
   RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
-  if (gpu_info_.type == GpuType::POWERVR) {
+  if (gpu_info_.IsPowerVR()) {
     return absl::UnavailableError(
         "Surface-less context is not properly supported on powervr.");
   }
@@ -134,7 +134,7 @@ class Convolution : public NodeShader {
         /*workload=*/uint3(),
         /*workgroup=*/
         GetIdealWorkgroupIfPossible(
-            ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D,
+            *ctx.gpu_info, OperationType::CONVOLUTION_2D,
             HW(weights.h, weights.w), attr.strides, uint3(0, 0, 0),
             OHWI(weights.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2],
                  ctx.input_shapes[0][3])),
@@ -149,8 +149,7 @@ class Convolution : public NodeShader {
 int SelectMultiplier(int32_t input_width,
                      const NodeShader::GenerationContext& ctx) {
   std::vector<int> multipliers = {4, 2};
-  if (!ctx.compiler_options.allow_precision_loss &&
-      ctx.gpu_info->type == GpuType::MALI) {
+  if (!ctx.compiler_options.allow_precision_loss && ctx.gpu_info->IsMali()) {
     multipliers = {2};
   }
   for (int i : multipliers) {
@@ -234,7 +233,7 @@ class Convolution1x1 : public NodeShader {
 
     auto dst_depth = DivideRoundUp(ctx.output_shapes[0][3], 4);
     uint3 workgroup = uint3(16, 16, 1);
-    if (ctx.gpu_info->type == GpuType::ADRENO) {
+    if (ctx.gpu_info->IsAdreno()) {
       if (dst_depth >= 2) {
        workgroup = uint3(8, 8, 2);
      }
@@ -276,7 +275,7 @@ class Convolution1x1 : public NodeShader {
             DivideRoundUp(ctx.output_shapes[0][3], 4)),
         /*workgroup=*/
         GetIdealWorkgroupIfPossible(
-            ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D,
+            *ctx.gpu_info, OperationType::CONVOLUTION_2D,
             HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
             workgroup,
             OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],
@@ -141,7 +141,7 @@ class DepthwiseConvolution : public NodeShader {
         /*workload=*/uint3(),
         /*workgroup=*/
         GetIdealWorkgroupIfPossible(
-            ctx.gpu_info->gpu_model, OperationType::DEPTHWISE_CONVOLUTION,
+            *ctx.gpu_info, OperationType::DEPTHWISE_CONVOLUTION,
             HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
             OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],
                  ctx.input_shapes[0][2], ctx.input_shapes[0][3])),
@@ -34,7 +34,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
   const GLubyte* renderer_name = glGetString(GL_RENDERER);
   if (renderer_name) {
     info.renderer_name = reinterpret_cast<const char*>(renderer_name);
-    GetGpuModelAndType(info.renderer_name, &info.gpu_model, &info.type);
+    GetGpuInfoFromDeviceDescription(info.renderer_name, &info);
   }
 
   const GLubyte* vendor_name = glGetString(GL_VENDOR);
@@ -81,7 +81,7 @@ class WorkgroupsCalculatorForMali : public WorkgroupsCalculator {
 
 std::unique_ptr<WorkgroupsCalculator> NewDefaultWorkgroupsCalculator(
     const GpuInfo& gpu_info) {
-  if (gpu_info.type == GpuType::MALI) {
+  if (gpu_info.IsMali()) {
     return absl::make_unique<WorkgroupsCalculatorForMali>(gpu_info);
   } else {
     return absl::make_unique<DefaultWorkgroupsCalculator>(gpu_info);
@@ -137,40 +137,45 @@ std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
 std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;
 
 // Put all ideal workgroups from the list together.
-const std::map<GpuModel, IdealWorkgroups>* kIdealWorkgroupsInfoPtr =
-    new std::map<GpuModel, IdealWorkgroups>{
-        {GpuModel::ADRENO630,
+const std::map<AdrenoGpu, IdealWorkgroups>* kIdealAdrenoWorkgroupsInfoPtr =
+    new std::map<AdrenoGpu, IdealWorkgroups>{
+        {AdrenoGpu::kAdreno630,
          {*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
-        {GpuModel::ADRENO540, {*kIdealByTypeAdreno540Ptr, {}}},
-        {GpuModel::ADRENO510,
+        {AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}},
+        {AdrenoGpu::kAdreno510,
          {*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
-        {GpuModel::ADRENO509, {*kIdealByTypeAdreno509Ptr, {}}},
-        {GpuModel::ADRENO508, {*kIdealByTypeAdreno508Ptr, {}}},
-        {GpuModel::ADRENO506, {*kIdealByTypeAdreno506Ptr, {}}},
-        {GpuModel::ADRENO505, {*kIdealByTypeAdreno505Ptr, {}}},
-        {GpuModel::ADRENO418, {*kIdealByTypeAdreno418Ptr, {}}},
-        {GpuModel::ADRENO405, {*kIdealByTypeAdreno405Ptr, {}}},
+        {AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}},
+        {AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}},
+        {AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}},
+        {AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}},
+        {AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}},
+        {AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}},
     };
 
 }  // namespace
 
-uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
-                                  HW kernel, HW strides, uint3 default_wg,
-                                  OHWI workload) {
+uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
+                                  OperationType op_type, HW kernel, HW strides,
+                                  uint3 default_wg, OHWI workload) {
   // Research showed that ideal workgroup approach doesn't work well with
   // convolutions, which have small amount of output channels or output
   // height/width dimensions
   if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;
 
+  if (!gpu_info.IsAdreno()) {
+    return default_wg;
+  }
+  auto adreno_gpu_version = gpu_info.adreno_info.adreno_gpu;
+
   // If GPU was investigated
-  if (!kIdealWorkgroupsInfoPtr->count(gpu_model)) {
+  if (!kIdealAdrenoWorkgroupsInfoPtr->count(adreno_gpu_version)) {
     return default_wg;
   }
 
   // Try to find the ideal workgroup by the specific operation case, cause they
   // are expected to be better tuned than default "by type" cases
   for (const auto& specific_case :
-       kIdealWorkgroupsInfoPtr->at(gpu_model).by_case) {
+       kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_case) {
     if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
       return specific_case.ideal_workgroup;
     }
@@ -178,7 +183,7 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
 
   // Try to find the ideal workgroup by the operation type
   for (const auto& default_case :
-       kIdealWorkgroupsInfoPtr->at(gpu_model).by_type) {
+       kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_type) {
     if (default_case.ParamsAccepted(op_type)) {
       return default_case.ideal_workgroup;
     }
@@ -189,9 +194,10 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
   return default_wg;
 }
 
-uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
-                                  HW kernel, HW strides, OHWI workload) {
-  return GetIdealWorkgroupIfPossible(gpu_model, op_type, kernel, strides,
+uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
+                                  OperationType op_type, HW kernel, HW strides,
+                                  OHWI workload) {
+  return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides,
                                      kEmptyWorkgroupSize, workload);
 }
 
@@ -28,15 +28,16 @@ namespace gl {
 // Picks up the ideal workgroup size for the given convolution case.
 // Ideal workgroup gives top 10% of the possible performance for the given case.
 // They are received after the workgroup performance research (b/117291356).
-uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
-                                  HW kernel, HW strides, OHWI workload);
+uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
+                                  OperationType op_type, HW kernel, HW strides,
+                                  OHWI workload);
 
 // Does the same as the function above. Use this one if your operation can
 // suggest some reasonable workgroup size. It's expected to give better
 // performance than the default workgroup calculator.
-uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
-                                  HW kernel, HW strides, uint3 default_wg,
-                                  OHWI workload);
+uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
+                                  OperationType op_type, HW kernel, HW strides,
+                                  uint3 default_wg, OHWI workload);
 
 }  // namespace gl
 }  // namespace gpu
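A call-site sketch for the new picker signature (the kernel, stride, and workload values below are placeholders; the convolution call sites earlier in the diff pass `*ctx.gpu_info` the same way):

// Placeholder example: 3x3 kernel, stride 1, OHWI(64, 128, 128, 32) workload.
uint3 wg = GetIdealWorkgroupIfPossible(
    gpu_info, OperationType::CONVOLUTION_2D,
    HW(3, 3), HW(1, 1), OHWI(64, 128, 128, 32));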