GpuInfo extended, added Adreno info.

Making it similar to OpenCL.

PiperOrigin-RevId: 341140554
Change-Id: I46b4476f28046fd4e16f1cc1d6d918032c860446
This commit is contained in:
Raman Sarokin 2020-11-06 16:34:52 -08:00 committed by TensorFlower Gardener
parent f903439d8f
commit 430dbcc5de
13 changed files with 348 additions and 152 deletions

View File

@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include <map>
#include <string> #include <string>
#include "absl/strings/ascii.h" #include "absl/strings/ascii.h"
@ -23,79 +24,217 @@ namespace tflite {
namespace gpu { namespace gpu {
namespace { namespace {
GpuType GetGpuType(const std::string& renderer) { GpuVendor GetGpuVendor(const std::string& renderer) {
if (renderer.find("mali") != renderer.npos) { if (renderer.find("mali") != renderer.npos) {
return GpuType::MALI; return GpuVendor::kMali;
} }
if (renderer.find("adreno") != renderer.npos) { if (renderer.find("adreno") != renderer.npos) {
return GpuType::ADRENO; return GpuVendor::kQualcomm;
} }
if (renderer.find("powervr") != renderer.npos) { if (renderer.find("powervr") != renderer.npos) {
return GpuType::POWERVR; return GpuVendor::kPowerVR;
} }
if (renderer.find("intel") != renderer.npos) { if (renderer.find("intel") != renderer.npos) {
return GpuType::INTEL; return GpuVendor::kIntel;
} }
if (renderer.find("nvidia") != renderer.npos) { if (renderer.find("nvidia") != renderer.npos) {
return GpuType::NVIDIA; return GpuVendor::kNvidia;
} }
return GpuType::UNKNOWN; return GpuVendor::kUnknown;
} }
GpuModel GetGpuModel(const std::string& renderer) { AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) {
auto found_model = [&](std::string model) -> bool { const std::map<std::string, AdrenoGpu> kMapping = {
return renderer.find(model) != renderer.npos; // Adreno 6xx series
{"685", AdrenoGpu::kAdreno685},
{"680", AdrenoGpu::kAdreno680},
{"675", AdrenoGpu::kAdreno675},
{"650", AdrenoGpu::kAdreno650},
{"640", AdrenoGpu::kAdreno640},
{"630", AdrenoGpu::kAdreno630},
{"620", AdrenoGpu::kAdreno620},
{"618", AdrenoGpu::kAdreno618},
{"616", AdrenoGpu::kAdreno616},
{"615", AdrenoGpu::kAdreno615},
{"612", AdrenoGpu::kAdreno612},
{"610", AdrenoGpu::kAdreno610},
{"605", AdrenoGpu::kAdreno605},
// Adreno 5xx series
{"540", AdrenoGpu::kAdreno540},
{"530", AdrenoGpu::kAdreno530},
{"512", AdrenoGpu::kAdreno512},
{"510", AdrenoGpu::kAdreno510},
{"509", AdrenoGpu::kAdreno509},
{"508", AdrenoGpu::kAdreno508},
{"506", AdrenoGpu::kAdreno506},
{"505", AdrenoGpu::kAdreno505},
{"504", AdrenoGpu::kAdreno504},
// Adreno 4xx series
{"430", AdrenoGpu::kAdreno430},
{"420", AdrenoGpu::kAdreno420},
{"418", AdrenoGpu::kAdreno418},
{"405", AdrenoGpu::kAdreno405},
// Adreno 3xx series
{"330", AdrenoGpu::kAdreno330},
{"320", AdrenoGpu::kAdreno320},
{"308", AdrenoGpu::kAdreno308},
{"306", AdrenoGpu::kAdreno306},
{"305", AdrenoGpu::kAdreno305},
{"304", AdrenoGpu::kAdreno304},
// Adreno 2xx series
{"225", AdrenoGpu::kAdreno225},
{"220", AdrenoGpu::kAdreno220},
{"205", AdrenoGpu::kAdreno205},
{"203", AdrenoGpu::kAdreno203},
{"200", AdrenoGpu::kAdreno200},
// Adreno 1xx series
{"130", AdrenoGpu::kAdreno130},
{"120", AdrenoGpu::kAdreno120},
}; };
// Adreno 6xx series
if (found_model("640")) return GpuModel::ADRENO640; for (const auto& v : kMapping) {
if (found_model("630")) return GpuModel::ADRENO630; if (device_name.find(v.first) != std::string::npos) {
if (found_model("616")) return GpuModel::ADRENO616; return v.second;
if (found_model("615")) return GpuModel::ADRENO615; }
if (found_model("612")) return GpuModel::ADRENO612; }
if (found_model("605")) return GpuModel::ADRENO605; return AdrenoGpu::kUnknown;
// Adreno 5xx series
if (found_model("540")) return GpuModel::ADRENO540;
if (found_model("530")) return GpuModel::ADRENO530;
if (found_model("512")) return GpuModel::ADRENO512;
if (found_model("510")) return GpuModel::ADRENO510;
if (found_model("509")) return GpuModel::ADRENO509;
if (found_model("508")) return GpuModel::ADRENO508;
if (found_model("506")) return GpuModel::ADRENO506;
if (found_model("505")) return GpuModel::ADRENO505;
if (found_model("504")) return GpuModel::ADRENO504;
// Adreno 4xx series
if (found_model("430")) return GpuModel::ADRENO430;
if (found_model("420")) return GpuModel::ADRENO420;
if (found_model("418")) return GpuModel::ADRENO418;
if (found_model("405")) return GpuModel::ADRENO405;
// Adreno 3xx series
if (found_model("330")) return GpuModel::ADRENO330;
if (found_model("320")) return GpuModel::ADRENO320;
if (found_model("308")) return GpuModel::ADRENO308;
if (found_model("306")) return GpuModel::ADRENO306;
if (found_model("305")) return GpuModel::ADRENO305;
if (found_model("304")) return GpuModel::ADRENO304;
// Adreno 2xx series
if (found_model("225")) return GpuModel::ADRENO225;
if (found_model("220")) return GpuModel::ADRENO220;
if (found_model("205")) return GpuModel::ADRENO205;
if (found_model("203")) return GpuModel::ADRENO203;
if (found_model("200")) return GpuModel::ADRENO200;
// Adreno 1xx series
if (found_model("130")) return GpuModel::ADRENO130;
return GpuModel::UNKNOWN;
} }
} // namespace } // namespace
void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model, AdrenoInfo::AdrenoInfo(const std::string& device_version)
GpuType* gpu_type) { : adreno_gpu(GetAdrenoGpuVersion(device_version)) {}
std::string lowered = renderer;
absl::AsciiStrToLower(&lowered); bool AdrenoInfo::IsAdreno1xx() const {
*gpu_type = GetGpuType(lowered); return adreno_gpu == AdrenoGpu::kAdreno120 ||
*gpu_model = adreno_gpu == AdrenoGpu::kAdreno130;
*gpu_type == GpuType::ADRENO ? GetGpuModel(lowered) : GpuModel::UNKNOWN;
} }
bool AdrenoInfo::IsAdreno2xx() const {
  // True for every GPU in the Adreno 2xx family.
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno200:
    case AdrenoGpu::kAdreno203:
    case AdrenoGpu::kAdreno205:
    case AdrenoGpu::kAdreno220:
    case AdrenoGpu::kAdreno225:
      return true;
    default:
      return false;
  }
}
bool AdrenoInfo::IsAdreno3xx() const {
  // True for every GPU in the Adreno 3xx family.
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno304:
    case AdrenoGpu::kAdreno305:
    case AdrenoGpu::kAdreno306:
    case AdrenoGpu::kAdreno308:
    case AdrenoGpu::kAdreno320:
    case AdrenoGpu::kAdreno330:
      return true;
    default:
      return false;
  }
}
bool AdrenoInfo::IsAdreno4xx() const {
  // True for every GPU in the Adreno 4xx family.
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno405:
    case AdrenoGpu::kAdreno418:
    case AdrenoGpu::kAdreno420:
    case AdrenoGpu::kAdreno430:
      return true;
    default:
      return false;
  }
}
bool AdrenoInfo::IsAdreno5xx() const {
  // True for every GPU in the Adreno 5xx family.
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno504:
    case AdrenoGpu::kAdreno505:
    case AdrenoGpu::kAdreno506:
    case AdrenoGpu::kAdreno508:
    case AdrenoGpu::kAdreno509:
    case AdrenoGpu::kAdreno510:
    case AdrenoGpu::kAdreno512:
    case AdrenoGpu::kAdreno530:
    case AdrenoGpu::kAdreno540:
      return true;
    default:
      return false;
  }
}
bool AdrenoInfo::IsAdreno6xx() const {
  // True for every GPU in the Adreno 6xx family.
  switch (adreno_gpu) {
    case AdrenoGpu::kAdreno605:
    case AdrenoGpu::kAdreno610:
    case AdrenoGpu::kAdreno612:
    case AdrenoGpu::kAdreno615:
    case AdrenoGpu::kAdreno616:
    case AdrenoGpu::kAdreno618:
    case AdrenoGpu::kAdreno620:
    case AdrenoGpu::kAdreno630:
    case AdrenoGpu::kAdreno640:
    case AdrenoGpu::kAdreno650:
    case AdrenoGpu::kAdreno675:
    case AdrenoGpu::kAdreno680:
    case AdrenoGpu::kAdreno685:
      return true;
    default:
      return false;
  }
}
// The 6xx series is the newest family enumerated in AdrenoGpu, so
// "6xx or higher" currently reduces to IsAdreno6xx(). Extend this when
// newer series are added to the enum.
bool AdrenoInfo::IsAdreno6xxOrHigher() const { return IsAdreno6xx(); }
int AdrenoInfo::GetMaximumWavesCount() const {
  // Hardware wave limits are only known for the 6xx series (values
  // obtained with Snapdragon Profiler, per the header comment).
  if (!IsAdreno6xx()) {
    // All other versions are not supported.
    return 1;
  }
  return adreno_gpu == AdrenoGpu::kAdreno640 ? 30 : 16;
}
int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const {
  // Register file size (bytes per compute unit); only known for 6xx GPUs.
  if (!IsAdreno6xx()) {
    // All other versions are not supported.
    return 1;
  }
  if (adreno_gpu == AdrenoGpu::kAdreno640) {
    return 128 * 144 * 16;
  }
  if (adreno_gpu == AdrenoGpu::kAdreno650) {
    return 128 * 64 * 16;
  }
  return 128 * 96 * 16;
}
int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread,
                                     bool full_wave) const {
  // Maximum number of waves that fit in the per-CU register file given the
  // per-thread register footprint, capped by the hardware wave limit.
  // (Parameter spelled "tread" to match the declaration in the header.)
  const int register_usage_per_wave =
      GetWaveSize(full_wave) * register_footprint_per_tread;
  // Guard against division by zero: a zero (or nonsensical negative)
  // footprint means register pressure does not constrain occupancy, so only
  // the hardware limit applies.
  if (register_usage_per_wave <= 0) {
    return GetMaximumWavesCount();
  }
  const int possible_waves_count =
      GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
  return std::min(possible_waves_count, GetMaximumWavesCount());
}
int AdrenoInfo::GetWaveSize(bool full_wave) const {
  // Wave width in threads; a "half wave" runs at half the full width.
  if (IsAdreno6xx()) {
    return full_wave ? 128 : 64;
  }
  if (IsAdreno5xx() || IsAdreno4xx()) {
    return full_wave ? 64 : 32;
  }
  // All other versions are not supported.
  return 1;
}
void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
                                     GpuInfo* gpu_info) {
  // Vendor detection is substring-based and case-insensitive, so normalize
  // the description to lowercase first.
  std::string normalized = gpu_description;
  absl::AsciiStrToLower(&normalized);
  gpu_info->vendor = GetGpuVendor(normalized);
  // Adreno-specific details are only populated for Qualcomm GPUs.
  if (gpu_info->IsAdreno()) {
    gpu_info->adreno_info = AdrenoInfo(normalized);
  }
}
// Vendor predicates: at most one returns true for a given GpuInfo, based
// solely on the `vendor` field set by GetGpuInfoFromDeviceDescription.
bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; }
bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; }
bool GpuInfo::IsMali() const { return vendor == GpuVendor::kMali; }
bool GpuInfo::IsPowerVR() const { return vendor == GpuVendor::kPowerVR; }
bool GpuInfo::IsNvidia() const { return vendor == GpuVendor::kNvidia; }
bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; }
bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; }
} // namespace gpu } // namespace gpu
} // namespace tflite } // namespace tflite

View File

@ -23,63 +23,113 @@ namespace tflite {
namespace gpu { namespace gpu {
// The VendorID returned by the GPU driver. // The VendorID returned by the GPU driver.
enum class GpuType { enum class GpuVendor {
UNKNOWN, kApple,
APPLE, kQualcomm,
MALI, kMali,
ADRENO, kPowerVR,
POWERVR, kNvidia,
INTEL, kAMD,
AMD, kIntel,
NVIDIA, kUnknown
}; };
enum class GpuModel {
UNKNOWN, enum class AdrenoGpu {
// Adreno 6xx series // Adreno 6xx series
ADRENO640, kAdreno685,
ADRENO630, kAdreno680,
ADRENO616, kAdreno675,
ADRENO615, kAdreno650,
ADRENO612, kAdreno640,
ADRENO605, kAdreno630,
kAdreno620,
kAdreno618,
kAdreno616,
kAdreno615,
kAdreno612,
kAdreno610,
kAdreno605,
// Adreno 5xx series // Adreno 5xx series
ADRENO540, kAdreno540,
ADRENO530, kAdreno530,
ADRENO512, kAdreno512,
ADRENO510, kAdreno510,
ADRENO509, kAdreno509,
ADRENO508, kAdreno508,
ADRENO506, kAdreno506,
ADRENO505, kAdreno505,
ADRENO504, kAdreno504,
// Adreno 4xx series // Adreno 4xx series
ADRENO430, kAdreno430,
ADRENO420, kAdreno420,
ADRENO418, kAdreno418,
ADRENO405, kAdreno405,
// Adreno 3xx series // Adreno 3xx series
ADRENO330, kAdreno330,
ADRENO320, kAdreno320,
ADRENO308, kAdreno308,
ADRENO306, kAdreno306,
ADRENO305, kAdreno305,
ADRENO304, kAdreno304,
// Adreno 2xx series // Adreno 2xx series
ADRENO225, kAdreno225,
ADRENO220, kAdreno220,
ADRENO205, kAdreno205,
ADRENO203, kAdreno203,
ADRENO200, kAdreno200,
// Adreno 1xx series // Adreno 1xx series
ADRENO130, kAdreno130,
kAdreno120,
kUnknown
};
struct AdrenoInfo {
AdrenoInfo() = default;
explicit AdrenoInfo(const std::string& device_version);
AdrenoGpu adreno_gpu;
bool IsAdreno1xx() const;
bool IsAdreno2xx() const;
bool IsAdreno3xx() const;
bool IsAdreno4xx() const;
bool IsAdreno5xx() const;
bool IsAdreno6xx() const;
bool IsAdreno6xxOrHigher() const;
// This function returns some not very documented physical parameter of
// Adreno6xx GPU.
// We obtained it using Snapdragon Profiler.
int GetMaximumWavesCount() const;
// returns amount of register memory per CU(Compute Unit) in bytes.
int GetRegisterMemorySizePerComputeUnit() const;
// returns maximum possible amount of waves based on register usage.
int GetMaximumWavesCount(int register_footprint_per_tread,
bool full_wave = true) const;
int GetWaveSize(bool full_wave) const;
// Not supported on some Adreno devices with specific driver version.
// b/131099086
bool support_one_layer_texture_array = true;
}; };
struct GpuInfo { struct GpuInfo {
GpuType type = GpuType::UNKNOWN; bool IsAdreno() const;
bool IsApple() const;
bool IsMali() const;
bool IsPowerVR() const;
bool IsNvidia() const;
bool IsAMD() const;
bool IsIntel() const;
GpuVendor vendor = GpuVendor::kUnknown;
std::string renderer_name; std::string renderer_name;
std::string vendor_name; std::string vendor_name;
std::string version; std::string version;
GpuModel gpu_model;
int major_version = -1; int major_version = -1;
int minor_version = -1; int minor_version = -1;
std::vector<std::string> extensions; std::vector<std::string> extensions;
@ -90,6 +140,8 @@ struct GpuInfo {
int max_texture_size = 0; int max_texture_size = 0;
int max_image_units = 0; int max_image_units = 0;
int max_array_texture_layers = 0; int max_array_texture_layers = 0;
AdrenoInfo adreno_info;
}; };
inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
@ -97,9 +149,10 @@ inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
gpu_info.major_version > 3; gpu_info.major_version > 3;
} }
// Analyzes `renderer` and returns matching `GpuType` and `GpuModel`. // Currently it initializes vendor and AdrenoInfo if
void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model, // vendor is kQualcomm
GpuType* gpu_type); void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
GpuInfo* gpu_info);
} // namespace gpu } // namespace gpu
} // namespace tflite } // namespace tflite

View File

@ -86,12 +86,12 @@ class AdrenoCommandQueue : public DefaultCommandQueue {
} // namespace } // namespace
std::unique_ptr<CommandQueue> NewCommandQueue(const GpuInfo& gpu_info) { std::unique_ptr<CommandQueue> NewCommandQueue(const GpuInfo& gpu_info) {
if (gpu_info.type == GpuType::ADRENO) { if (gpu_info.IsAdreno()) {
int flush_every_n = 1; int flush_every_n = 1;
// On Adreno 630 and Adreno 505 there is up to 2x performance boost when // On Adreno 630 and Adreno 505 there is up to 2x performance boost when
// glFlush happens not so often. // glFlush happens not so often.
if (gpu_info.gpu_model == GpuModel::ADRENO630 || if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630 ||
gpu_info.gpu_model == GpuModel::ADRENO505) { gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno505) {
flush_every_n = 10; flush_every_n = 10;
} }
return absl::make_unique<AdrenoCommandQueue>(flush_every_n); return absl::make_unique<AdrenoCommandQueue>(flush_every_n);

View File

@ -65,21 +65,19 @@ bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
} }
ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) { ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
return gpu_info.type == GpuType::ADRENO ? ObjectType::TEXTURE return gpu_info.IsAdreno() ? ObjectType::TEXTURE : ObjectType::BUFFER;
: ObjectType::BUFFER;
} }
ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info, ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info,
const CompilationOptions& options) { const CompilationOptions& options) {
if (gpu_info.type != GpuType::ADRENO) { if (!gpu_info.IsAdreno()) {
return ObjectType::BUFFER; return ObjectType::BUFFER;
} }
switch (gpu_info.gpu_model) { if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
case GpuModel::ADRENO630: return ObjectType::TEXTURE;
return ObjectType::TEXTURE; } else {
default: return options.allow_precision_loss ? ObjectType::TEXTURE
return options.allow_precision_loss ? ObjectType::TEXTURE : ObjectType::BUFFER;
: ObjectType::BUFFER;
} }
} }

View File

@ -31,13 +31,13 @@ namespace gl {
ShaderCodegen::ShaderCodegen(const CompilationOptions& options, ShaderCodegen::ShaderCodegen(const CompilationOptions& options,
const GpuInfo& gpu_info) const GpuInfo& gpu_info)
: options_(options), gpu_type_(gpu_info.type) {} : options_(options), gpu_type_(gpu_info.vendor) {}
absl::Status ShaderCodegen::Build(CompiledNodeAttributes attr, absl::Status ShaderCodegen::Build(CompiledNodeAttributes attr,
ShaderCode* shader_code) const { ShaderCode* shader_code) const {
VariableAccessor variable_accessor(options_.inline_parameters, VariableAccessor variable_accessor(options_.inline_parameters,
options_.vulkan_support); options_.vulkan_support);
ObjectAccessor object_accessor(gpu_type_ == GpuType::MALI, ObjectAccessor object_accessor(gpu_type_ == GpuVendor::kMali,
options_.sampler_textures, &variable_accessor); options_.sampler_textures, &variable_accessor);
const auto add_object = [&](const std::string& name, Object&& object) { const auto add_object = [&](const std::string& name, Object&& object) {

View File

@ -44,7 +44,7 @@ class ShaderCodegen {
private: private:
const CompilationOptions options_; const CompilationOptions options_;
const GpuType gpu_type_; const GpuVendor gpu_type_;
}; };
} // namespace gl } // namespace gl

View File

@ -89,7 +89,7 @@ absl::Status EglEnvironment::Init() {
} }
} }
if (gpu_info_.type == GpuType::UNKNOWN) { if (gpu_info_.vendor == GpuVendor::kUnknown) {
RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_)); RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
} }
// TODO(akulik): when do we need ForceSyncTurning? // TODO(akulik): when do we need ForceSyncTurning?
@ -110,7 +110,7 @@ absl::Status EglEnvironment::InitSurfacelessContext() {
// PowerVR support EGL_KHR_surfaceless_context, but glFenceSync crashes on // PowerVR support EGL_KHR_surfaceless_context, but glFenceSync crashes on
// PowerVR when it is surface-less. // PowerVR when it is surface-less.
RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_)); RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
if (gpu_info_.type == GpuType::POWERVR) { if (gpu_info_.IsPowerVR()) {
return absl::UnavailableError( return absl::UnavailableError(
"Surface-less context is not properly supported on powervr."); "Surface-less context is not properly supported on powervr.");
} }

View File

@ -134,7 +134,7 @@ class Convolution : public NodeShader {
/*workload=*/uint3(), /*workload=*/uint3(),
/*workgroup=*/ /*workgroup=*/
GetIdealWorkgroupIfPossible( GetIdealWorkgroupIfPossible(
ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D, *ctx.gpu_info, OperationType::CONVOLUTION_2D,
HW(weights.h, weights.w), attr.strides, uint3(0, 0, 0), HW(weights.h, weights.w), attr.strides, uint3(0, 0, 0),
OHWI(weights.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2], OHWI(weights.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2],
ctx.input_shapes[0][3])), ctx.input_shapes[0][3])),
@ -149,8 +149,7 @@ class Convolution : public NodeShader {
int SelectMultiplier(int32_t input_width, int SelectMultiplier(int32_t input_width,
const NodeShader::GenerationContext& ctx) { const NodeShader::GenerationContext& ctx) {
std::vector<int> multipliers = {4, 2}; std::vector<int> multipliers = {4, 2};
if (!ctx.compiler_options.allow_precision_loss && if (!ctx.compiler_options.allow_precision_loss && ctx.gpu_info->IsMali()) {
ctx.gpu_info->type == GpuType::MALI) {
multipliers = {2}; multipliers = {2};
} }
for (int i : multipliers) { for (int i : multipliers) {
@ -234,7 +233,7 @@ class Convolution1x1 : public NodeShader {
auto dst_depth = DivideRoundUp(ctx.output_shapes[0][3], 4); auto dst_depth = DivideRoundUp(ctx.output_shapes[0][3], 4);
uint3 workgroup = uint3(16, 16, 1); uint3 workgroup = uint3(16, 16, 1);
if (ctx.gpu_info->type == GpuType::ADRENO) { if (ctx.gpu_info->IsAdreno()) {
if (dst_depth >= 2) { if (dst_depth >= 2) {
workgroup = uint3(8, 8, 2); workgroup = uint3(8, 8, 2);
} }
@ -276,7 +275,7 @@ class Convolution1x1 : public NodeShader {
DivideRoundUp(ctx.output_shapes[0][3], 4)), DivideRoundUp(ctx.output_shapes[0][3], 4)),
/*workgroup=*/ /*workgroup=*/
GetIdealWorkgroupIfPossible( GetIdealWorkgroupIfPossible(
ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D, *ctx.gpu_info, OperationType::CONVOLUTION_2D,
HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides, HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
workgroup, workgroup,
OHWI(attr.weights.shape.o, ctx.input_shapes[0][1], OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],

View File

@ -141,7 +141,7 @@ class DepthwiseConvolution : public NodeShader {
/*workload=*/uint3(), /*workload=*/uint3(),
/*workgroup=*/ /*workgroup=*/
GetIdealWorkgroupIfPossible( GetIdealWorkgroupIfPossible(
ctx.gpu_info->gpu_model, OperationType::DEPTHWISE_CONVOLUTION, *ctx.gpu_info, OperationType::DEPTHWISE_CONVOLUTION,
HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides, HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides,
OHWI(attr.weights.shape.o, ctx.input_shapes[0][1], OHWI(attr.weights.shape.o, ctx.input_shapes[0][1],
ctx.input_shapes[0][2], ctx.input_shapes[0][3])), ctx.input_shapes[0][2], ctx.input_shapes[0][3])),

View File

@ -34,7 +34,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
const GLubyte* renderer_name = glGetString(GL_RENDERER); const GLubyte* renderer_name = glGetString(GL_RENDERER);
if (renderer_name) { if (renderer_name) {
info.renderer_name = reinterpret_cast<const char*>(renderer_name); info.renderer_name = reinterpret_cast<const char*>(renderer_name);
GetGpuModelAndType(info.renderer_name, &info.gpu_model, &info.type); GetGpuInfoFromDeviceDescription(info.renderer_name, &info);
} }
const GLubyte* vendor_name = glGetString(GL_VENDOR); const GLubyte* vendor_name = glGetString(GL_VENDOR);

View File

@ -81,7 +81,7 @@ class WorkgroupsCalculatorForMali : public WorkgroupsCalculator {
std::unique_ptr<WorkgroupsCalculator> NewDefaultWorkgroupsCalculator( std::unique_ptr<WorkgroupsCalculator> NewDefaultWorkgroupsCalculator(
const GpuInfo& gpu_info) { const GpuInfo& gpu_info) {
if (gpu_info.type == GpuType::MALI) { if (gpu_info.IsMali()) {
return absl::make_unique<WorkgroupsCalculatorForMali>(gpu_info); return absl::make_unique<WorkgroupsCalculatorForMali>(gpu_info);
} else { } else {
return absl::make_unique<DefaultWorkgroupsCalculator>(gpu_info); return absl::make_unique<DefaultWorkgroupsCalculator>(gpu_info);

View File

@ -137,40 +137,45 @@ std::vector<IdealByType>* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr;
std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr; std::vector<IdealByType>* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr;
// Put all ideal workgroups from the list together. // Put all ideal workgroups from the list together.
const std::map<GpuModel, IdealWorkgroups>* kIdealWorkgroupsInfoPtr = const std::map<AdrenoGpu, IdealWorkgroups>* kIdealAdrenoWorkgroupsInfoPtr =
new std::map<GpuModel, IdealWorkgroups>{ new std::map<AdrenoGpu, IdealWorkgroups>{
{GpuModel::ADRENO630, {AdrenoGpu::kAdreno630,
{*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}}, {*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}},
{GpuModel::ADRENO540, {*kIdealByTypeAdreno540Ptr, {}}}, {AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}},
{GpuModel::ADRENO510, {AdrenoGpu::kAdreno510,
{*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}}, {*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}},
{GpuModel::ADRENO509, {*kIdealByTypeAdreno509Ptr, {}}}, {AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}},
{GpuModel::ADRENO508, {*kIdealByTypeAdreno508Ptr, {}}}, {AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}},
{GpuModel::ADRENO506, {*kIdealByTypeAdreno506Ptr, {}}}, {AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}},
{GpuModel::ADRENO505, {*kIdealByTypeAdreno505Ptr, {}}}, {AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}},
{GpuModel::ADRENO418, {*kIdealByTypeAdreno418Ptr, {}}}, {AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}},
{GpuModel::ADRENO405, {*kIdealByTypeAdreno405Ptr, {}}}, {AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}},
}; };
} // namespace } // namespace
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
HW kernel, HW strides, uint3 default_wg, OperationType op_type, HW kernel, HW strides,
OHWI workload) { uint3 default_wg, OHWI workload) {
// Research showed that ideal workgroup approach doesn't work well with // Research showed that ideal workgroup approach doesn't work well with
// convolutions, which have small amount of output channels or output // convolutions, which have small amount of output channels or output
// height/width dimensions // height/width dimensions
if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg; if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg;
if (!gpu_info.IsAdreno()) {
return default_wg;
}
auto adreno_gpu_version = gpu_info.adreno_info.adreno_gpu;
// If GPU was investigated // If GPU was investigated
if (!kIdealWorkgroupsInfoPtr->count(gpu_model)) { if (!kIdealAdrenoWorkgroupsInfoPtr->count(adreno_gpu_version)) {
return default_wg; return default_wg;
} }
// Try to find the ideal workgroup by the specific operation case, cause they // Try to find the ideal workgroup by the specific operation case, cause they
// are expected to be better tuned than default "by type" cases // are expected to be better tuned than default "by type" cases
for (const auto& specific_case : for (const auto& specific_case :
kIdealWorkgroupsInfoPtr->at(gpu_model).by_case) { kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_case) {
if (specific_case.ParamsAccepted(op_type, kernel, strides)) { if (specific_case.ParamsAccepted(op_type, kernel, strides)) {
return specific_case.ideal_workgroup; return specific_case.ideal_workgroup;
} }
@ -178,7 +183,7 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
// Try to find the ideal workgroup by the operation type // Try to find the ideal workgroup by the operation type
for (const auto& default_case : for (const auto& default_case :
kIdealWorkgroupsInfoPtr->at(gpu_model).by_type) { kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_type) {
if (default_case.ParamsAccepted(op_type)) { if (default_case.ParamsAccepted(op_type)) {
return default_case.ideal_workgroup; return default_case.ideal_workgroup;
} }
@ -189,9 +194,10 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type,
return default_wg; return default_wg;
} }
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
HW kernel, HW strides, OHWI workload) { OperationType op_type, HW kernel, HW strides,
return GetIdealWorkgroupIfPossible(gpu_model, op_type, kernel, strides, OHWI workload) {
return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides,
kEmptyWorkgroupSize, workload); kEmptyWorkgroupSize, workload);
} }

View File

@ -28,15 +28,16 @@ namespace gl {
// Picks up the ideal workgroup size for the given convolution case. // Picks up the ideal workgroup size for the given convolution case.
// Ideal workgroup gives top 10% of the possible performance for the given case. // Ideal workgroup gives top 10% of the possible performance for the given case.
// They are received after the workgroup performance research (b/117291356). // They are received after the workgroup performance research (b/117291356).
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
HW kernel, HW strides, OHWI workload); OperationType op_type, HW kernel, HW strides,
OHWI workload);
// Does the same as the function above. Use this one if your operation can // Does the same as the function above. Use this one if your operation can
// suggest some reasonable workgroup size. It's expected to give better // suggest some reasonable workgroup size. It's expected to give better
// performance than the default workgroup calculator. // performance than the default workgroup calculator.
uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info,
HW kernel, HW strides, uint3 default_wg, OperationType op_type, HW kernel, HW strides,
OHWI workload); uint3 default_wg, OHWI workload);
} // namespace gl } // namespace gl
} // namespace gpu } // namespace gpu