diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.cc b/tensorflow/lite/delegates/gpu/common/gpu_info.cc index b56745df971..40a4e4b3c9e 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" +#include <map> #include <string> #include "absl/strings/ascii.h" @@ -23,79 +24,217 @@ namespace tflite { namespace gpu { namespace { -GpuType GetGpuType(const std::string& renderer) { +GpuVendor GetGpuVendor(const std::string& renderer) { if (renderer.find("mali") != renderer.npos) { - return GpuType::MALI; + return GpuVendor::kMali; } if (renderer.find("adreno") != renderer.npos) { - return GpuType::ADRENO; + return GpuVendor::kQualcomm; } if (renderer.find("powervr") != renderer.npos) { - return GpuType::POWERVR; + return GpuVendor::kPowerVR; } if (renderer.find("intel") != renderer.npos) { - return GpuType::INTEL; + return GpuVendor::kIntel; } if (renderer.find("nvidia") != renderer.npos) { - return GpuType::NVIDIA; + return GpuVendor::kNvidia; } - return GpuType::UNKNOWN; + return GpuVendor::kUnknown; } -GpuModel GetGpuModel(const std::string& renderer) { - auto found_model = [&](std::string model) -> bool { - return renderer.find(model) != renderer.npos; +AdrenoGpu GetAdrenoGpuVersion(const std::string& device_name) { + const std::map<std::string, AdrenoGpu> kMapping = { + // Adreno 6xx series + {"685", AdrenoGpu::kAdreno685}, + {"680", AdrenoGpu::kAdreno680}, + {"675", AdrenoGpu::kAdreno675}, + {"650", AdrenoGpu::kAdreno650}, + {"640", AdrenoGpu::kAdreno640}, + {"630", AdrenoGpu::kAdreno630}, + {"620", AdrenoGpu::kAdreno620}, + {"618", AdrenoGpu::kAdreno618}, + {"616", AdrenoGpu::kAdreno616}, + {"615", AdrenoGpu::kAdreno615}, + {"612", AdrenoGpu::kAdreno612}, + {"610", AdrenoGpu::kAdreno610}, + {"605", AdrenoGpu::kAdreno605}, + // Adreno 5xx series + {"540", AdrenoGpu::kAdreno540}, + {"530", AdrenoGpu::kAdreno530}, + {"512", 
AdrenoGpu::kAdreno512}, + {"510", AdrenoGpu::kAdreno510}, + {"509", AdrenoGpu::kAdreno509}, + {"508", AdrenoGpu::kAdreno508}, + {"506", AdrenoGpu::kAdreno506}, + {"505", AdrenoGpu::kAdreno505}, + {"504", AdrenoGpu::kAdreno504}, + // Adreno 4xx series + {"430", AdrenoGpu::kAdreno430}, + {"420", AdrenoGpu::kAdreno420}, + {"418", AdrenoGpu::kAdreno418}, + {"405", AdrenoGpu::kAdreno405}, + // Adreno 3xx series + {"330", AdrenoGpu::kAdreno330}, + {"320", AdrenoGpu::kAdreno320}, + {"308", AdrenoGpu::kAdreno308}, + {"306", AdrenoGpu::kAdreno306}, + {"305", AdrenoGpu::kAdreno305}, + {"304", AdrenoGpu::kAdreno304}, + // Adreno 2xx series + {"225", AdrenoGpu::kAdreno225}, + {"220", AdrenoGpu::kAdreno220}, + {"205", AdrenoGpu::kAdreno205}, + {"203", AdrenoGpu::kAdreno203}, + {"200", AdrenoGpu::kAdreno200}, + // Adreno 1xx series + {"130", AdrenoGpu::kAdreno130}, + {"120", AdrenoGpu::kAdreno120}, }; - // Adreno 6xx series - if (found_model("640")) return GpuModel::ADRENO640; - if (found_model("630")) return GpuModel::ADRENO630; - if (found_model("616")) return GpuModel::ADRENO616; - if (found_model("615")) return GpuModel::ADRENO615; - if (found_model("612")) return GpuModel::ADRENO612; - if (found_model("605")) return GpuModel::ADRENO605; - // Adreno 5xx series - if (found_model("540")) return GpuModel::ADRENO540; - if (found_model("530")) return GpuModel::ADRENO530; - if (found_model("512")) return GpuModel::ADRENO512; - if (found_model("510")) return GpuModel::ADRENO510; - if (found_model("509")) return GpuModel::ADRENO509; - if (found_model("508")) return GpuModel::ADRENO508; - if (found_model("506")) return GpuModel::ADRENO506; - if (found_model("505")) return GpuModel::ADRENO505; - if (found_model("504")) return GpuModel::ADRENO504; - // Adreno 4xx series - if (found_model("430")) return GpuModel::ADRENO430; - if (found_model("420")) return GpuModel::ADRENO420; - if (found_model("418")) return GpuModel::ADRENO418; - if (found_model("405")) return GpuModel::ADRENO405; - 
// Adreno 3xx series - if (found_model("330")) return GpuModel::ADRENO330; - if (found_model("320")) return GpuModel::ADRENO320; - if (found_model("308")) return GpuModel::ADRENO308; - if (found_model("306")) return GpuModel::ADRENO306; - if (found_model("305")) return GpuModel::ADRENO305; - if (found_model("304")) return GpuModel::ADRENO304; - // Adreno 2xx series - if (found_model("225")) return GpuModel::ADRENO225; - if (found_model("220")) return GpuModel::ADRENO220; - if (found_model("205")) return GpuModel::ADRENO205; - if (found_model("203")) return GpuModel::ADRENO203; - if (found_model("200")) return GpuModel::ADRENO200; - // Adreno 1xx series - if (found_model("130")) return GpuModel::ADRENO130; - return GpuModel::UNKNOWN; + + for (const auto& v : kMapping) { + if (device_name.find(v.first) != std::string::npos) { + return v.second; + } + } + return AdrenoGpu::kUnknown; } } // namespace -void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model, - GpuType* gpu_type) { - std::string lowered = renderer; - absl::AsciiStrToLower(&lowered); - *gpu_type = GetGpuType(lowered); - *gpu_model = - *gpu_type == GpuType::ADRENO ? 
GetGpuModel(lowered) : GpuModel::UNKNOWN; +AdrenoInfo::AdrenoInfo(const std::string& device_version) + : adreno_gpu(GetAdrenoGpuVersion(device_version)) {} + +bool AdrenoInfo::IsAdreno1xx() const { + return adreno_gpu == AdrenoGpu::kAdreno120 || + adreno_gpu == AdrenoGpu::kAdreno130; } +bool AdrenoInfo::IsAdreno2xx() const { + return adreno_gpu == AdrenoGpu::kAdreno200 || + adreno_gpu == AdrenoGpu::kAdreno203 || + adreno_gpu == AdrenoGpu::kAdreno205 || + adreno_gpu == AdrenoGpu::kAdreno220 || + adreno_gpu == AdrenoGpu::kAdreno225; +} + +bool AdrenoInfo::IsAdreno3xx() const { + return adreno_gpu == AdrenoGpu::kAdreno304 || + adreno_gpu == AdrenoGpu::kAdreno305 || + adreno_gpu == AdrenoGpu::kAdreno306 || + adreno_gpu == AdrenoGpu::kAdreno308 || + adreno_gpu == AdrenoGpu::kAdreno320 || + adreno_gpu == AdrenoGpu::kAdreno330; +} + +bool AdrenoInfo::IsAdreno4xx() const { + return adreno_gpu == AdrenoGpu::kAdreno405 || + adreno_gpu == AdrenoGpu::kAdreno418 || + adreno_gpu == AdrenoGpu::kAdreno420 || + adreno_gpu == AdrenoGpu::kAdreno430; +} + +bool AdrenoInfo::IsAdreno5xx() const { + return adreno_gpu == AdrenoGpu::kAdreno504 || + adreno_gpu == AdrenoGpu::kAdreno505 || + adreno_gpu == AdrenoGpu::kAdreno506 || + adreno_gpu == AdrenoGpu::kAdreno508 || + adreno_gpu == AdrenoGpu::kAdreno509 || + adreno_gpu == AdrenoGpu::kAdreno510 || + adreno_gpu == AdrenoGpu::kAdreno512 || + adreno_gpu == AdrenoGpu::kAdreno530 || + adreno_gpu == AdrenoGpu::kAdreno540; +} + +bool AdrenoInfo::IsAdreno6xx() const { + return adreno_gpu == AdrenoGpu::kAdreno605 || + adreno_gpu == AdrenoGpu::kAdreno610 || + adreno_gpu == AdrenoGpu::kAdreno612 || + adreno_gpu == AdrenoGpu::kAdreno615 || + adreno_gpu == AdrenoGpu::kAdreno616 || + adreno_gpu == AdrenoGpu::kAdreno618 || + adreno_gpu == AdrenoGpu::kAdreno620 || + adreno_gpu == AdrenoGpu::kAdreno630 || + adreno_gpu == AdrenoGpu::kAdreno640 || + adreno_gpu == AdrenoGpu::kAdreno650 || + adreno_gpu == AdrenoGpu::kAdreno675 || + adreno_gpu == 
AdrenoGpu::kAdreno680 || + adreno_gpu == AdrenoGpu::kAdreno685; +} + +bool AdrenoInfo::IsAdreno6xxOrHigher() const { return IsAdreno6xx(); } + +int AdrenoInfo::GetMaximumWavesCount() const { + if (IsAdreno6xx()) { + if (adreno_gpu == AdrenoGpu::kAdreno640) { + return 30; + } else { + return 16; + } + } else { + // all other versions not supported + return 1; + } +} + +int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const { + if (IsAdreno6xx()) { + if (adreno_gpu == AdrenoGpu::kAdreno640) { + return 128 * 144 * 16; + } else if (adreno_gpu == AdrenoGpu::kAdreno650) { + return 128 * 64 * 16; + } else { + return 128 * 96 * 16; + } + } else { + // all other versions not supported + return 1; + } +} + +int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, + bool full_wave) const { + const int register_usage_per_wave = + GetWaveSize(full_wave) * register_footprint_per_tread; + const int possible_waves_count = + GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; + return std::min(possible_waves_count, GetMaximumWavesCount()); +} + +int AdrenoInfo::GetWaveSize(bool full_wave) const { + if (IsAdreno6xx()) { + return full_wave ? 128 : 64; + } else if (IsAdreno5xx() || IsAdreno4xx()) { + return full_wave ? 
64 : 32; + } else { + // all other versions not supported + return 1; + } +} + +void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, + GpuInfo* gpu_info) { + std::string lowered = gpu_description; + absl::AsciiStrToLower(&lowered); + gpu_info->vendor = GetGpuVendor(lowered); + if (gpu_info->IsAdreno()) { + gpu_info->adreno_info = AdrenoInfo(lowered); + } +} + +bool GpuInfo::IsAdreno() const { return vendor == GpuVendor::kQualcomm; } + +bool GpuInfo::IsApple() const { return vendor == GpuVendor::kApple; } + +bool GpuInfo::IsMali() const { return vendor == GpuVendor::kMali; } + +bool GpuInfo::IsPowerVR() const { return vendor == GpuVendor::kPowerVR; } + +bool GpuInfo::IsNvidia() const { return vendor == GpuVendor::kNvidia; } + +bool GpuInfo::IsAMD() const { return vendor == GpuVendor::kAMD; } + +bool GpuInfo::IsIntel() const { return vendor == GpuVendor::kIntel; } + } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.h b/tensorflow/lite/delegates/gpu/common/gpu_info.h index fa50dc99d4f..053021bfe2a 100644 --- a/tensorflow/lite/delegates/gpu/common/gpu_info.h +++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h @@ -23,63 +23,113 @@ namespace tflite { namespace gpu { // The VendorID returned by the GPU driver. 
-enum class GpuType { - UNKNOWN, - APPLE, - MALI, - ADRENO, - POWERVR, - INTEL, - AMD, - NVIDIA, +enum class GpuVendor { + kApple, + kQualcomm, + kMali, + kPowerVR, + kNvidia, + kAMD, + kIntel, + kUnknown }; -enum class GpuModel { - UNKNOWN, + +enum class AdrenoGpu { // Adreno 6xx series - ADRENO640, - ADRENO630, - ADRENO616, - ADRENO615, - ADRENO612, - ADRENO605, + kAdreno685, + kAdreno680, + kAdreno675, + kAdreno650, + kAdreno640, + kAdreno630, + kAdreno620, + kAdreno618, + kAdreno616, + kAdreno615, + kAdreno612, + kAdreno610, + kAdreno605, // Adreno 5xx series - ADRENO540, - ADRENO530, - ADRENO512, - ADRENO510, - ADRENO509, - ADRENO508, - ADRENO506, - ADRENO505, - ADRENO504, + kAdreno540, + kAdreno530, + kAdreno512, + kAdreno510, + kAdreno509, + kAdreno508, + kAdreno506, + kAdreno505, + kAdreno504, // Adreno 4xx series - ADRENO430, - ADRENO420, - ADRENO418, - ADRENO405, + kAdreno430, + kAdreno420, + kAdreno418, + kAdreno405, // Adreno 3xx series - ADRENO330, - ADRENO320, - ADRENO308, - ADRENO306, - ADRENO305, - ADRENO304, + kAdreno330, + kAdreno320, + kAdreno308, + kAdreno306, + kAdreno305, + kAdreno304, // Adreno 2xx series - ADRENO225, - ADRENO220, - ADRENO205, - ADRENO203, - ADRENO200, + kAdreno225, + kAdreno220, + kAdreno205, + kAdreno203, + kAdreno200, // Adreno 1xx series - ADRENO130, + kAdreno130, + kAdreno120, + kUnknown +}; + +struct AdrenoInfo { + AdrenoInfo() = default; + explicit AdrenoInfo(const std::string& device_version); + + AdrenoGpu adreno_gpu; + + bool IsAdreno1xx() const; + bool IsAdreno2xx() const; + bool IsAdreno3xx() const; + bool IsAdreno4xx() const; + bool IsAdreno5xx() const; + bool IsAdreno6xx() const; + bool IsAdreno6xxOrHigher() const; + + // This function returns some not very documented physical parameter of + // Adreno6xx GPU. + // We obtained it using Snapdragon Profiler. + int GetMaximumWavesCount() const; + + // returns amount of register memory per CU(Compute Unit) in bytes. 
+ int GetRegisterMemorySizePerComputeUnit() const; + + // returns maximum possible amount of waves based on register usage. + int GetMaximumWavesCount(int register_footprint_per_tread, + bool full_wave = true) const; + + int GetWaveSize(bool full_wave) const; + + // Not supported on some Adreno devices with specific driver version. + // b/131099086 + bool support_one_layer_texture_array = true; }; struct GpuInfo { - GpuType type = GpuType::UNKNOWN; + bool IsAdreno() const; + bool IsApple() const; + bool IsMali() const; + bool IsPowerVR() const; + bool IsNvidia() const; + bool IsAMD() const; + bool IsIntel() const; + + GpuVendor vendor = GpuVendor::kUnknown; + std::string renderer_name; std::string vendor_name; std::string version; - GpuModel gpu_model; int major_version = -1; int minor_version = -1; std::vector extensions; @@ -90,6 +140,8 @@ struct GpuInfo { int max_texture_size = 0; int max_image_units = 0; int max_array_texture_layers = 0; + + AdrenoInfo adreno_info; }; inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { @@ -97,9 +149,10 @@ inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) { gpu_info.major_version > 3; } -// Analyzes `renderer` and returns matching `GpuType` and `GpuModel`. 
-void GetGpuModelAndType(const std::string& renderer, GpuModel* gpu_model, - GpuType* gpu_type); +// Currently it initializes vendor and AdrenoInfo if +// vendor is kQualcomm +void GetGpuInfoFromDeviceDescription(const std::string& gpu_description, + GpuInfo* gpu_info); } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/gl/command_queue.cc b/tensorflow/lite/delegates/gpu/gl/command_queue.cc index 8500a50859c..4dccce97568 100644 --- a/tensorflow/lite/delegates/gpu/gl/command_queue.cc +++ b/tensorflow/lite/delegates/gpu/gl/command_queue.cc @@ -86,12 +86,12 @@ class AdrenoCommandQueue : public DefaultCommandQueue { } // namespace std::unique_ptr NewCommandQueue(const GpuInfo& gpu_info) { - if (gpu_info.type == GpuType::ADRENO) { + if (gpu_info.IsAdreno()) { int flush_every_n = 1; // On Adreno 630 and Adreno 505 there is up to 2x performance boost when // glFlush happens not so often. - if (gpu_info.gpu_model == GpuModel::ADRENO630 || - gpu_info.gpu_model == GpuModel::ADRENO505) { + if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630 || + gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno505) { flush_every_n = 10; } return absl::make_unique(flush_every_n); diff --git a/tensorflow/lite/delegates/gpu/gl/compiler.cc b/tensorflow/lite/delegates/gpu/gl/compiler.cc index eba25171ca3..fc2adbdfcc6 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler.cc @@ -65,21 +65,19 @@ bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) { } ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) { - return gpu_info.type == GpuType::ADRENO ? ObjectType::TEXTURE - : ObjectType::BUFFER; + return gpu_info.IsAdreno() ? 
ObjectType::TEXTURE : ObjectType::BUFFER; } ObjectType ChooseFastestRefObjectType(const GpuInfo& gpu_info, const CompilationOptions& options) { - if (gpu_info.type != GpuType::ADRENO) { + if (!gpu_info.IsAdreno()) { return ObjectType::BUFFER; } - switch (gpu_info.gpu_model) { - case GpuModel::ADRENO630: - return ObjectType::TEXTURE; - default: - return options.allow_precision_loss ? ObjectType::TEXTURE - : ObjectType::BUFFER; + if (gpu_info.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) { + return ObjectType::TEXTURE; + } else { + return options.allow_precision_loss ? ObjectType::TEXTURE + : ObjectType::BUFFER; } } diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc index 34c24edc5a3..29d1616de98 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.cc @@ -31,13 +31,13 @@ namespace gl { ShaderCodegen::ShaderCodegen(const CompilationOptions& options, const GpuInfo& gpu_info) - : options_(options), gpu_type_(gpu_info.type) {} + : options_(options), gpu_type_(gpu_info.vendor) {} absl::Status ShaderCodegen::Build(CompiledNodeAttributes attr, ShaderCode* shader_code) const { VariableAccessor variable_accessor(options_.inline_parameters, options_.vulkan_support); - ObjectAccessor object_accessor(gpu_type_ == GpuType::MALI, + ObjectAccessor object_accessor(gpu_type_ == GpuVendor::kMali, options_.sampler_textures, &variable_accessor); const auto add_object = [&](const std::string& name, Object&& object) { diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.h b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.h index 12d2708d221..492d965b6eb 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler/shader_codegen.h @@ -44,7 +44,7 @@ class ShaderCodegen { private: const CompilationOptions options_; - const GpuType gpu_type_; 
+ const GpuVendor gpu_type_; }; } // namespace gl diff --git a/tensorflow/lite/delegates/gpu/gl/egl_environment.cc b/tensorflow/lite/delegates/gpu/gl/egl_environment.cc index 8ae75acd933..8debf2eab42 100644 --- a/tensorflow/lite/delegates/gpu/gl/egl_environment.cc +++ b/tensorflow/lite/delegates/gpu/gl/egl_environment.cc @@ -89,7 +89,7 @@ absl::Status EglEnvironment::Init() { } } - if (gpu_info_.type == GpuType::UNKNOWN) { + if (gpu_info_.vendor == GpuVendor::kUnknown) { RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_)); } // TODO(akulik): when do we need ForceSyncTurning? @@ -110,7 +110,7 @@ absl::Status EglEnvironment::InitSurfacelessContext() { // PowerVR support EGL_KHR_surfaceless_context, but glFenceSync crashes on // PowerVR when it is surface-less. RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_)); - if (gpu_info_.type == GpuType::POWERVR) { + if (gpu_info_.IsPowerVR()) { return absl::UnavailableError( "Surface-less context is not properly supported on powervr."); } diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc index 990d86436fa..f6d8247b96a 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc @@ -134,7 +134,7 @@ class Convolution : public NodeShader { /*workload=*/uint3(), /*workgroup=*/ GetIdealWorkgroupIfPossible( - ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D, + *ctx.gpu_info, OperationType::CONVOLUTION_2D, HW(weights.h, weights.w), attr.strides, uint3(0, 0, 0), OHWI(weights.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2], ctx.input_shapes[0][3])), @@ -149,8 +149,7 @@ class Convolution : public NodeShader { int SelectMultiplier(int32_t input_width, const NodeShader::GenerationContext& ctx) { std::vector multipliers = {4, 2}; - if (!ctx.compiler_options.allow_precision_loss && - ctx.gpu_info->type == GpuType::MALI) { + if (!ctx.compiler_options.allow_precision_loss && ctx.gpu_info->IsMali()) { multipliers = {2}; } for (int i 
: multipliers) { @@ -234,7 +233,7 @@ class Convolution1x1 : public NodeShader { auto dst_depth = DivideRoundUp(ctx.output_shapes[0][3], 4); uint3 workgroup = uint3(16, 16, 1); - if (ctx.gpu_info->type == GpuType::ADRENO) { + if (ctx.gpu_info->IsAdreno()) { if (dst_depth >= 2) { workgroup = uint3(8, 8, 2); } @@ -276,7 +275,7 @@ class Convolution1x1 : public NodeShader { DivideRoundUp(ctx.output_shapes[0][3], 4)), /*workgroup=*/ GetIdealWorkgroupIfPossible( - ctx.gpu_info->gpu_model, OperationType::CONVOLUTION_2D, + *ctx.gpu_info, OperationType::CONVOLUTION_2D, HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides, workgroup, OHWI(attr.weights.shape.o, ctx.input_shapes[0][1], diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc index ceda5b68ca8..b6c65c47c6a 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc @@ -141,7 +141,7 @@ class DepthwiseConvolution : public NodeShader { /*workload=*/uint3(), /*workgroup=*/ GetIdealWorkgroupIfPossible( - ctx.gpu_info->gpu_model, OperationType::DEPTHWISE_CONVOLUTION, + *ctx.gpu_info, OperationType::DEPTHWISE_CONVOLUTION, HW(attr.weights.shape.h, attr.weights.shape.w), attr.strides, OHWI(attr.weights.shape.o, ctx.input_shapes[0][1], ctx.input_shapes[0][2], ctx.input_shapes[0][3])), diff --git a/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc b/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc index 0769a5014b4..ea7e9f5169d 100644 --- a/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc +++ b/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc @@ -34,7 +34,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) { const GLubyte* renderer_name = glGetString(GL_RENDERER); if (renderer_name) { info.renderer_name = reinterpret_cast(renderer_name); - GetGpuModelAndType(info.renderer_name, &info.gpu_model, &info.type); + 
GetGpuInfoFromDeviceDescription(info.renderer_name, &info); } const GLubyte* vendor_name = glGetString(GL_VENDOR); diff --git a/tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.cc b/tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.cc index 7b6358e3a95..c14fa1795d7 100644 --- a/tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.cc +++ b/tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.cc @@ -81,7 +81,7 @@ class WorkgroupsCalculatorForMali : public WorkgroupsCalculator { std::unique_ptr NewDefaultWorkgroupsCalculator( const GpuInfo& gpu_info) { - if (gpu_info.type == GpuType::MALI) { + if (gpu_info.IsMali()) { return absl::make_unique(gpu_info); } else { return absl::make_unique(gpu_info); diff --git a/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.cc b/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.cc index b67cc36c903..801824398e2 100644 --- a/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.cc +++ b/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.cc @@ -137,40 +137,45 @@ std::vector* kIdealByTypeAdreno418Ptr = kIdealByTypeAdreno508Ptr; std::vector* kIdealByTypeAdreno405Ptr = kIdealByTypeAdreno508Ptr; // Put all ideal workgroups from the list together. 
-const std::map* kIdealWorkgroupsInfoPtr = - new std::map{ - {GpuModel::ADRENO630, +const std::map* kIdealAdrenoWorkgroupsInfoPtr = + new std::map{ + {AdrenoGpu::kAdreno630, {*kIdealByTypeAdreno630Ptr, *kIdealByCaseAdreno630Ptr}}, - {GpuModel::ADRENO540, {*kIdealByTypeAdreno540Ptr, {}}}, - {GpuModel::ADRENO510, + {AdrenoGpu::kAdreno540, {*kIdealByTypeAdreno540Ptr, {}}}, + {AdrenoGpu::kAdreno510, {*kIdealByTypeAdreno510Ptr, *kIdealByCaseAdreno510Ptr}}, - {GpuModel::ADRENO509, {*kIdealByTypeAdreno509Ptr, {}}}, - {GpuModel::ADRENO508, {*kIdealByTypeAdreno508Ptr, {}}}, - {GpuModel::ADRENO506, {*kIdealByTypeAdreno506Ptr, {}}}, - {GpuModel::ADRENO505, {*kIdealByTypeAdreno505Ptr, {}}}, - {GpuModel::ADRENO418, {*kIdealByTypeAdreno418Ptr, {}}}, - {GpuModel::ADRENO405, {*kIdealByTypeAdreno405Ptr, {}}}, + {AdrenoGpu::kAdreno509, {*kIdealByTypeAdreno509Ptr, {}}}, + {AdrenoGpu::kAdreno508, {*kIdealByTypeAdreno508Ptr, {}}}, + {AdrenoGpu::kAdreno506, {*kIdealByTypeAdreno506Ptr, {}}}, + {AdrenoGpu::kAdreno505, {*kIdealByTypeAdreno505Ptr, {}}}, + {AdrenoGpu::kAdreno418, {*kIdealByTypeAdreno418Ptr, {}}}, + {AdrenoGpu::kAdreno405, {*kIdealByTypeAdreno405Ptr, {}}}, }; } // namespace -uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, - HW kernel, HW strides, uint3 default_wg, - OHWI workload) { +uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info, + OperationType op_type, HW kernel, HW strides, + uint3 default_wg, OHWI workload) { // Research showed that ideal workgroup approach doesn't work well with // convolutions, which have small amount of output channels or output // height/width dimensions if (workload.o < 32 || workload.h <= 5 || workload.w <= 5) return default_wg; + if (!gpu_info.IsAdreno()) { + return default_wg; + } + auto adreno_gpu_version = gpu_info.adreno_info.adreno_gpu; + // If GPU was investigated - if (!kIdealWorkgroupsInfoPtr->count(gpu_model)) { + if (!kIdealAdrenoWorkgroupsInfoPtr->count(adreno_gpu_version)) { return default_wg; } 
// Try to find the ideal workgroup by the specific operation case, cause they // are expected to be better tuned than default "by type" cases for (const auto& specific_case : - kIdealWorkgroupsInfoPtr->at(gpu_model).by_case) { + kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_case) { if (specific_case.ParamsAccepted(op_type, kernel, strides)) { return specific_case.ideal_workgroup; } @@ -178,7 +183,7 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, // Try to find the ideal workgroup by the operation type for (const auto& default_case : - kIdealWorkgroupsInfoPtr->at(gpu_model).by_type) { + kIdealAdrenoWorkgroupsInfoPtr->at(adreno_gpu_version).by_type) { if (default_case.ParamsAccepted(op_type)) { return default_case.ideal_workgroup; } @@ -189,9 +194,10 @@ uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, return default_wg; } -uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, - HW kernel, HW strides, OHWI workload) { - return GetIdealWorkgroupIfPossible(gpu_model, op_type, kernel, strides, +uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info, + OperationType op_type, HW kernel, HW strides, + OHWI workload) { + return GetIdealWorkgroupIfPossible(gpu_info, op_type, kernel, strides, kEmptyWorkgroupSize, workload); } diff --git a/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h b/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h index 34f628cb7cf..57d4ffde6db 100644 --- a/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h +++ b/tensorflow/lite/delegates/gpu/gl/workgroups/ideal_workgroup_picker.h @@ -28,15 +28,16 @@ namespace gl { // Picks up the ideal workgroup size for the given convolution case. // Ideal workgroup gives top 10% of the possible performance for the given case. // They are received after the workgroup performance research (b/117291356). 
-uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, - HW kernel, HW strides, OHWI workload); +uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info, + OperationType op_type, HW kernel, HW strides, + OHWI workload); // Does the same as the function above. Use this one if your operation can // suggest some reasonable workgroup size. It's expected to give better // performance than the default workgroup calculator. -uint3 GetIdealWorkgroupIfPossible(GpuModel gpu_model, OperationType op_type, - HW kernel, HW strides, uint3 default_wg, - OHWI workload); +uint3 GetIdealWorkgroupIfPossible(const GpuInfo& gpu_info, + OperationType op_type, HW kernel, HW strides, + uint3 default_wg, OHWI workload); } // namespace gl } // namespace gpu