Some info (max_work_group_invocations) moved from GpuInfo to OpenGlInfo.
PiperOrigin-RevId: 343506144 Change-Id: I0fc14c2c352c9616d80f0ba0a9a71c2b9317d713
This commit is contained in:
parent
766bc047d8
commit
cbadb3c5ce
@ -393,6 +393,54 @@ int GpuInfo::GetComputeUnitsCount() const {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns element 0 of max_work_group_size, i.e. the work-group size limit
// stored for the X dimension.
int GpuInfo::GetMaxWorkGroupSizeForX() const {
  constexpr int kXIndex = 0;
  return max_work_group_size[kXIndex];
}
|
||||
|
||||
// Returns element 1 of max_work_group_size, i.e. the work-group size limit
// stored for the Y dimension.
int GpuInfo::GetMaxWorkGroupSizeForY() const {
  constexpr int kYIndex = 1;
  return max_work_group_size[kYIndex];
}
|
||||
|
||||
// Returns element 2 of max_work_group_size, i.e. the work-group size limit
// stored for the Z dimension.
int GpuInfo::GetMaxWorkGroupSizeForZ() const {
  constexpr int kZIndex = 2;
  return max_work_group_size[kZIndex];
}
|
||||
|
||||
int GpuInfo::GetMaxWorkGroupTotalSize() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_work_group_invocations;
|
||||
} else if (IsApiVulkan()) {
|
||||
return vulkan_info.max_compute_work_group_invocations;
|
||||
} else if (IsApiMetal()) {
|
||||
return 256;
|
||||
} else {
|
||||
return 256;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DWidth() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_texture_size;
|
||||
} else if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_dimension_2d;
|
||||
} else {
|
||||
return 2048;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DHeight() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_texture_size;
|
||||
} else if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_dimension_2d;
|
||||
} else {
|
||||
return 2048;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_array_texture_layers;
|
||||
} else if (IsApiVulkan()) {
|
||||
return vulkan_info.max_image_array_layers;
|
||||
} else {
|
||||
return 256;
|
||||
}
|
||||
}
|
||||
|
||||
int GpuInfo::GetMaxImageArguments() const {
|
||||
if (IsApiOpenGl()) {
|
||||
return opengl_info.max_image_units;
|
||||
|
@ -208,6 +208,9 @@ struct OpenGlInfo {
|
||||
int max_image_units = 0;
|
||||
int max_ssbo_bindings = 0;
|
||||
int max_image_bindings = 0;
|
||||
int max_work_group_invocations = 0;
|
||||
int max_texture_size = 0;
|
||||
int max_array_texture_layers = 0;
|
||||
};
|
||||
|
||||
struct VulkanInfo {
|
||||
@ -218,6 +221,9 @@ struct VulkanInfo {
|
||||
uint32_t api_version_patch = -1;
|
||||
|
||||
uint32_t max_per_stage_descriptor_sampled_images = 0;
|
||||
uint32_t max_compute_work_group_invocations;
|
||||
uint32_t max_image_dimension_2d;
|
||||
uint32_t max_image_array_layers;
|
||||
};
|
||||
|
||||
struct GpuInfo {
|
||||
@ -239,14 +245,20 @@ struct GpuInfo {
|
||||
|
||||
int GetMaxImageArguments() const;
|
||||
|
||||
int GetMaxWorkGroupSizeForX() const;
|
||||
int GetMaxWorkGroupSizeForY() const;
|
||||
int GetMaxWorkGroupSizeForZ() const;
|
||||
int GetMaxWorkGroupTotalSize() const;
|
||||
|
||||
uint64_t GetMaxImage2DWidth() const;
|
||||
uint64_t GetMaxImage2DHeight() const;
|
||||
uint64_t GetMaxImage2DArrayLayers() const;
|
||||
|
||||
GpuVendor vendor = GpuVendor::kUnknown;
|
||||
GpuApi gpu_api = GpuApi::kUnknown;
|
||||
|
||||
std::vector<std::string> extensions;
|
||||
std::vector<int> max_work_group_size;
|
||||
int max_work_group_invocations;
|
||||
int max_texture_size = 0;
|
||||
int max_array_texture_layers = 0;
|
||||
|
||||
std::vector<int> supported_subgroup_sizes;
|
||||
|
||||
|
@ -189,15 +189,15 @@ template std::vector<uint3> GenerateWorkGroupSizes(
|
||||
template <typename T>
|
||||
void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
|
||||
const T& max_work_group_size,
|
||||
const int max_work_group_invocations,
|
||||
const int max_work_group_total_size,
|
||||
std::vector<T>* work_groups) {
|
||||
auto alignment = WorkGroupSizeAlignment::PRECISE;
|
||||
*work_groups = GenerateWorkGroupSizes<T>(
|
||||
grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
|
||||
grid, /*min_work_group_total_size = */ 32, max_work_group_total_size,
|
||||
max_work_group_size, alignment, alignment, alignment);
|
||||
// If the grid is too small, GenerateWorkGroupSizes above may return no work
// groups; fall back to the corner cases.
|
||||
if (work_groups->empty()) {
|
||||
AddCornerCases(grid, max_work_group_invocations, max_work_group_size,
|
||||
AddCornerCases(grid, max_work_group_total_size, max_work_group_size,
|
||||
alignment, alignment, alignment, work_groups);
|
||||
}
|
||||
}
|
||||
@ -206,11 +206,11 @@ void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
|
||||
|
||||
template void GenerateWorkGroupSizesAlignedToGrid(
|
||||
const int3& grid, const int3& max_work_group_size,
|
||||
const int max_work_group_invocations, std::vector<int3>* work_groups);
|
||||
const int max_work_group_total_size, std::vector<int3>* work_groups);
|
||||
|
||||
template void GenerateWorkGroupSizesAlignedToGrid(
|
||||
const uint3& grid, const uint3& max_work_group_size,
|
||||
const int max_work_group_invocations, std::vector<uint3>* work_groups);
|
||||
const int max_work_group_total_size, std::vector<uint3>* work_groups);
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -41,7 +41,7 @@ std::vector<T> GenerateWorkGroupSizes(
|
||||
template <typename T>
|
||||
void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
|
||||
const T& max_work_group_size,
|
||||
const int max_work_group_invocations,
|
||||
const int max_work_group_total_size,
|
||||
std::vector<T>* work_groups);
|
||||
|
||||
} // namespace gpu
|
||||
|
@ -43,25 +43,27 @@ namespace gl {
|
||||
namespace {
|
||||
|
||||
struct ExceedSizeChecker {
|
||||
bool operator()(uint32_t v) const { return v > max_size; }
|
||||
bool operator()(uint32_t v) const { return v > max_size.x; }
|
||||
|
||||
bool operator()(const uint2& v) const {
|
||||
return v.x > max_size || v.y > max_size;
|
||||
return v.x > max_size.x || v.y > max_size.y;
|
||||
}
|
||||
|
||||
bool operator()(const uint3& v) const {
|
||||
return v.x > max_size || v.y > max_size || v.z > max_z_size;
|
||||
return v.x > max_size.x || v.y > max_size.y || v.z > max_z_size;
|
||||
}
|
||||
|
||||
int max_size;
|
||||
int2 max_size;
|
||||
int max_z_size;
|
||||
};
|
||||
|
||||
// Returns true if any size variable exceeds the given limit
|
||||
bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
|
||||
return absl::visit(ExceedSizeChecker{gpu_info.max_texture_size,
|
||||
gpu_info.max_array_texture_layers},
|
||||
object.size);
|
||||
ExceedSizeChecker size_checker;
|
||||
size_checker.max_size =
|
||||
int2(gpu_info.GetMaxImage2DWidth(), gpu_info.GetMaxImage2DHeight());
|
||||
size_checker.max_z_size = gpu_info.GetMaxImage2DArrayLayers();
|
||||
return absl::visit(size_checker, object.size);
|
||||
}
|
||||
|
||||
ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
|
||||
|
@ -76,10 +76,11 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
|
||||
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2,
|
||||
&info.max_work_group_size[2]);
|
||||
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS,
|
||||
&info.max_work_group_invocations);
|
||||
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.max_texture_size);
|
||||
&info.opengl_info.max_work_group_invocations);
|
||||
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.opengl_info.max_texture_size);
|
||||
glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.opengl_info.max_image_units);
|
||||
glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &info.max_array_texture_layers);
|
||||
glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS,
|
||||
&info.opengl_info.max_array_texture_layers);
|
||||
RETURN_IF_ERROR(GetOpenGlErrors());
|
||||
*gpu_info = info;
|
||||
return absl::OkStatus();
|
||||
|
@ -29,26 +29,26 @@ uint64_t CalculateProduct(const uint3& value) {
|
||||
}
|
||||
|
||||
void MaybeShrinkWorkgroup(const GpuInfo& gpu_info, uint3* wg) {
|
||||
while (wg->x > gpu_info.max_work_group_size[0]) {
|
||||
while (wg->x > gpu_info.GetMaxWorkGroupSizeForX()) {
|
||||
wg->x /= 2;
|
||||
}
|
||||
|
||||
while (wg->y > gpu_info.max_work_group_size[1]) {
|
||||
while (wg->y > gpu_info.GetMaxWorkGroupSizeForY()) {
|
||||
wg->y /= 2;
|
||||
}
|
||||
|
||||
while (wg->z > gpu_info.max_work_group_size[2]) {
|
||||
while (wg->z > gpu_info.GetMaxWorkGroupSizeForZ()) {
|
||||
wg->z /= 2;
|
||||
}
|
||||
|
||||
// Code below decreases amount of invocations per workgroup in a balanced way.
|
||||
// As example, workgroup size is x=16, y=8, z=8 (16x8x8 = 1024), but
|
||||
// max_work_group_invocations = 512. We need to fit this limit and we can
|
||||
// max_work_group_total_size = 512. We need to fit this limit and we can
|
||||
// reduce workgroup size in different ways, but we want to use the most
|
||||
// balanced way. So code below will find the maximal of three dimensions and
|
||||
// reduce it, so the whole workgroup is kept balanced by all dimensions. And
|
||||
// the final reduced workgroup will be x=8, y=8, z=8 for the given example.
|
||||
while (CalculateProduct(*wg) > gpu_info.max_work_group_invocations) {
|
||||
while (CalculateProduct(*wg) > gpu_info.GetMaxWorkGroupTotalSize()) {
|
||||
unsigned int* max = &wg->x;
|
||||
if (wg->y > *max) max = &wg->y;
|
||||
if (wg->z > *max) max = &wg->z;
|
||||
|
Loading…
Reference in New Issue
Block a user