Some info (max_work_group_invocations) moved from GpuInfo to OpenGlInfo.

PiperOrigin-RevId: 343506144
Change-Id: I0fc14c2c352c9616d80f0ba0a9a71c2b9317d713
This commit is contained in:
Raman Sarokin 2020-11-20 09:26:55 -08:00 committed by TensorFlower Gardener
parent 766bc047d8
commit cbadb3c5ce
7 changed files with 87 additions and 24 deletions

View File

@ -393,6 +393,54 @@ int GpuInfo::GetComputeUnitsCount() const {
}
}
// Returns the work group size limit stored at index 0 of max_work_group_size
// (the X dimension, going by this accessor's name).
// NOTE(review): the vector is indexed without a bounds check; this assumes
// max_work_group_size already holds at least one entry — confirm at the site
// that fills GpuInfo.
int GpuInfo::GetMaxWorkGroupSizeForX() const {
  constexpr int kXIndex = 0;
  return max_work_group_size[kXIndex];
}
// Returns the work group size limit stored at index 1 of max_work_group_size
// (the Y dimension, going by this accessor's name).
// NOTE(review): the vector is indexed without a bounds check; this assumes
// max_work_group_size already holds at least two entries — confirm at the
// site that fills GpuInfo.
int GpuInfo::GetMaxWorkGroupSizeForY() const {
  constexpr int kYIndex = 1;
  return max_work_group_size[kYIndex];
}
// Returns the work group size limit stored at index 2 of max_work_group_size
// (the Z dimension, going by this accessor's name).
// NOTE(review): the vector is indexed without a bounds check; this assumes
// max_work_group_size already holds at least three entries — confirm at the
// site that fills GpuInfo.
int GpuInfo::GetMaxWorkGroupSizeForZ() const {
  constexpr int kZIndex = 2;
  return max_work_group_size[kZIndex];
}
// Returns the limit on the total work group size for the active GPU API:
//   - OpenGL: opengl_info.max_work_group_invocations
//   - Vulkan: vulkan_info.max_compute_work_group_invocations
//   - Metal and any other API: a fixed value of 256
int GpuInfo::GetMaxWorkGroupTotalSize() const {
  constexpr int kDefaultMaxTotalSize = 256;
  if (IsApiOpenGl()) {
    return opengl_info.max_work_group_invocations;
  }
  if (IsApiVulkan()) {
    return vulkan_info.max_compute_work_group_invocations;
  }
  if (IsApiMetal()) {
    // Metal has its own branch but currently reports the same fixed value as
    // the generic fallback below.
    return kDefaultMaxTotalSize;
  }
  return kDefaultMaxTotalSize;
}
// Returns the maximum 2D image width for the active GPU API:
//   - OpenGL: opengl_info.max_texture_size
//   - Vulkan: vulkan_info.max_image_dimension_2d
//   - any other API: a fixed fallback of 2048
uint64_t GpuInfo::GetMaxImage2DWidth() const {
  constexpr uint64_t kFallbackWidth = 2048;
  if (IsApiOpenGl()) {
    return opengl_info.max_texture_size;
  }
  if (IsApiVulkan()) {
    return vulkan_info.max_image_dimension_2d;
  }
  return kFallbackWidth;
}
// Returns the maximum 2D image height for the active GPU API:
//   - OpenGL: opengl_info.max_texture_size
//   - Vulkan: vulkan_info.max_image_dimension_2d
//   - any other API: a fixed fallback of 2048
uint64_t GpuInfo::GetMaxImage2DHeight() const {
  constexpr uint64_t kFallbackHeight = 2048;
  if (IsApiOpenGl()) {
    return opengl_info.max_texture_size;
  }
  if (IsApiVulkan()) {
    return vulkan_info.max_image_dimension_2d;
  }
  return kFallbackHeight;
}
// Returns the maximum number of layers in a 2D image array for the active
// GPU API:
//   - OpenGL: opengl_info.max_array_texture_layers
//   - Vulkan: vulkan_info.max_image_array_layers
//   - any other API: a fixed fallback of 256
uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
  constexpr uint64_t kFallbackLayers = 256;
  if (IsApiOpenGl()) {
    return opengl_info.max_array_texture_layers;
  }
  if (IsApiVulkan()) {
    return vulkan_info.max_image_array_layers;
  }
  return kFallbackLayers;
}
int GpuInfo::GetMaxImageArguments() const {
if (IsApiOpenGl()) {
return opengl_info.max_image_units;

View File

@ -208,6 +208,9 @@ struct OpenGlInfo {
int max_image_units = 0;
int max_ssbo_bindings = 0;
int max_image_bindings = 0;
int max_work_group_invocations = 0;
int max_texture_size = 0;
int max_array_texture_layers = 0;
};
struct VulkanInfo {
@ -218,6 +221,9 @@ struct VulkanInfo {
uint32_t api_version_patch = -1;
uint32_t max_per_stage_descriptor_sampled_images = 0;
uint32_t max_compute_work_group_invocations;
uint32_t max_image_dimension_2d;
uint32_t max_image_array_layers;
};
struct GpuInfo {
@ -239,14 +245,20 @@ struct GpuInfo {
int GetMaxImageArguments() const;
int GetMaxWorkGroupSizeForX() const;
int GetMaxWorkGroupSizeForY() const;
int GetMaxWorkGroupSizeForZ() const;
int GetMaxWorkGroupTotalSize() const;
uint64_t GetMaxImage2DWidth() const;
uint64_t GetMaxImage2DHeight() const;
uint64_t GetMaxImage2DArrayLayers() const;
GpuVendor vendor = GpuVendor::kUnknown;
GpuApi gpu_api = GpuApi::kUnknown;
std::vector<std::string> extensions;
std::vector<int> max_work_group_size;
int max_work_group_invocations;
int max_texture_size = 0;
int max_array_texture_layers = 0;
std::vector<int> supported_subgroup_sizes;

View File

@ -189,15 +189,15 @@ template std::vector<uint3> GenerateWorkGroupSizes(
template <typename T>
void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
const T& max_work_group_size,
const int max_work_group_invocations,
const int max_work_group_total_size,
std::vector<T>* work_groups) {
auto alignment = WorkGroupSizeAlignment::PRECISE;
*work_groups = GenerateWorkGroupSizes<T>(
grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
grid, /*min_work_group_total_size = */ 32, max_work_group_total_size,
max_work_group_size, alignment, alignment, alignment);
// If the grid is too small, the GenerateWorkGroupSizes call above can return
// no work groups, so fall back to the corner cases.
if (work_groups->empty()) {
AddCornerCases(grid, max_work_group_invocations, max_work_group_size,
AddCornerCases(grid, max_work_group_total_size, max_work_group_size,
alignment, alignment, alignment, work_groups);
}
}
@ -206,11 +206,11 @@ void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
template void GenerateWorkGroupSizesAlignedToGrid(
const int3& grid, const int3& max_work_group_size,
const int max_work_group_invocations, std::vector<int3>* work_groups);
const int max_work_group_total_size, std::vector<int3>* work_groups);
template void GenerateWorkGroupSizesAlignedToGrid(
const uint3& grid, const uint3& max_work_group_size,
const int max_work_group_invocations, std::vector<uint3>* work_groups);
const int max_work_group_total_size, std::vector<uint3>* work_groups);
} // namespace gpu
} // namespace tflite

View File

@ -41,7 +41,7 @@ std::vector<T> GenerateWorkGroupSizes(
template <typename T>
void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
const T& max_work_group_size,
const int max_work_group_invocations,
const int max_work_group_total_size,
std::vector<T>* work_groups);
} // namespace gpu

View File

@ -43,25 +43,27 @@ namespace gl {
namespace {
struct ExceedSizeChecker {
bool operator()(uint32_t v) const { return v > max_size; }
bool operator()(uint32_t v) const { return v > max_size.x; }
bool operator()(const uint2& v) const {
return v.x > max_size || v.y > max_size;
return v.x > max_size.x || v.y > max_size.y;
}
bool operator()(const uint3& v) const {
return v.x > max_size || v.y > max_size || v.z > max_z_size;
return v.x > max_size.x || v.y > max_size.y || v.z > max_z_size;
}
int max_size;
int2 max_size;
int max_z_size;
};
// Returns true if any size variable exceeds the given limit
bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
return absl::visit(ExceedSizeChecker{gpu_info.max_texture_size,
gpu_info.max_array_texture_layers},
object.size);
ExceedSizeChecker size_checker;
size_checker.max_size =
int2(gpu_info.GetMaxImage2DWidth(), gpu_info.GetMaxImage2DHeight());
size_checker.max_z_size = gpu_info.GetMaxImage2DArrayLayers();
return absl::visit(size_checker, object.size);
}
ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {

View File

@ -76,10 +76,11 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2,
&info.max_work_group_size[2]);
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS,
&info.max_work_group_invocations);
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.max_texture_size);
&info.opengl_info.max_work_group_invocations);
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.opengl_info.max_texture_size);
glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.opengl_info.max_image_units);
glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &info.max_array_texture_layers);
glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS,
&info.opengl_info.max_array_texture_layers);
RETURN_IF_ERROR(GetOpenGlErrors());
*gpu_info = info;
return absl::OkStatus();

View File

@ -29,26 +29,26 @@ uint64_t CalculateProduct(const uint3& value) {
}
void MaybeShrinkWorkgroup(const GpuInfo& gpu_info, uint3* wg) {
while (wg->x > gpu_info.max_work_group_size[0]) {
while (wg->x > gpu_info.GetMaxWorkGroupSizeForX()) {
wg->x /= 2;
}
while (wg->y > gpu_info.max_work_group_size[1]) {
while (wg->y > gpu_info.GetMaxWorkGroupSizeForY()) {
wg->y /= 2;
}
while (wg->z > gpu_info.max_work_group_size[2]) {
while (wg->z > gpu_info.GetMaxWorkGroupSizeForZ()) {
wg->z /= 2;
}
// The code below decreases the number of invocations per workgroup in a
// balanced way. For example, the workgroup size is x=16, y=8, z=8
// (16x8x8 = 1024), but
// max_work_group_invocations = 512. We need to fit this limit and we can
// max_work_group_total_size = 512. We need to fit this limit and we can
// reduce the workgroup size in different ways, but we want to use the most
// balanced one. So the code below finds the largest of the three dimensions
// and reduces it, so the whole workgroup is kept balanced across all
// dimensions. The final reduced workgroup is x=8, y=8, z=8 for this example.
while (CalculateProduct(*wg) > gpu_info.max_work_group_invocations) {
while (CalculateProduct(*wg) > gpu_info.GetMaxWorkGroupTotalSize()) {
unsigned int* max = &wg->x;
if (wg->y > *max) max = &wg->y;
if (wg->z > *max) max = &wg->z;