Moved some info (max_work_group_invocations, max_texture_size, max_array_texture_layers) from GpuInfo to OpenGlInfo.

PiperOrigin-RevId: 343506144 Change-Id: I0fc14c2c352c9616d80f0ba0a9a71c2b9317d713
2020-11-20 09:26:55 -08:00 · 2020-11-20 09:26:55 -08:00 · cbadb3c5ce
commit cbadb3c5ce
parent 766bc047d8
7 changed files with 87 additions and 24 deletions
--- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc
+++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc
@ -393,6 +393,54 @@ int GpuInfo::GetComputeUnitsCount() const {
  }
 }

+// Returns element 0 of max_work_group_size (the X-axis work group limit).
+// NOTE(review): the index is not bounds-checked; this assumes
+// max_work_group_size has already been filled with 3 entries -- confirm.
+int GpuInfo::GetMaxWorkGroupSizeForX() const {
+  return max_work_group_size[0];
+}
+
+// Returns element 1 of max_work_group_size (the Y-axis work group limit).
+// NOTE(review): the index is not bounds-checked; this assumes
+// max_work_group_size has already been filled with 3 entries -- confirm.
+int GpuInfo::GetMaxWorkGroupSizeForY() const {
+  return max_work_group_size[1];
+}
+
+// Returns element 2 of max_work_group_size (the Z-axis work group limit).
+// NOTE(review): the index is not bounds-checked; this assumes
+// max_work_group_size has already been filled with 3 entries -- confirm.
+int GpuInfo::GetMaxWorkGroupSizeForZ() const {
+  return max_work_group_size[2];
+}
+
+// Returns the per-API limit on the total work group size:
+//   OpenGL -> opengl_info.max_work_group_invocations
+//   Vulkan -> vulkan_info.max_compute_work_group_invocations
+//   Metal and every other API -> hard-coded 256
+int GpuInfo::GetMaxWorkGroupTotalSize() const {
+  if (IsApiOpenGl()) return opengl_info.max_work_group_invocations;
+  if (IsApiVulkan()) return vulkan_info.max_compute_work_group_invocations;
+  // Metal currently shares the generic fallback value.
+  if (IsApiMetal()) return 256;
+  return 256;
+}
+
+// Returns the 2D image width limit: opengl_info.max_texture_size for OpenGL,
+// vulkan_info.max_image_dimension_2d for Vulkan, otherwise a fixed 2048.
+uint64_t GpuInfo::GetMaxImage2DWidth() const {
+  if (IsApiOpenGl()) return opengl_info.max_texture_size;
+  if (IsApiVulkan()) return vulkan_info.max_image_dimension_2d;
+  // No API-specific value is available here; use the hard-coded default.
+  return 2048;
+}
+
+// Returns the 2D image height limit. The same sources are used as for the
+// width: opengl_info.max_texture_size for OpenGL,
+// vulkan_info.max_image_dimension_2d for Vulkan, otherwise a fixed 2048.
+uint64_t GpuInfo::GetMaxImage2DHeight() const {
+  uint64_t height_limit = 2048;  // default for APIs without a queried value
+  if (IsApiOpenGl()) {
+    height_limit = opengl_info.max_texture_size;
+  } else if (IsApiVulkan()) {
+    height_limit = vulkan_info.max_image_dimension_2d;
+  }
+  return height_limit;
+}
+
+// Returns the limit on 2D image array layers:
+// opengl_info.max_array_texture_layers for OpenGL,
+// vulkan_info.max_image_array_layers for Vulkan, otherwise a fixed 256.
+uint64_t GpuInfo::GetMaxImage2DArrayLayers() const {
+  uint64_t layer_limit = 256;  // default for APIs without a queried value
+  if (IsApiOpenGl()) {
+    layer_limit = opengl_info.max_array_texture_layers;
+  } else if (IsApiVulkan()) {
+    layer_limit = vulkan_info.max_image_array_layers;
+  }
+  return layer_limit;
+}
+
 int GpuInfo::GetMaxImageArguments() const {
  if (IsApiOpenGl()) {
    return opengl_info.max_image_units;
--- a/tensorflow/lite/delegates/gpu/common/gpu_info.h
+++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h
@ -208,6 +208,9 @@ struct OpenGlInfo {
  int max_image_units = 0;
  int max_ssbo_bindings = 0;
  int max_image_bindings = 0;
+  int max_work_group_invocations = 0;
+  int max_texture_size = 0;
+  int max_array_texture_layers = 0;
 };

 struct VulkanInfo {
@ -218,6 +221,9 @@ struct VulkanInfo {
  uint32_t api_version_patch = -1;

  uint32_t max_per_stage_descriptor_sampled_images = 0;
+  // Zero-initialized like the neighbouring limits, so a VulkanInfo that was
+  // never filled in reads as 0 instead of an indeterminate value.
+  uint32_t max_compute_work_group_invocations = 0;
+  uint32_t max_image_dimension_2d = 0;
+  uint32_t max_image_array_layers = 0;
 };

 struct GpuInfo {
@ -239,14 +245,20 @@ struct GpuInfo {

  int GetMaxImageArguments() const;

+  int GetMaxWorkGroupSizeForX() const;
+  int GetMaxWorkGroupSizeForY() const;
+  int GetMaxWorkGroupSizeForZ() const;
+  int GetMaxWorkGroupTotalSize() const;
+
+  uint64_t GetMaxImage2DWidth() const;
+  uint64_t GetMaxImage2DHeight() const;
+  uint64_t GetMaxImage2DArrayLayers() const;
+
  GpuVendor vendor = GpuVendor::kUnknown;
  GpuApi gpu_api = GpuApi::kUnknown;

  std::vector<std::string> extensions;
  std::vector<int> max_work_group_size;
-  int max_work_group_invocations;
-  int max_texture_size = 0;
-  int max_array_texture_layers = 0;

  std::vector<int> supported_subgroup_sizes;

--- a/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc
+++ b/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc
@ -189,15 +189,15 @@ template std::vector<uint3> GenerateWorkGroupSizes(
 template <typename T>
 void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
                                         const T& max_work_group_size,
-                                         const int max_work_group_invocations,
+                                         const int max_work_group_total_size,
                                         std::vector<T>* work_groups) {
+  // Overwrites *work_groups with the result of GenerateWorkGroupSizes, called
+  // with a minimum total size of 32, max_work_group_total_size as the upper
+  // bound, and WorkGroupSizeAlignment::PRECISE for all three alignment
+  // arguments. If that result is empty, AddCornerCases is called with the
+  // same limits to populate the list instead.
  auto alignment = WorkGroupSizeAlignment::PRECISE;
  *work_groups = GenerateWorkGroupSizes<T>(
-      grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
+      grid, /*min_work_group_total_size = */ 32, max_work_group_total_size,
      max_work_group_size, alignment, alignment, alignment);
  // If the grid parameter too small, method below cannot generate workgroups.
  if (work_groups->empty()) {
-    AddCornerCases(grid, max_work_group_invocations, max_work_group_size,
+    AddCornerCases(grid, max_work_group_total_size, max_work_group_size,
                   alignment, alignment, alignment, work_groups);
  }
 }
@ -206,11 +206,11 @@ void GenerateWorkGroupSizesAlignedToGrid(const T& grid,

 template void GenerateWorkGroupSizesAlignedToGrid(
    const int3& grid, const int3& max_work_group_size,
-    const int max_work_group_invocations, std::vector<int3>* work_groups);
+    const int max_work_group_total_size, std::vector<int3>* work_groups);

 template void GenerateWorkGroupSizesAlignedToGrid(
    const uint3& grid, const uint3& max_work_group_size,
-    const int max_work_group_invocations, std::vector<uint3>* work_groups);
+    const int max_work_group_total_size, std::vector<uint3>* work_groups);

 }  // namespace gpu
 }  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/common/workgroup_selection.h
+++ b/tensorflow/lite/delegates/gpu/common/workgroup_selection.h
@ -41,7 +41,7 @@ std::vector<T> GenerateWorkGroupSizes(
 template <typename T>
 void GenerateWorkGroupSizesAlignedToGrid(const T& grid,
                                         const T& max_work_group_size,
-                                         const int max_work_group_invocations,
+                                         const int max_work_group_total_size,
                                         std::vector<T>* work_groups);

 }  // namespace gpu
--- a/tensorflow/lite/delegates/gpu/gl/compiler.cc
+++ b/tensorflow/lite/delegates/gpu/gl/compiler.cc
@ -43,25 +43,27 @@ namespace gl {
 namespace {

 struct ExceedSizeChecker {
-  bool operator()(uint32_t v) const { return v > max_size; }
+  // Callable with one overload per size shape (scalar, uint2, uint3). Each
+  // overload returns true when any component is larger than its limit.
+  // A scalar size is compared against the x limit only.
+  bool operator()(uint32_t v) const { return v > max_size.x; }

  bool operator()(const uint2& v) const {
-    return v.x > max_size || v.y > max_size;
+    return v.x > max_size.x || v.y > max_size.y;
  }

  bool operator()(const uint3& v) const {
-    return v.x > max_size || v.y > max_size || v.z > max_z_size;
+    return v.x > max_size.x || v.y > max_size.y || v.z > max_z_size;
  }

-  int max_size;
+  // Limits for the x and y components (separate values per axis).
+  int2 max_size;
+  // Limit for the z component of a uint3 size.
  int max_z_size;
 };

 // Returns true if any size variable exceeds the given limit
 bool ExceedsMaxSize(const Object& object, const GpuInfo& gpu_info) {
-  return absl::visit(ExceedSizeChecker{gpu_info.max_texture_size,
-                                       gpu_info.max_array_texture_layers},
-                     object.size);
+  // The checker's x/y limits come from the 2D image width/height getters and
+  // its z limit from the array-layer getter; it is then applied to
+  // object.size through absl::visit.
+  // NOTE(review): the getters return uint64_t while the checker stores int
+  // values (presumably int2 holds ints), so the values are narrowed here --
+  // confirm they always fit.
+  ExceedSizeChecker size_checker;
+  size_checker.max_size =
+      int2(gpu_info.GetMaxImage2DWidth(), gpu_info.GetMaxImage2DHeight());
+  size_checker.max_z_size = gpu_info.GetMaxImage2DArrayLayers();
+  return absl::visit(size_checker, object.size);
 }

 ObjectType ChooseFastestObjectType(const GpuInfo& gpu_info) {
--- a/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc
+++ b/tensorflow/lite/delegates/gpu/gl/request_gpu_info.cc
@ -76,10 +76,11 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
  glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2,
                  &info.max_work_group_size[2]);
  glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS,
-                &info.max_work_group_invocations);
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.max_texture_size);
+                &info.opengl_info.max_work_group_invocations);
+  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.opengl_info.max_texture_size);
  glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.opengl_info.max_image_units);
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &info.max_array_texture_layers);
+  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS,
+                &info.opengl_info.max_array_texture_layers);
  RETURN_IF_ERROR(GetOpenGlErrors());
  *gpu_info = info;
  return absl::OkStatus();
--- a/tensorflow/lite/delegates/gpu/gl/workgroups/calculator.cc
+++ b/tensorflow/lite/delegates/gpu/gl/workgroups/calculator.cc
@ -29,26 +29,26 @@ uint64_t CalculateProduct(const uint3& value) {
 }

 void MaybeShrinkWorkgroup(const GpuInfo& gpu_info, uint3* wg) {
-  while (wg->x > gpu_info.max_work_group_size[0]) {
+  while (wg->x > gpu_info.GetMaxWorkGroupSizeForX()) {
    wg->x /= 2;
  }

-  while (wg->y > gpu_info.max_work_group_size[1]) {
+  while (wg->y > gpu_info.GetMaxWorkGroupSizeForY()) {
    wg->y /= 2;
  }

-  while (wg->z > gpu_info.max_work_group_size[2]) {
+  while (wg->z > gpu_info.GetMaxWorkGroupSizeForZ()) {
    wg->z /= 2;
  }

  // Code below decreases amount of invocations per workgroup in a balanced way.
  // As example, workgroup size is x=16, y=8, z=8 (16x8x8 = 1024), but
-  // max_work_group_invocations = 512. We need to fit this limit and we can
+  // max_work_group_total_size = 512. We need to fit this limit and we can
  // reduce workgroup size in different ways, but we want to use the most
  // balanced way. So code below will find the maximal of three dimensions and
  // reduce it, so the whole workgroup is kept balanced by all dimensions. And
  // the final reduced workgroup will be x=8, y=8, z=8 for the given example.
-  while (CalculateProduct(*wg) > gpu_info.max_work_group_invocations) {
+  while (CalculateProduct(*wg) > gpu_info.GetMaxWorkGroupTotalSize()) {
    unsigned int* max = &wg->x;
    if (wg->y > *max) max = &wg->y;
    if (wg->z > *max) max = &wg->z;