Report the specific GPU model name on the Overview Page.

PiperOrigin-RevId: 347727571 Change-Id: I69d8abab05a1ed8628002d02128a74412c0ac12e
2020-12-15 17:41:27 -08:00 · 2020-12-15 17:41:27 -08:00 · 442824b76e
commit 442824b76e
parent 9add8454de
5 changed files with 43 additions and 7 deletions
--- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc
+++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/core/profiler/convert/xplane_to_step_events.h"
 #include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
 #include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
-#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
 #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
 #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
 #include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
@@ -48,7 +47,6 @@ limitations under the License.

 namespace tensorflow {
 namespace profiler {
-namespace {

 DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  DeviceCapabilities cap;
@@ -79,8 +77,6 @@ DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  return cap;
 }

-}  // namespace
-
 PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
                    double peak_hbm_bw_giga_bytes_per_second) {
  PerfEnv result;
@@ -164,6 +160,8 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
                    op_stats.mutable_run_environment());

  KernelReportMap reports;
+  absl::string_view gpu_model = "";
+
  // TODO(b/161942993) parallelize XPlane processing per thread.
  for (const XPlane* device_trace : device_planes) {
    if (options.generate_op_metrics_db) {
@@ -174,6 +172,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
          ConvertDeviceTraceXPlaneToOpMetricsDb(*device_trace);
      op_metrics_db_combiner.Combine(device_op_metrics_db);
    }
+    if (gpu_model.empty()) {
+      gpu_model = GpuModelName(GetDeviceCapFromXPlane(*device_trace));
+    }
    if (options.generate_step_db) {
      CombineStepEvents(ConvertDeviceTraceXPlaneToStepEvents(*device_trace),
                        &step_events);
@@ -184,6 +185,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
    }
  }

+  if (!gpu_model.empty()) {
+    // Overwrites the device type with the more specific GPU model name.
+    op_stats.mutable_run_environment()->set_device_type(std::string(gpu_model));
+  }
+
  // Combine into reports.
  if (options.generate_kernel_stats_db) {
    CopyTopKDurationKernelReportsToDb(reports,
--- a/tensorflow/core/profiler/convert/xplane_to_op_stats.h
+++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.h
@@ -18,6 +18,7 @@ limitations under the License.

 #include "absl/container/flat_hash_set.h"
 #include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
 #include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"

@@ -39,6 +40,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
 void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space,
                                         OpStats* op_stats);

+// Extracts DeviceCapabilities from XPlane stats.
+DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane);
+
 // Populates PerfEnv.
 PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
                    double peak_hbm_bw_giga_bytes_per_second);
--- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
+++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
@@ -87,7 +87,7 @@ TEST(ConvertXPlaneToOpStats, RunEnvironment) {
  OpStats op_stats = ConvertXSpaceToOpStats(space, OpStatsOptions());
  const RunEnvironment& run_env = op_stats.run_environment();

-  EXPECT_EQ("GPU", run_env.device_type());
+  EXPECT_EQ("Nvidia GPU", run_env.device_type());
  EXPECT_EQ(1, run_env.host_count());
  EXPECT_EQ(1, run_env.task_count());
  EXPECT_EQ(2, run_env.device_core_count());
--- a/tensorflow/core/profiler/utils/hardware_type_utils.cc
+++ b/tensorflow/core/profiler/utils/hardware_type_utils.cc
@@ -15,6 +15,7 @@ limitations under the License.

 #include "tensorflow/core/profiler/utils/hardware_type_utils.h"

+#include "absl/strings/match.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
@@ -85,9 +86,31 @@ double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap) {
         device_cap.clock_rate_in_ghz();
 }

+absl::string_view GpuModelName(const DeviceCapabilities& device_cap) {  // Maps compute capability to a display name; every return is a string literal.
+  switch (device_cap.compute_capability().major()) {  // The major version selects the architecture name.
+    case 2:
+      return "Nvidia GPU (Fermi)";
+    case 3:
+      return "Nvidia GPU (Kepler)";
+    case 5:
+      return "Nvidia GPU (Maxwell)";
+    case 6:
+      return "Nvidia GPU (Pascal)";
+    case 7:  // Major version 7 is further split on the minor version.
+      if (device_cap.compute_capability().minor() < 5) {  // 7.0 through 7.4
+        return "Nvidia GPU (Volta)";
+      } else {  // 7.5 and above
+        return "Nvidia GPU (Turing)";
+      }
+    case 8:  // NOTE(review): every 8.x minor maps to Ampere, incl. 8.9 (Ada per CUDA docs) - confirm intended.
+      return "Nvidia GPU (Ampere)";
+    default:  // Any other major version gets the generic name with no architecture suffix.
+      return "Nvidia GPU";
+  }
+}
+
 HardwareType ParseHardwareType(absl::string_view device_type) {
-  if (device_type == "GPU" || device_type == "Nvidia GPU")
-    return HardwareType::GPU;
+  if (absl::StrContains(device_type, "GPU")) return HardwareType::GPU;
  if (device_type == "CPU") return HardwareType::CPU_ONLY;
  if (device_type == "TPU") return HardwareType::TPU;
  return HardwareType::UNKNOWN_HARDWARE;
--- a/tensorflow/core/profiler/utils/hardware_type_utils.h
+++ b/tensorflow/core/profiler/utils/hardware_type_utils.h
@ -26,6 +26,9 @@ namespace profiler {
 // streaming multiprocessor.
 double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap);

+// Returns the GPU model name from the given DeviceCapabilities.
+absl::string_view GpuModelName(const DeviceCapabilities& device_cap);
+
 HardwareType ParseHardwareType(absl::string_view device_type);

 // Returns true if the given hardware type has a device.