Report the specific GPU model name on the Overview Page.

PiperOrigin-RevId: 347727571
Change-Id: I69d8abab05a1ed8628002d02128a74412c0ac12e
Authored by A. Unique TensorFlower on 2020-12-15 17:41:27 -08:00, committed by TensorFlower Gardener
parent 9add8454de
commit 442824b76e
5 changed files with 43 additions and 7 deletions
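
As a quick orientation before the diff: the new GpuModelName() helper (added in hardware_type_utils below) maps a device's CUDA compute capability to an architecture-specific string, and ConvertXSpaceToOpStats() now writes that string into RunEnvironment.device_type. The following is a minimal, hypothetical sketch of that mapping in isolation, not code from this commit; it assumes the usual tensorflow/core/profiler headers and build dependencies.

// Minimal illustration only; not part of the commit. Assumes the
// tensorflow/core/profiler headers and protobufs below are linked in.
#include <iostream>

#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"

int main() {
  tensorflow::profiler::DeviceCapabilities cap;
  // Compute capability 7.0 is a Volta-class GPU (e.g. V100).
  cap.mutable_compute_capability()->set_major(7);
  cap.mutable_compute_capability()->set_minor(0);
  // Prints "Nvidia GPU (Volta)"; before this change the Overview Page
  // would only have shown the generic device type "GPU".
  std::cout << tensorflow::profiler::GpuModelName(cap) << "\n";
  return 0;
}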


@@ -29,7 +29,6 @@ limitations under the License.
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h"
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
@@ -48,7 +47,6 @@ limitations under the License.
namespace tensorflow {
namespace profiler {
namespace {
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  DeviceCapabilities cap;
@@ -79,8 +77,6 @@ DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
  return cap;
}
}  // namespace
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
                    double peak_hbm_bw_giga_bytes_per_second) {
  PerfEnv result;
@@ -164,6 +160,8 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
                    op_stats.mutable_run_environment());
  KernelReportMap reports;
  absl::string_view gpu_model = "";
  // TODO(b/161942993) parallelize XPlane processing per thread.
  for (const XPlane* device_trace : device_planes) {
    if (options.generate_op_metrics_db) {
@@ -174,6 +172,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
          ConvertDeviceTraceXPlaneToOpMetricsDb(*device_trace);
      op_metrics_db_combiner.Combine(device_op_metrics_db);
    }
    if (gpu_model.empty()) {
      gpu_model = GpuModelName(GetDeviceCapFromXPlane(*device_trace));
    }
    if (options.generate_step_db) {
      CombineStepEvents(ConvertDeviceTraceXPlaneToStepEvents(*device_trace),
                        &step_events);
@@ -184,6 +185,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
    }
  }
  if (!gpu_model.empty()) {
    // Overwrites the device type with the more specific GPU model name.
    op_stats.mutable_run_environment()->set_device_type(std::string(gpu_model));
  }
  // Combine into reports.
  if (options.generate_kernel_stats_db) {
    CopyTopKDurationKernelReportsToDb(reports,

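The net effect of the hunks above: converting an XSpace that contains a GPU device plane now yields a RunEnvironment whose device_type carries the specific model name rather than the generic "GPU". A hedged sketch of that end-to-end usage follows; the PrintDeviceType wrapper is hypothetical and only for illustration, and it assumes the profiler convert/ headers are available.

// Illustration only; not part of the commit.
#include <iostream>

#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"

// Hypothetical helper: print the device type recorded for a profiled XSpace.
void PrintDeviceType(const tensorflow::profiler::XSpace& space) {
  using tensorflow::profiler::ConvertXSpaceToOpStats;
  using tensorflow::profiler::OpStatsOptions;
  tensorflow::profiler::OpStats op_stats =
      ConvertXSpaceToOpStats(space, OpStatsOptions());
  // With a GPU device plane present, this now prints e.g.
  // "Nvidia GPU (Volta)" instead of the generic "GPU".
  std::cout << op_stats.run_environment().device_type() << "\n";
}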

@@ -18,6 +18,7 @@ limitations under the License.
#include "absl/container/flat_hash_set.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
@@ -39,6 +40,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space,
                                         OpStats* op_stats);
// Extracts DeviceCapabilities from XPlane stats.
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane);
// Populates PerfEnv.
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
                    double peak_hbm_bw_giga_bytes_per_second);


@@ -87,7 +87,7 @@ TEST(ConvertXPlaneToOpStats, RunEnvironment) {
  OpStats op_stats = ConvertXSpaceToOpStats(space, OpStatsOptions());
  const RunEnvironment& run_env = op_stats.run_environment();
  EXPECT_EQ("GPU", run_env.device_type());
  EXPECT_EQ("Nvidia GPU", run_env.device_type());
  EXPECT_EQ(1, run_env.host_count());
  EXPECT_EQ(1, run_env.task_count());
  EXPECT_EQ(2, run_env.device_core_count());


@@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
#include "absl/strings/match.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
@@ -85,9 +86,31 @@ double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap) {
         device_cap.clock_rate_in_ghz();
}
absl::string_view GpuModelName(const DeviceCapabilities& device_cap) {
  switch (device_cap.compute_capability().major()) {
    case 2:
      return "Nvidia GPU (Fermi)";
    case 3:
      return "Nvidia GPU (Kepler)";
    case 5:
      return "Nvidia GPU (Maxwell)";
    case 6:
      return "Nvidia GPU (Pascal)";
    case 7:
      if (device_cap.compute_capability().minor() < 5) {
        return "Nvidia GPU (Volta)";
      } else {
        return "Nvidia GPU (Turing)";
      }
    case 8:
      return "Nvidia GPU (Ampere)";
    default:
      return "Nvidia GPU";
  }
}
HardwareType ParseHardwareType(absl::string_view device_type) {
  if (device_type == "GPU" || device_type == "Nvidia GPU")
    return HardwareType::GPU;
  if (absl::StrContains(device_type, "GPU")) return HardwareType::GPU;
  if (device_type == "CPU") return HardwareType::CPU_ONLY;
  if (device_type == "TPU") return HardwareType::TPU;
  return HardwareType::UNKNOWN_HARDWARE;

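Because the reported names are now strings like "Nvidia GPU (Turing)", the old exact comparisons in ParseHardwareType would no longer match, which is why it switches to an absl::StrContains check above. A small hedged sketch of the resulting behavior (assumption: the profiler utils target and its protobufs are linked):

// Illustration only; exercises the new substring-based ParseHardwareType.
#include <cassert>

#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"

int main() {
  using tensorflow::profiler::HardwareType;
  using tensorflow::profiler::ParseHardwareType;
  // Any device type containing "GPU" now maps to HardwareType::GPU,
  // including the new model names produced by GpuModelName().
  assert(ParseHardwareType("Nvidia GPU (Turing)") == HardwareType::GPU);
  assert(ParseHardwareType("CPU") == HardwareType::CPU_ONLY);
  assert(ParseHardwareType("TPU") == HardwareType::TPU);
  return 0;
}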

@@ -26,6 +26,9 @@ namespace profiler {
// streaming multiprocessor.
double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap);
// Returns the GPU model name from the given DeviceCapabilities.
absl::string_view GpuModelName(const DeviceCapabilities& device_cap);
HardwareType ParseHardwareType(absl::string_view device_type);
// Returns true if the given hardware type has a device.