Report the specific GPU model name on the Overview Page.
PiperOrigin-RevId: 347727571 Change-Id: I69d8abab05a1ed8628002d02128a74412c0ac12e
This commit is contained in:
parent
9add8454de
commit
442824b76e
@ -29,7 +29,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h"
|
||||
#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
|
||||
#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
@ -48,7 +47,6 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
namespace {
|
||||
|
||||
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
|
||||
DeviceCapabilities cap;
|
||||
@ -79,8 +77,6 @@ DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) {
|
||||
return cap;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
|
||||
double peak_hbm_bw_giga_bytes_per_second) {
|
||||
PerfEnv result;
|
||||
@ -164,6 +160,8 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
||||
op_stats.mutable_run_environment());
|
||||
|
||||
KernelReportMap reports;
|
||||
absl::string_view gpu_model = "";
|
||||
|
||||
// TODO(b/161942993) parallelize XPlane processing per thread.
|
||||
for (const XPlane* device_trace : device_planes) {
|
||||
if (options.generate_op_metrics_db) {
|
||||
@ -174,6 +172,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
||||
ConvertDeviceTraceXPlaneToOpMetricsDb(*device_trace);
|
||||
op_metrics_db_combiner.Combine(device_op_metrics_db);
|
||||
}
|
||||
if (gpu_model.empty()) {
|
||||
gpu_model = GpuModelName(GetDeviceCapFromXPlane(*device_trace));
|
||||
}
|
||||
if (options.generate_step_db) {
|
||||
CombineStepEvents(ConvertDeviceTraceXPlaneToStepEvents(*device_trace),
|
||||
&step_events);
|
||||
@ -184,6 +185,11 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
||||
}
|
||||
}
|
||||
|
||||
if (!gpu_model.empty()) {
|
||||
// Overwrites the device type with the more specific GPU model name.
|
||||
op_stats.mutable_run_environment()->set_device_type(std::string(gpu_model));
|
||||
}
|
||||
|
||||
// Combine into reports.
|
||||
if (options.generate_kernel_stats_db) {
|
||||
CopyTopKDurationKernelReportsToDb(reports,
|
||||
|
@ -18,6 +18,7 @@ limitations under the License.
|
||||
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "tensorflow/core/platform/status.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/op_stats.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
|
||||
@ -39,6 +40,9 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space,
|
||||
void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space,
|
||||
OpStats* op_stats);
|
||||
|
||||
// Extracts DeviceCapabilities from XPlane stats.
|
||||
DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane);
|
||||
|
||||
// Populates PerfEnv.
|
||||
PerfEnv MakePerfEnv(double peak_tera_flops_per_second,
|
||||
double peak_hbm_bw_giga_bytes_per_second);
|
||||
|
@ -87,7 +87,7 @@ TEST(ConvertXPlaneToOpStats, RunEnvironment) {
|
||||
OpStats op_stats = ConvertXSpaceToOpStats(space, OpStatsOptions());
|
||||
const RunEnvironment& run_env = op_stats.run_environment();
|
||||
|
||||
EXPECT_EQ("GPU", run_env.device_type());
|
||||
EXPECT_EQ("Nvidia GPU", run_env.device_type());
|
||||
EXPECT_EQ(1, run_env.host_count());
|
||||
EXPECT_EQ(1, run_env.task_count());
|
||||
EXPECT_EQ(2, run_env.device_core_count());
|
||||
|
@ -15,6 +15,7 @@ limitations under the License.
|
||||
|
||||
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
|
||||
|
||||
#include "absl/strings/match.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
|
||||
@ -85,9 +86,31 @@ double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap) {
|
||||
device_cap.clock_rate_in_ghz();
|
||||
}
|
||||
|
||||
// Returns a human-readable GPU name for the given device capabilities.
//
// The name is chosen from the CUDA compute capability stored in `device_cap`:
// the major version selects the architecture family, and the minor version
// disambiguates families that share a major version (Volta/Turing on 7.x,
// Ampere/Ada Lovelace on 8.x). When the major version is not one of the
// listed families, the generic "Nvidia GPU" is returned.
//
// Every return value is a string literal, so the returned view never dangles.
absl::string_view GpuModelName(const DeviceCapabilities& device_cap) {
  const auto& compute_capability = device_cap.compute_capability();
  switch (compute_capability.major()) {
    case 2:
      return "Nvidia GPU (Fermi)";
    case 3:
      return "Nvidia GPU (Kepler)";
    case 5:
      return "Nvidia GPU (Maxwell)";
    case 6:
      return "Nvidia GPU (Pascal)";
    case 7:
      // 7.0 and 7.2 are Volta; 7.5 is Turing.
      if (compute_capability.minor() < 5) {
        return "Nvidia GPU (Volta)";
      }
      return "Nvidia GPU (Turing)";
    case 8:
      // 8.0, 8.6 and 8.7 are Ampere; 8.9 is Ada Lovelace.
      if (compute_capability.minor() < 9) {
        return "Nvidia GPU (Ampere)";
      }
      return "Nvidia GPU (Ada Lovelace)";
    case 9:
      return "Nvidia GPU (Hopper)";
    case 10:
      return "Nvidia GPU (Blackwell)";
    default:
      return "Nvidia GPU";
  }
}
|
||||
|
||||
HardwareType ParseHardwareType(absl::string_view device_type) {
|
||||
if (device_type == "GPU" || device_type == "Nvidia GPU")
|
||||
return HardwareType::GPU;
|
||||
if (absl::StrContains(device_type, "GPU")) return HardwareType::GPU;
|
||||
if (device_type == "CPU") return HardwareType::CPU_ONLY;
|
||||
if (device_type == "TPU") return HardwareType::TPU;
|
||||
return HardwareType::UNKNOWN_HARDWARE;
|
||||
|
@ -26,6 +26,9 @@ namespace profiler {
|
||||
// streaming multiprocessor.
|
||||
double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap);
|
||||
|
||||
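// Returns a display name for the GPU described by the given
// DeviceCapabilities. The name carries the architecture family, e.g.
// "Nvidia GPU (Volta)", selected from the compute capability; it falls back
// to "Nvidia GPU" when the compute capability is not recognized.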
|
||||
absl::string_view GpuModelName(const DeviceCapabilities& device_cap);
|
||||
|
||||
HardwareType ParseHardwareType(absl::string_view device_type);
|
||||
|
||||
// Returns true if the given hardware type has a device.
|
||||
|
Loading…
Reference in New Issue
Block a user