Make the step breakdown the same as on the overview page.
PiperOrigin-RevId: 336917686 Change-Id: If5daa078f8696e9e2a80ed62685605a2bed6495c
This commit is contained in:
parent
c8a9751c55
commit
9031396802
@ -129,6 +129,7 @@ cc_library(
|
|||||||
copts = tf_profiler_copts(),
|
copts = tf_profiler_copts(),
|
||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/core:lib_internal",
|
||||||
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:pod_stats_proto_cc",
|
"//tensorflow/core/profiler/protobuf:pod_stats_proto_cc",
|
||||||
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "google/protobuf/any.pb.h"
|
#include "google/protobuf/any.pb.h"
|
||||||
#include "absl/strings/string_view.h"
|
#include "absl/strings/string_view.h"
|
||||||
|
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
|
||||||
#include "tensorflow/core/profiler/utils/diagnostics.h"
|
#include "tensorflow/core/profiler/utils/diagnostics.h"
|
||||||
@ -38,14 +39,31 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
|
|||||||
record.set_step_num(step_info.step_num());
|
record.set_step_num(step_info.step_num());
|
||||||
record.set_total_duration_us(PicosToMicros(step_info.duration_ps()));
|
record.set_total_duration_us(PicosToMicros(step_info.duration_ps()));
|
||||||
auto& step_breakdown_map = *record.mutable_step_breakdown_us();
|
auto& step_breakdown_map = *record.mutable_step_breakdown_us();
|
||||||
std::vector<std::pair<uint64, std::string>> metrics;
|
std::vector<std::pair<uint64, absl::string_view>> metrics;
|
||||||
for (const auto& entry : generic.type_ps()) {
|
|
||||||
step_breakdown_map[entry.first] = PicosToMicros(entry.second);
|
auto add_event = [&](GenericEventType type,
|
||||||
metrics.emplace_back(
|
std::initializer_list<EventType> event_list) {
|
||||||
entry.second, PrintEventTypeLabel(static_cast<EventType>(entry.first)));
|
uint64 ps = 0;
|
||||||
}
|
for (const auto& event_type : event_list) {
|
||||||
|
ps += gtl::FindWithDefault(generic.type_ps(), event_type, /*value=*/0);
|
||||||
|
}
|
||||||
|
step_breakdown_map[type] = PicosToMicros(ps);
|
||||||
|
metrics.emplace_back(ps, GetGenericEventTypeStr(type));
|
||||||
|
};
|
||||||
|
|
||||||
|
add_event(kDeviceCompute, {DEVICE_COMPUTE_32, DEVICE_COMPUTE_16});
|
||||||
|
add_event(kDeviceToDevice, {DEVICE_TO_DEVICE, DEVICE_WAIT_DEVICE});
|
||||||
|
add_event(kDeviceCollectives, {DEVICE_COLLECTIVES});
|
||||||
|
add_event(kHostCompute, {HOST_COMPUTE});
|
||||||
|
add_event(kHostPrepare, {HOST_PREPARE});
|
||||||
|
add_event(kInput, {HOST_WAIT_INPUT, HOST_TO_DEVICE, DEVICE_WAIT_HOST});
|
||||||
|
add_event(kOutput, {DEVICE_TO_HOST});
|
||||||
|
add_event(kCompile, {HOST_COMPILE});
|
||||||
|
add_event(kAllOthers, {UNKNOWN_TIME});
|
||||||
|
|
||||||
std::sort(metrics.begin(), metrics.end());
|
std::sort(metrics.begin(), metrics.end());
|
||||||
record.set_bottleneck(metrics.back().second);
|
record.set_bottleneck(metrics.back().second.data(),
|
||||||
|
metrics.back().second.size());
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,25 +71,14 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
|
|||||||
|
|
||||||
PodStatsDatabase ConvertOpStatsToPodStats(const OpStats& op_stats) {
|
PodStatsDatabase ConvertOpStatsToPodStats(const OpStats& op_stats) {
|
||||||
PodStatsDatabase pod_stats_db;
|
PodStatsDatabase pod_stats_db;
|
||||||
auto add_event = [&pod_stats_db](EventType type) {
|
for (int i = GenericEventType::kFirstGenericEventType;
|
||||||
StepBreakdownEvents* event = pod_stats_db.add_step_breakdown_events();
|
i <= GenericEventType::kLastGenericEventType; i++) {
|
||||||
event->set_id(type);
|
auto& event = *pod_stats_db.add_step_breakdown_events();
|
||||||
event->set_name(PrintEventTypeLabel(type));
|
event.set_id(i);
|
||||||
};
|
absl::string_view type_str =
|
||||||
add_event(HOST_COMPUTE);
|
GetGenericEventTypeStr(static_cast<GenericEventType>(i));
|
||||||
add_event(HOST_COMPILE);
|
event.set_name(type_str.data(), type_str.size());
|
||||||
add_event(HOST_TO_HOST);
|
}
|
||||||
add_event(HOST_TO_DEVICE);
|
|
||||||
add_event(HOST_PREPARE);
|
|
||||||
add_event(DEVICE_COLLECTIVES);
|
|
||||||
add_event(HOST_WAIT_INPUT);
|
|
||||||
add_event(DEVICE_TO_DEVICE);
|
|
||||||
add_event(DEVICE_TO_HOST);
|
|
||||||
add_event(DEVICE_COMPUTE_32);
|
|
||||||
add_event(DEVICE_COMPUTE_16);
|
|
||||||
add_event(DEVICE_WAIT_DEVICE);
|
|
||||||
add_event(DEVICE_WAIT_HOST);
|
|
||||||
add_event(UNKNOWN_TIME);
|
|
||||||
|
|
||||||
for (const auto& step_sequence : op_stats.step_db().step_sequence()) {
|
for (const auto& step_sequence : op_stats.step_db().step_sequence()) {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -35,9 +35,10 @@ const double kMaxError = 1e-6;
|
|||||||
constexpr int kStepNum = 2;
|
constexpr int kStepNum = 2;
|
||||||
constexpr int kCoreId = 1;
|
constexpr int kCoreId = 1;
|
||||||
constexpr int kStepTimePs = 1000;
|
constexpr int kStepTimePs = 1000;
|
||||||
constexpr int kHostComputePs = 100;
|
constexpr int kHostComputePs = 50;
|
||||||
constexpr int kHostCompilePs = 50;
|
constexpr int kHostCompilePs = 50;
|
||||||
constexpr int kHostToHostPs = 50;
|
constexpr int kHostToHostPs = 50;
|
||||||
|
constexpr int kHostToDevicePs = 50;
|
||||||
constexpr int kHostPreparePs = 50;
|
constexpr int kHostPreparePs = 50;
|
||||||
constexpr int kDeviceCollectivePs = 350;
|
constexpr int kDeviceCollectivePs = 350;
|
||||||
constexpr int kHostWaitInputPs = 50;
|
constexpr int kHostWaitInputPs = 50;
|
||||||
@ -60,6 +61,7 @@ void CreateOpStats(OpStats* op_stats) {
|
|||||||
type_ps[HOST_COMPUTE] = kHostComputePs;
|
type_ps[HOST_COMPUTE] = kHostComputePs;
|
||||||
type_ps[HOST_COMPILE] = kHostCompilePs;
|
type_ps[HOST_COMPILE] = kHostCompilePs;
|
||||||
type_ps[HOST_TO_HOST] = kHostToHostPs;
|
type_ps[HOST_TO_HOST] = kHostToHostPs;
|
||||||
|
type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
|
||||||
type_ps[HOST_PREPARE] = kHostPreparePs;
|
type_ps[HOST_PREPARE] = kHostPreparePs;
|
||||||
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
|
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
|
||||||
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
|
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
|
||||||
@ -83,34 +85,25 @@ TEST(OpStatsToPodStats, GpuPodStats) {
|
|||||||
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
|
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
const auto& breakdown = record.step_breakdown_us();
|
const auto& breakdown = record.step_breakdown_us();
|
||||||
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE),
|
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
|
||||||
kMaxError);
|
breakdown.at(kDeviceCompute), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE),
|
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
|
||||||
kMaxError);
|
breakdown.at(kDeviceToDevice), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
|
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
|
||||||
breakdown.at(DEVICE_COLLECTIVES), kMaxError);
|
breakdown.at(kDeviceCollectives), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT),
|
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE),
|
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST),
|
EXPECT_NEAR(
|
||||||
kMaxError);
|
PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps),
|
breakdown.at(kInput), kMaxError);
|
||||||
breakdown.at(DEVICE_COMPUTE_32), kMaxError);
|
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps),
|
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
|
||||||
breakdown.at(DEVICE_COMPUTE_16), kMaxError);
|
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
|
|
||||||
breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
|
|
||||||
kMaxError);
|
kMaxError);
|
||||||
|
|
||||||
EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck());
|
EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(OpStatsToPodStats, Diagnostics) {
|
TEST(OpStatsToPodStats, Diagnostics) {
|
||||||
|
@ -36,9 +36,10 @@ const double kMaxError = 1e-6;
|
|||||||
constexpr int kStepNum = 2;
|
constexpr int kStepNum = 2;
|
||||||
constexpr int kCoreId = 1;
|
constexpr int kCoreId = 1;
|
||||||
constexpr int kStepTimePs = 1000;
|
constexpr int kStepTimePs = 1000;
|
||||||
constexpr int kHostComputePs = 100;
|
constexpr int kHostComputePs = 50;
|
||||||
constexpr int kHostCompilePs = 50;
|
constexpr int kHostCompilePs = 50;
|
||||||
constexpr int kHostToHostPs = 50;
|
constexpr int kHostToHostPs = 50;
|
||||||
|
constexpr int kHostToDevicePs = 50;
|
||||||
constexpr int kHostPreparePs = 50;
|
constexpr int kHostPreparePs = 50;
|
||||||
constexpr int kDeviceCollectivePs = 350;
|
constexpr int kDeviceCollectivePs = 350;
|
||||||
constexpr int kHostWaitInputPs = 50;
|
constexpr int kHostWaitInputPs = 50;
|
||||||
@ -61,6 +62,7 @@ void CreateOpStats(OpStats* op_stats) {
|
|||||||
type_ps[HOST_COMPUTE] = kHostComputePs;
|
type_ps[HOST_COMPUTE] = kHostComputePs;
|
||||||
type_ps[HOST_COMPILE] = kHostCompilePs;
|
type_ps[HOST_COMPILE] = kHostCompilePs;
|
||||||
type_ps[HOST_TO_HOST] = kHostToHostPs;
|
type_ps[HOST_TO_HOST] = kHostToHostPs;
|
||||||
|
type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
|
||||||
type_ps[HOST_PREPARE] = kHostPreparePs;
|
type_ps[HOST_PREPARE] = kHostPreparePs;
|
||||||
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
|
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
|
||||||
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
|
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
|
||||||
@ -87,34 +89,25 @@ TEST(OpStatsToPodViewer, GpuPodViewer) {
|
|||||||
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
|
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
const auto& breakdown = record.step_breakdown_us();
|
const auto& breakdown = record.step_breakdown_us();
|
||||||
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE),
|
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
|
||||||
kMaxError);
|
breakdown.at(kDeviceCompute), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE),
|
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
|
||||||
kMaxError);
|
breakdown.at(kDeviceToDevice), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
|
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
|
||||||
breakdown.at(DEVICE_COLLECTIVES), kMaxError);
|
breakdown.at(kDeviceCollectives), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT),
|
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE),
|
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
|
||||||
kMaxError);
|
kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST),
|
EXPECT_NEAR(
|
||||||
kMaxError);
|
PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps),
|
breakdown.at(kInput), kMaxError);
|
||||||
breakdown.at(DEVICE_COMPUTE_32), kMaxError);
|
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps),
|
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
|
||||||
breakdown.at(DEVICE_COMPUTE_16), kMaxError);
|
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
|
|
||||||
breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
|
|
||||||
kMaxError);
|
|
||||||
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
|
|
||||||
kMaxError);
|
kMaxError);
|
||||||
|
|
||||||
EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck());
|
EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(OpStatsToPodViewer, Diagnostics) {
|
TEST(OpStatsToPodViewer, Diagnostics) {
|
||||||
|
@ -162,8 +162,35 @@ EventType ClassifyDeviceCompute(absl::string_view event_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr int kNumGenericEventTypes = GenericEventType::kLastGenericEventType -
|
||||||
|
GenericEventType::kFirstGenericEventType +
|
||||||
|
1;
|
||||||
|
|
||||||
|
using GenericEventTypeStrMap =
|
||||||
|
absl::flat_hash_map<GenericEventType, absl::string_view>;
|
||||||
|
|
||||||
|
const GenericEventTypeStrMap& GetGenericEventTypeStrMap() {
|
||||||
|
static const auto* generic_event_type_str_map = new GenericEventTypeStrMap({
|
||||||
|
{kDeviceCompute, "Device compute"},
|
||||||
|
{kDeviceToDevice, "Device to device"},
|
||||||
|
{kDeviceCollectives, "Device collective communication"},
|
||||||
|
{kHostCompute, "Host compute"},
|
||||||
|
{kHostPrepare, "Kernel launch"},
|
||||||
|
{kInput, "Input"},
|
||||||
|
{kOutput, "Output"},
|
||||||
|
{kCompile, "Compilation"},
|
||||||
|
{kAllOthers, "All others"},
|
||||||
|
});
|
||||||
|
DCHECK_EQ(generic_event_type_str_map->size(), kNumGenericEventTypes);
|
||||||
|
return *generic_event_type_str_map;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
absl::string_view GetGenericEventTypeStr(GenericEventType event_type) {
|
||||||
|
return GetGenericEventTypeStrMap().at(event_type);
|
||||||
|
}
|
||||||
|
|
||||||
EventType ClassifyGpuEvent(absl::string_view event_name,
|
EventType ClassifyGpuEvent(absl::string_view event_name,
|
||||||
absl::string_view tensor_shapes) {
|
absl::string_view tensor_shapes) {
|
||||||
if (absl::StartsWithIgnoreCase(event_name, "MEMCPYHtoD"))
|
if (absl::StartsWithIgnoreCase(event_name, "MEMCPYHtoD"))
|
||||||
@ -231,42 +258,6 @@ std::string PrintEventType(EventType event_type) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string PrintEventTypeLabel(EventType event_type) {
|
|
||||||
switch (event_type) {
|
|
||||||
case UNKNOWN_TIME:
|
|
||||||
return "Machine idle or unknown events";
|
|
||||||
case HOST_COMPUTE:
|
|
||||||
return "Host compute";
|
|
||||||
case HOST_COMPILE:
|
|
||||||
return "Host compile";
|
|
||||||
case HOST_TO_HOST:
|
|
||||||
return "Host to host";
|
|
||||||
case HOST_TO_DEVICE:
|
|
||||||
return "Host to device";
|
|
||||||
case HOST_PREPARE:
|
|
||||||
return "Host prepare";
|
|
||||||
case DEVICE_COLLECTIVES:
|
|
||||||
return "Device collectives";
|
|
||||||
case HOST_WAIT_INPUT:
|
|
||||||
return "Host wait input";
|
|
||||||
case DEVICE_TO_DEVICE:
|
|
||||||
return "Device to device";
|
|
||||||
case DEVICE_TO_HOST:
|
|
||||||
return "Device to host";
|
|
||||||
case DEVICE_COMPUTE_32:
|
|
||||||
return "Device compute 32-bit";
|
|
||||||
case DEVICE_COMPUTE_16:
|
|
||||||
return "Device compute 16-bit";
|
|
||||||
case DEVICE_WAIT_DEVICE:
|
|
||||||
return "Device wait device";
|
|
||||||
case DEVICE_WAIT_HOST:
|
|
||||||
return "Device wait host";
|
|
||||||
default:
|
|
||||||
DCHECK(false);
|
|
||||||
return "Unknown event type";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
|
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
|
||||||
return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
|
return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
|
||||||
event_type_span.span.DebugString(), ")");
|
event_type_span.span.DebugString(), ")");
|
||||||
|
@ -68,6 +68,30 @@ enum EventType {
|
|||||||
LAST_EVENT_TYPE = DEVICE_WAIT_HOST
|
LAST_EVENT_TYPE = DEVICE_WAIT_HOST
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Generic event types that are shown to the user.
|
||||||
|
enum GenericEventType {
|
||||||
|
kFirstGenericEventType = 1,
|
||||||
|
// Device is computing.
|
||||||
|
kDeviceCompute = kFirstGenericEventType,
|
||||||
|
// Device-to-device communication.
|
||||||
|
kDeviceToDevice,
|
||||||
|
// Collective Ops such as All-Reduce and NCCL.
|
||||||
|
kDeviceCollectives,
|
||||||
|
// Host is computing.
|
||||||
|
kHostCompute,
|
||||||
|
// Host is preparing to launch a computation on device.
|
||||||
|
kHostPrepare,
|
||||||
|
// Device waiting for input from the host.
|
||||||
|
kInput,
|
||||||
|
// Device sending output to the host.
|
||||||
|
kOutput,
|
||||||
|
// Host is compiling.
|
||||||
|
kCompile,
|
||||||
|
// No recognized event associated with the time.
|
||||||
|
kAllOthers,
|
||||||
|
kLastGenericEventType = kAllOthers,
|
||||||
|
};
|
||||||
|
|
||||||
// Contains the type and timespan of an event.
|
// Contains the type and timespan of an event.
|
||||||
struct EventTypeSpan {
|
struct EventTypeSpan {
|
||||||
EventType type; // type of this event.
|
EventType type; // type of this event.
|
||||||
@ -197,8 +221,8 @@ EventType ClassifyGpuEvent(absl::string_view event_name,
|
|||||||
// Returns the name of the given EventType.
|
// Returns the name of the given EventType.
|
||||||
std::string PrintEventType(EventType event_type);
|
std::string PrintEventType(EventType event_type);
|
||||||
|
|
||||||
// Returns the label of the given EventType.
|
// Returns the string of the given GenericEventType.
|
||||||
std::string PrintEventTypeLabel(EventType event_type);
|
absl::string_view GetGenericEventTypeStr(GenericEventType event_type);
|
||||||
|
|
||||||
// Returns a string that prints the given EventTypeSpan.
|
// Returns a string that prints the given EventTypeSpan.
|
||||||
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);
|
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user