Make the step breakdown the same as on the overview page.

PiperOrigin-RevId: 336917686
Change-Id: If5daa078f8696e9e2a80ed62685605a2bed6495c
This commit is contained in:
A. Unique TensorFlower 2020-10-13 11:23:24 -07:00 committed by TensorFlower Gardener
parent c8a9751c55
commit 9031396802
6 changed files with 121 additions and 112 deletions

View File

@ -129,6 +129,7 @@ cc_library(
copts = tf_profiler_copts(), copts = tf_profiler_copts(),
deps = [ deps = [
"//tensorflow/core:lib", "//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:pod_stats_proto_cc", "//tensorflow/core/profiler/protobuf:pod_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:steps_db_proto_cc",

View File

@ -17,6 +17,7 @@ limitations under the License.
#include "google/protobuf/any.pb.h" #include "google/protobuf/any.pb.h"
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/diagnostics.h"
@ -38,14 +39,31 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
record.set_step_num(step_info.step_num()); record.set_step_num(step_info.step_num());
record.set_total_duration_us(PicosToMicros(step_info.duration_ps())); record.set_total_duration_us(PicosToMicros(step_info.duration_ps()));
auto& step_breakdown_map = *record.mutable_step_breakdown_us(); auto& step_breakdown_map = *record.mutable_step_breakdown_us();
std::vector<std::pair<uint64, std::string>> metrics; std::vector<std::pair<uint64, absl::string_view>> metrics;
for (const auto& entry : generic.type_ps()) {
step_breakdown_map[entry.first] = PicosToMicros(entry.second); auto add_event = [&](GenericEventType type,
metrics.emplace_back( std::initializer_list<EventType> event_list) {
entry.second, PrintEventTypeLabel(static_cast<EventType>(entry.first))); uint64 ps = 0;
} for (const auto& event_type : event_list) {
ps += gtl::FindWithDefault(generic.type_ps(), event_type, /*value=*/0);
}
step_breakdown_map[type] = PicosToMicros(ps);
metrics.emplace_back(ps, GetGenericEventTypeStr(type));
};
add_event(kDeviceCompute, {DEVICE_COMPUTE_32, DEVICE_COMPUTE_16});
add_event(kDeviceToDevice, {DEVICE_TO_DEVICE, DEVICE_WAIT_DEVICE});
add_event(kDeviceCollectives, {DEVICE_COLLECTIVES});
add_event(kHostCompute, {HOST_COMPUTE});
add_event(kHostPrepare, {HOST_PREPARE});
add_event(kInput, {HOST_WAIT_INPUT, HOST_TO_DEVICE, DEVICE_WAIT_HOST});
add_event(kOutput, {DEVICE_TO_HOST});
add_event(kCompile, {HOST_COMPILE});
add_event(kAllOthers, {UNKNOWN_TIME});
std::sort(metrics.begin(), metrics.end()); std::sort(metrics.begin(), metrics.end());
record.set_bottleneck(metrics.back().second); record.set_bottleneck(metrics.back().second.data(),
metrics.back().second.size());
return record; return record;
} }
@ -53,25 +71,14 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
PodStatsDatabase ConvertOpStatsToPodStats(const OpStats& op_stats) { PodStatsDatabase ConvertOpStatsToPodStats(const OpStats& op_stats) {
PodStatsDatabase pod_stats_db; PodStatsDatabase pod_stats_db;
auto add_event = [&pod_stats_db](EventType type) { for (int i = GenericEventType::kFirstGenericEventType;
StepBreakdownEvents* event = pod_stats_db.add_step_breakdown_events(); i <= GenericEventType::kLastGenericEventType; i++) {
event->set_id(type); auto& event = *pod_stats_db.add_step_breakdown_events();
event->set_name(PrintEventTypeLabel(type)); event.set_id(i);
}; absl::string_view type_str =
add_event(HOST_COMPUTE); GetGenericEventTypeStr(static_cast<GenericEventType>(i));
add_event(HOST_COMPILE); event.set_name(type_str.data(), type_str.size());
add_event(HOST_TO_HOST); }
add_event(HOST_TO_DEVICE);
add_event(HOST_PREPARE);
add_event(DEVICE_COLLECTIVES);
add_event(HOST_WAIT_INPUT);
add_event(DEVICE_TO_DEVICE);
add_event(DEVICE_TO_HOST);
add_event(DEVICE_COMPUTE_32);
add_event(DEVICE_COMPUTE_16);
add_event(DEVICE_WAIT_DEVICE);
add_event(DEVICE_WAIT_HOST);
add_event(UNKNOWN_TIME);
for (const auto& step_sequence : op_stats.step_db().step_sequence()) { for (const auto& step_sequence : op_stats.step_db().step_sequence()) {
int count = 0; int count = 0;

View File

@ -35,9 +35,10 @@ const double kMaxError = 1e-6;
constexpr int kStepNum = 2; constexpr int kStepNum = 2;
constexpr int kCoreId = 1; constexpr int kCoreId = 1;
constexpr int kStepTimePs = 1000; constexpr int kStepTimePs = 1000;
constexpr int kHostComputePs = 100; constexpr int kHostComputePs = 50;
constexpr int kHostCompilePs = 50; constexpr int kHostCompilePs = 50;
constexpr int kHostToHostPs = 50; constexpr int kHostToHostPs = 50;
constexpr int kHostToDevicePs = 50;
constexpr int kHostPreparePs = 50; constexpr int kHostPreparePs = 50;
constexpr int kDeviceCollectivePs = 350; constexpr int kDeviceCollectivePs = 350;
constexpr int kHostWaitInputPs = 50; constexpr int kHostWaitInputPs = 50;
@ -60,6 +61,7 @@ void CreateOpStats(OpStats* op_stats) {
type_ps[HOST_COMPUTE] = kHostComputePs; type_ps[HOST_COMPUTE] = kHostComputePs;
type_ps[HOST_COMPILE] = kHostCompilePs; type_ps[HOST_COMPILE] = kHostCompilePs;
type_ps[HOST_TO_HOST] = kHostToHostPs; type_ps[HOST_TO_HOST] = kHostToHostPs;
type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
type_ps[HOST_PREPARE] = kHostPreparePs; type_ps[HOST_PREPARE] = kHostPreparePs;
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs; type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs; type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
@ -83,34 +85,25 @@ TEST(OpStatsToPodStats, GpuPodStats) {
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(), EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
kMaxError); kMaxError);
const auto& breakdown = record.step_breakdown_us(); const auto& breakdown = record.step_breakdown_us();
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE), EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
kMaxError); breakdown.at(kDeviceCompute), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE), EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
kMaxError); breakdown.at(kDeviceToDevice), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
kMaxError);
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs), EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
breakdown.at(DEVICE_COLLECTIVES), kMaxError); breakdown.at(kDeviceCollectives), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT), EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
kMaxError); kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE), EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
kMaxError); kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST), EXPECT_NEAR(
kMaxError); PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps), breakdown.at(kInput), kMaxError);
breakdown.at(DEVICE_COMPUTE_32), kMaxError); EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps), EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
breakdown.at(DEVICE_COMPUTE_16), kMaxError); EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
kMaxError);
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
kMaxError); kMaxError);
EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck()); EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
} }
TEST(OpStatsToPodStats, Diagnostics) { TEST(OpStatsToPodStats, Diagnostics) {

View File

@ -36,9 +36,10 @@ const double kMaxError = 1e-6;
constexpr int kStepNum = 2; constexpr int kStepNum = 2;
constexpr int kCoreId = 1; constexpr int kCoreId = 1;
constexpr int kStepTimePs = 1000; constexpr int kStepTimePs = 1000;
constexpr int kHostComputePs = 100; constexpr int kHostComputePs = 50;
constexpr int kHostCompilePs = 50; constexpr int kHostCompilePs = 50;
constexpr int kHostToHostPs = 50; constexpr int kHostToHostPs = 50;
constexpr int kHostToDevicePs = 50;
constexpr int kHostPreparePs = 50; constexpr int kHostPreparePs = 50;
constexpr int kDeviceCollectivePs = 350; constexpr int kDeviceCollectivePs = 350;
constexpr int kHostWaitInputPs = 50; constexpr int kHostWaitInputPs = 50;
@ -61,6 +62,7 @@ void CreateOpStats(OpStats* op_stats) {
type_ps[HOST_COMPUTE] = kHostComputePs; type_ps[HOST_COMPUTE] = kHostComputePs;
type_ps[HOST_COMPILE] = kHostCompilePs; type_ps[HOST_COMPILE] = kHostCompilePs;
type_ps[HOST_TO_HOST] = kHostToHostPs; type_ps[HOST_TO_HOST] = kHostToHostPs;
type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
type_ps[HOST_PREPARE] = kHostPreparePs; type_ps[HOST_PREPARE] = kHostPreparePs;
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs; type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs; type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
@ -87,34 +89,25 @@ TEST(OpStatsToPodViewer, GpuPodViewer) {
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(), EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
kMaxError); kMaxError);
const auto& breakdown = record.step_breakdown_us(); const auto& breakdown = record.step_breakdown_us();
EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE), EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
kMaxError); breakdown.at(kDeviceCompute), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE), EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
kMaxError); breakdown.at(kDeviceToDevice), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
kMaxError);
EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs), EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
breakdown.at(DEVICE_COLLECTIVES), kMaxError); breakdown.at(kDeviceCollectives), kMaxError);
EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT), EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
kMaxError); kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE), EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
kMaxError); kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST), EXPECT_NEAR(
kMaxError); PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps), breakdown.at(kInput), kMaxError);
breakdown.at(DEVICE_COMPUTE_32), kMaxError); EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps), EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
breakdown.at(DEVICE_COMPUTE_16), kMaxError); EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
kMaxError);
EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
kMaxError); kMaxError);
EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck()); EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
} }
TEST(OpStatsToPodViewer, Diagnostics) { TEST(OpStatsToPodViewer, Diagnostics) {

View File

@ -162,8 +162,35 @@ EventType ClassifyDeviceCompute(absl::string_view event_name,
} }
} }
// Number of GenericEventType values in the inclusive range
// [kFirstGenericEventType, kLastGenericEventType].
constexpr int kNumGenericEventTypes = GenericEventType::kLastGenericEventType -
GenericEventType::kFirstGenericEventType +
1;
// Table type that associates each GenericEventType with its string.
using GenericEventTypeStrMap =
absl::flat_hash_map<GenericEventType, absl::string_view>;
// Returns the table that maps each GenericEventType to its string.
//
// The table is built on the first call, heap-allocated, and never freed, so
// the returned reference stays valid for the rest of the program.
const GenericEventTypeStrMap& GetGenericEventTypeStrMap() {
  static const GenericEventTypeStrMap* const kTypeToStr =
      new GenericEventTypeStrMap({
          {kDeviceCompute, "Device compute"},
          {kDeviceToDevice, "Device to device"},
          {kDeviceCollectives, "Device collective communication"},
          {kHostCompute, "Host compute"},
          {kHostPrepare, "Kernel launch"},
          {kInput, "Input"},
          {kOutput, "Output"},
          {kCompile, "Compilation"},
          {kAllOthers, "All others"},
      });
  const GenericEventTypeStrMap& type_to_str = *kTypeToStr;
  // Debug-only guard: the number of entries must equal the number of
  // GenericEventType values, so a newly added enumerator cannot be forgotten.
  DCHECK_EQ(type_to_str.size(), kNumGenericEventTypes);
  return type_to_str;
}
} // namespace } // namespace
// Returns the string registered for `event_type` in the GenericEventType
// string table. The lookup uses `at()`, so `event_type` must be a key of that
// table.
absl::string_view GetGenericEventTypeStr(GenericEventType event_type) {
  const GenericEventTypeStrMap& type_to_str = GetGenericEventTypeStrMap();
  return type_to_str.at(event_type);
}
EventType ClassifyGpuEvent(absl::string_view event_name, EventType ClassifyGpuEvent(absl::string_view event_name,
absl::string_view tensor_shapes) { absl::string_view tensor_shapes) {
if (absl::StartsWithIgnoreCase(event_name, "MEMCPYHtoD")) if (absl::StartsWithIgnoreCase(event_name, "MEMCPYHtoD"))
@ -231,42 +258,6 @@ std::string PrintEventType(EventType event_type) {
} }
} }
// Returns the label string for the given EventType.
// An EventType without a dedicated case hits the default branch, which
// DCHECK-fails and returns "Unknown event type".
std::string PrintEventTypeLabel(EventType event_type) {
switch (event_type) {
case UNKNOWN_TIME:
return "Machine idle or unknown events";
case HOST_COMPUTE:
return "Host compute";
case HOST_COMPILE:
return "Host compile";
case HOST_TO_HOST:
return "Host to host";
case HOST_TO_DEVICE:
return "Host to device";
case HOST_PREPARE:
return "Host prepare";
case DEVICE_COLLECTIVES:
return "Device collectives";
case HOST_WAIT_INPUT:
return "Host wait input";
case DEVICE_TO_DEVICE:
return "Device to device";
case DEVICE_TO_HOST:
return "Device to host";
case DEVICE_COMPUTE_32:
return "Device compute 32-bit";
case DEVICE_COMPUTE_16:
return "Device compute 16-bit";
case DEVICE_WAIT_DEVICE:
return "Device wait device";
case DEVICE_WAIT_HOST:
return "Device wait host";
default:
// Not expected to be reached: fail the DCHECK, then fall back to a
// placeholder label.
DCHECK(false);
return "Unknown event type";
}
}
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) { std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
return absl::StrCat("(", PrintEventType(event_type_span.type), ", ", return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
event_type_span.span.DebugString(), ")"); event_type_span.span.DebugString(), ")");

View File

@ -68,6 +68,30 @@ enum EventType {
LAST_EVENT_TYPE = DEVICE_WAIT_HOST LAST_EVENT_TYPE = DEVICE_WAIT_HOST
}; };
// Generic event types that are shown to the user.
// The enumerators take consecutive values from kFirstGenericEventType through
// kLastGenericEventType, so the whole range can be iterated as integers.
enum GenericEventType {
// Lower bound of the range; has the same value as kDeviceCompute.
kFirstGenericEventType = 1,
// Device is computing.
kDeviceCompute = kFirstGenericEventType,
// Device-to-device communication.
kDeviceToDevice,
// Collective Ops such as All-Reduce and NCCL.
kDeviceCollectives,
// Host is computing.
kHostCompute,
// Host is preparing to launch a computation on device.
kHostPrepare,
// Device waiting for input from the host.
kInput,
// Device sending output to the host.
kOutput,
// Host is compiling.
kCompile,
// No recognized event associated with the time.
kAllOthers,
// Upper bound of the range; has the same value as kAllOthers.
kLastGenericEventType = kAllOthers,
};
// Contains the type and timespan of an event. // Contains the type and timespan of an event.
struct EventTypeSpan { struct EventTypeSpan {
EventType type; // type of this event. EventType type; // type of this event.
@ -197,8 +221,8 @@ EventType ClassifyGpuEvent(absl::string_view event_name,
// Returns the name of the given EventType. // Returns the name of the given EventType.
std::string PrintEventType(EventType event_type); std::string PrintEventType(EventType event_type);
// Returns the label of the given EventType. // Returns the string of the given GenericEventType.
std::string PrintEventTypeLabel(EventType event_type); absl::string_view GetGenericEventTypeStr(GenericEventType event_type);
// Returns a string that prints the given EventTypeSpan. // Returns a string that prints the given EventTypeSpan.
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span); std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);