Make the step breakdown consistent with the overview page.

PiperOrigin-RevId: 336917686
Change-Id: If5daa078f8696e9e2a80ed62685605a2bed6495c
A. Unique TensorFlower 2020-10-13 11:23:24 -07:00 committed by TensorFlower Gardener
parent c8a9751c55
commit 9031396802
6 changed files with 121 additions and 112 deletions
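
For orientation: this change regroups the per-step breakdown from the fine-grained EventType buckets into the GenericEventType buckets already used by the overview page, so PodStatsRecord.step_breakdown_us() is now keyed by GenericEventType and the bottleneck label comes from GetGenericEventTypeStr(). A minimal consumer-side sketch of the new shape follows; it is not part of this commit, and the include paths, namespace, and print helper are illustrative assumptions (only the record accessors and GetGenericEventTypeStr() appear in the diff below).

// Illustrative sketch only; the proto and event_span include paths below are assumed.
#include <iostream>

#include "tensorflow/core/profiler/protobuf/pod_stats.pb.h"
#include "tensorflow/core/profiler/utils/event_span.h"

namespace tensorflow {
namespace profiler {

// Prints one step record using the new GenericEventType-keyed breakdown.
void PrintStepBreakdown(const PodStatsRecord& record) {
  std::cout << "step " << record.step_num() << ": "
            << record.total_duration_us() << " us, bottleneck = "
            << record.bottleneck() << "\n";
  for (const auto& entry : record.step_breakdown_us()) {
    // Keys are now GenericEventType values (kDeviceCompute, kInput, ...)
    // rather than raw EventType values.
    std::cout << "  "
              << GetGenericEventTypeStr(
                     static_cast<GenericEventType>(entry.first))
              << ": " << entry.second << " us\n";
  }
}

}  // namespace profiler
}  // namespace tensorflow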

View File

@@ -129,6 +129,7 @@ cc_library(
copts = tf_profiler_copts(),
deps = [
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:pod_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:steps_db_proto_cc",

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include "google/protobuf/any.pb.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/diagnostics.h"
@@ -38,14 +39,31 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
record.set_step_num(step_info.step_num());
record.set_total_duration_us(PicosToMicros(step_info.duration_ps()));
auto& step_breakdown_map = *record.mutable_step_breakdown_us();
-std::vector<std::pair<uint64, std::string>> metrics;
-for (const auto& entry : generic.type_ps()) {
-step_breakdown_map[entry.first] = PicosToMicros(entry.second);
-metrics.emplace_back(
-entry.second, PrintEventTypeLabel(static_cast<EventType>(entry.first)));
-}
+std::vector<std::pair<uint64, absl::string_view>> metrics;
+auto add_event = [&](GenericEventType type,
+std::initializer_list<EventType> event_list) {
+uint64 ps = 0;
+for (const auto& event_type : event_list) {
+ps += gtl::FindWithDefault(generic.type_ps(), event_type, /*value=*/0);
+}
+step_breakdown_map[type] = PicosToMicros(ps);
+metrics.emplace_back(ps, GetGenericEventTypeStr(type));
+};
+add_event(kDeviceCompute, {DEVICE_COMPUTE_32, DEVICE_COMPUTE_16});
+add_event(kDeviceToDevice, {DEVICE_TO_DEVICE, DEVICE_WAIT_DEVICE});
+add_event(kDeviceCollectives, {DEVICE_COLLECTIVES});
+add_event(kHostCompute, {HOST_COMPUTE});
+add_event(kHostPrepare, {HOST_PREPARE});
+add_event(kInput, {HOST_WAIT_INPUT, HOST_TO_DEVICE, DEVICE_WAIT_HOST});
+add_event(kOutput, {DEVICE_TO_HOST});
+add_event(kCompile, {HOST_COMPILE});
+add_event(kAllOthers, {UNKNOWN_TIME});
std::sort(metrics.begin(), metrics.end());
-record.set_bottleneck(metrics.back().second);
+record.set_bottleneck(metrics.back().second.data(),
+metrics.back().second.size());
return record;
}
@@ -53,25 +71,14 @@ PodStatsRecord CreatePodStatsRecord(absl::string_view host_name,
PodStatsDatabase ConvertOpStatsToPodStats(const OpStats& op_stats) {
PodStatsDatabase pod_stats_db;
-auto add_event = [&pod_stats_db](EventType type) {
-StepBreakdownEvents* event = pod_stats_db.add_step_breakdown_events();
-event->set_id(type);
-event->set_name(PrintEventTypeLabel(type));
-};
-add_event(HOST_COMPUTE);
-add_event(HOST_COMPILE);
-add_event(HOST_TO_HOST);
-add_event(HOST_TO_DEVICE);
-add_event(HOST_PREPARE);
-add_event(DEVICE_COLLECTIVES);
-add_event(HOST_WAIT_INPUT);
-add_event(DEVICE_TO_DEVICE);
-add_event(DEVICE_TO_HOST);
-add_event(DEVICE_COMPUTE_32);
-add_event(DEVICE_COMPUTE_16);
-add_event(DEVICE_WAIT_DEVICE);
-add_event(DEVICE_WAIT_HOST);
-add_event(UNKNOWN_TIME);
+for (int i = GenericEventType::kFirstGenericEventType;
+i <= GenericEventType::kLastGenericEventType; i++) {
+auto& event = *pod_stats_db.add_step_breakdown_events();
+event.set_id(i);
+absl::string_view type_str =
+GetGenericEventTypeStr(static_cast<GenericEventType>(i));
+event.set_name(type_str.data(), type_str.size());
+}
for (const auto& step_sequence : op_stats.step_db().step_sequence()) {
int count = 0;

View File

@@ -35,9 +35,10 @@ const double kMaxError = 1e-6;
constexpr int kStepNum = 2;
constexpr int kCoreId = 1;
constexpr int kStepTimePs = 1000;
-constexpr int kHostComputePs = 100;
+constexpr int kHostComputePs = 50;
constexpr int kHostCompilePs = 50;
constexpr int kHostToHostPs = 50;
+constexpr int kHostToDevicePs = 50;
constexpr int kHostPreparePs = 50;
constexpr int kDeviceCollectivePs = 350;
constexpr int kHostWaitInputPs = 50;
@@ -60,6 +61,7 @@ void CreateOpStats(OpStats* op_stats) {
type_ps[HOST_COMPUTE] = kHostComputePs;
type_ps[HOST_COMPILE] = kHostCompilePs;
type_ps[HOST_TO_HOST] = kHostToHostPs;
+type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
type_ps[HOST_PREPARE] = kHostPreparePs;
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
@@ -83,34 +85,25 @@ TEST(OpStatsToPodStats, GpuPodStats) {
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
kMaxError);
const auto& breakdown = record.step_breakdown_us();
-EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
-kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
+breakdown.at(kDeviceCompute), kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
+breakdown.at(kDeviceToDevice), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
-breakdown.at(DEVICE_COLLECTIVES), kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT),
+breakdown.at(kDeviceCollectives), kMaxError);
+EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE),
+EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps),
-breakdown.at(DEVICE_COMPUTE_32), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps),
-breakdown.at(DEVICE_COMPUTE_16), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
-breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
+EXPECT_NEAR(
+PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
+breakdown.at(kInput), kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
+EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
+EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
kMaxError);
-EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck());
+EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
}
TEST(OpStatsToPodStats, Diagnostics) {

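Note on the updated expectations above: each grouped bucket is the sum of the old per-EventType constants (for example, the kInput check covers kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs, and kDeviceCompute covers kDeviceCompute32Ps + kDeviceCompute16Ps). kHostComputePs drops from 100 to 50, presumably so the newly added kHostToDevicePs = 50 keeps the step total at kStepTimePs = 1000 ps. The bottleneck assertion changes only its label source: DEVICE_COLLECTIVES maps one-to-one to kDeviceCollectives and, at kDeviceCollectivePs = 350 ps, remains the largest bucket.
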
View File

@@ -36,9 +36,10 @@ const double kMaxError = 1e-6;
constexpr int kStepNum = 2;
constexpr int kCoreId = 1;
constexpr int kStepTimePs = 1000;
-constexpr int kHostComputePs = 100;
+constexpr int kHostComputePs = 50;
constexpr int kHostCompilePs = 50;
constexpr int kHostToHostPs = 50;
+constexpr int kHostToDevicePs = 50;
constexpr int kHostPreparePs = 50;
constexpr int kDeviceCollectivePs = 350;
constexpr int kHostWaitInputPs = 50;
@@ -61,6 +62,7 @@ void CreateOpStats(OpStats* op_stats) {
type_ps[HOST_COMPUTE] = kHostComputePs;
type_ps[HOST_COMPILE] = kHostCompilePs;
type_ps[HOST_TO_HOST] = kHostToHostPs;
+type_ps[HOST_TO_DEVICE] = kHostToDevicePs;
type_ps[HOST_PREPARE] = kHostPreparePs;
type_ps[DEVICE_COLLECTIVES] = kDeviceCollectivePs;
type_ps[HOST_WAIT_INPUT] = kHostWaitInputPs;
@@ -87,34 +89,25 @@ TEST(OpStatsToPodViewer, GpuPodViewer) {
EXPECT_NEAR(PicosToMicros(kStepTimePs), record.total_duration_us(),
kMaxError);
const auto& breakdown = record.step_breakdown_us();
-EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(HOST_COMPUTE),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(HOST_COMPILE),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostToHostPs), breakdown.at(HOST_TO_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(HOST_PREPARE),
-kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps + kDeviceCompute16Ps),
+breakdown.at(kDeviceCompute), kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs + kDeviceWaitDevicePs),
+breakdown.at(kDeviceToDevice), kMaxError);
EXPECT_NEAR(PicosToMicros(kDeviceCollectivePs),
-breakdown.at(DEVICE_COLLECTIVES), kMaxError);
-EXPECT_NEAR(PicosToMicros(kHostWaitInputPs), breakdown.at(HOST_WAIT_INPUT),
+breakdown.at(kDeviceCollectives), kMaxError);
+EXPECT_NEAR(PicosToMicros(kHostComputePs), breakdown.at(kHostCompute),
kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceToDevicePs), breakdown.at(DEVICE_TO_DEVICE),
+EXPECT_NEAR(PicosToMicros(kHostPreparePs), breakdown.at(kHostPrepare),
kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(DEVICE_TO_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceCompute32Ps),
-breakdown.at(DEVICE_COMPUTE_32), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceCompute16Ps),
-breakdown.at(DEVICE_COMPUTE_16), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceWaitDevicePs),
-breakdown.at(DEVICE_WAIT_DEVICE), kMaxError);
-EXPECT_NEAR(PicosToMicros(kDeviceWaitHostPs), breakdown.at(DEVICE_WAIT_HOST),
-kMaxError);
-EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(UNKNOWN_TIME),
+EXPECT_NEAR(
+PicosToMicros(kHostWaitInputPs + kHostToDevicePs + kDeviceWaitHostPs),
+breakdown.at(kInput), kMaxError);
+EXPECT_NEAR(PicosToMicros(kDeviceToHostPs), breakdown.at(kOutput), kMaxError);
+EXPECT_NEAR(PicosToMicros(kHostCompilePs), breakdown.at(kCompile), kMaxError);
+EXPECT_NEAR(PicosToMicros(kUnknownTimePs), breakdown.at(kAllOthers),
kMaxError);
-EXPECT_EQ(PrintEventTypeLabel(DEVICE_COLLECTIVES), record.bottleneck());
+EXPECT_EQ(GetGenericEventTypeStr(kDeviceCollectives), record.bottleneck());
}
TEST(OpStatsToPodViewer, Diagnostics) {

View File

@@ -162,8 +162,35 @@ EventType ClassifyDeviceCompute(absl::string_view event_name,
}
}
+constexpr int kNumGenericEventTypes = GenericEventType::kLastGenericEventType -
+GenericEventType::kFirstGenericEventType +
+1;
+using GenericEventTypeStrMap =
+absl::flat_hash_map<GenericEventType, absl::string_view>;
+const GenericEventTypeStrMap& GetGenericEventTypeStrMap() {
+static const auto* generic_event_type_str_map = new GenericEventTypeStrMap({
+{kDeviceCompute, "Device compute"},
+{kDeviceToDevice, "Device to device"},
+{kDeviceCollectives, "Device collective communication"},
+{kHostCompute, "Host compute"},
+{kHostPrepare, "Kernel launch"},
+{kInput, "Input"},
+{kOutput, "Output"},
+{kCompile, "Compilation"},
+{kAllOthers, "All others"},
+});
+DCHECK_EQ(generic_event_type_str_map->size(), kNumGenericEventTypes);
+return *generic_event_type_str_map;
+}
} // namespace
+absl::string_view GetGenericEventTypeStr(GenericEventType event_type) {
+return GetGenericEventTypeStrMap().at(event_type);
+}
EventType ClassifyGpuEvent(absl::string_view event_name,
absl::string_view tensor_shapes) {
if (absl::StartsWithIgnoreCase(event_name, "MEMCPYHtoD"))
@@ -231,42 +258,6 @@ std::string PrintEventType(EventType event_type) {
}
}
-std::string PrintEventTypeLabel(EventType event_type) {
-switch (event_type) {
-case UNKNOWN_TIME:
-return "Machine idle or unknown events";
-case HOST_COMPUTE:
-return "Host compute";
-case HOST_COMPILE:
-return "Host compile";
-case HOST_TO_HOST:
-return "Host to host";
-case HOST_TO_DEVICE:
-return "Host to device";
-case HOST_PREPARE:
-return "Host prepare";
-case DEVICE_COLLECTIVES:
-return "Device collectives";
-case HOST_WAIT_INPUT:
-return "Host wait input";
-case DEVICE_TO_DEVICE:
-return "Device to device";
-case DEVICE_TO_HOST:
-return "Device to host";
-case DEVICE_COMPUTE_32:
-return "Device compute 32-bit";
-case DEVICE_COMPUTE_16:
-return "Device compute 16-bit";
-case DEVICE_WAIT_DEVICE:
-return "Device wait device";
-case DEVICE_WAIT_HOST:
-return "Device wait host";
-default:
-DCHECK(false);
-return "Unknown event type";
-}
-}
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span) {
return absl::StrCat("(", PrintEventType(event_type_span.type), ", ",
event_type_span.span.DebugString(), ")");

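A quick sanity check of the new helper could look like the following hypothetical test (not part of this commit); the expected strings come straight from GetGenericEventTypeStrMap() above, and the include paths are assumptions.

#include <gtest/gtest.h>  // assumed; TF tests may pull gtest in via their own test headers

#include "tensorflow/core/profiler/utils/event_span.h"  // assumed path

namespace tensorflow {
namespace profiler {
namespace {

// Hypothetical sanity check for GetGenericEventTypeStr().
TEST(EventSpanTest, GenericEventTypeStr) {
  EXPECT_EQ("Device compute", GetGenericEventTypeStr(kDeviceCompute));
  EXPECT_EQ("Kernel launch", GetGenericEventTypeStr(kHostPrepare));
  EXPECT_EQ("All others", GetGenericEventTypeStr(kAllOthers));
}

}  // namespace
}  // namespace profiler
}  // namespace tensorflow
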
View File

@@ -68,6 +68,30 @@ enum EventType {
LAST_EVENT_TYPE = DEVICE_WAIT_HOST
};
+// Generic event types that are shown to the user.
+enum GenericEventType {
+kFirstGenericEventType = 1,
+// Device is computing.
+kDeviceCompute = kFirstGenericEventType,
+// Device-to-device communication.
+kDeviceToDevice,
+// Collective Ops such as All-Reduce and NCCL.
+kDeviceCollectives,
+// Host is computing.
+kHostCompute,
+// Host is preparing to launch a computation on device.
+kHostPrepare,
+// Device waiting for input from the host.
+kInput,
+// Device sending output to the host.
+kOutput,
+// Host is compiling.
+kCompile,
+// No recognized event associated with the time.
+kAllOthers,
+kLastGenericEventType = kAllOthers,
+};
// Contains the type and timespan of an event.
struct EventTypeSpan {
EventType type; // type of this event.
@@ -197,8 +221,8 @@ EventType ClassifyGpuEvent(absl::string_view event_name,
// Returns the name of the given EventType.
std::string PrintEventType(EventType event_type);
-// Returns the label of the given EventType.
-std::string PrintEventTypeLabel(EventType event_type);
+// Returns the string of the given GenericEventType.
+absl::string_view GetGenericEventTypeStr(GenericEventType event_type);
// Returns a string that prints the given EventTypeSpan.
std::string PrintEventTypeSpan(const EventTypeSpan& event_type_span);