Change DerivedXLineBuilder to accept the vector of event metadata and maintain dependency with other lines.

PiperOrigin-RevId: 294781084
Change-Id: Ied1b11a4cdbb33a0b16282b867174e2048fd6904
This commit is contained in:
Jiho Choi 2020-02-12 16:08:18 -08:00 committed by TensorFlower Gardener
parent 52281ba252
commit 76e77cf61c
6 changed files with 271 additions and 60 deletions

View File

@ -282,3 +282,21 @@ cc_library(
"@com_google_absl//absl/strings",
],
)
# Unit test target for the derived timeline logic; builds
# derived_timeline_test.cc against the library under test (":derived_timeline")
# and the XPlane helper libraries listed in deps.
tf_cc_test(
name = "derived_timeline_test",
srcs = ["derived_timeline_test.cc"],
deps = [
":derived_timeline",
":group_events",
":tf_xplane_visitor",
":trace_utils",
":xplane_builder",
":xplane_schema",
":xplane_utils",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/strings",
],
)

View File

@ -28,50 +28,81 @@ namespace profiler {
namespace {
// Helper for deriving an XLine from events in another XLine.
// Merges consecutive events with the same metadata.
// NOTE(review): the per-level path (ExpandOrAddLevelEvent) merges only when
// try_expand_ is true; when it is false every call adds a new event.
class DerivedXLineBuilder {
public:
// Gets or creates line `line_id` in `plane`, then sets its name and timestamp.
//   dependent_lines: builders whose last events (all levels) are reset
//     whenever this builder adds a new level-0 event in
//     ExpandOrAddLevelEvent. Held as raw, non-owned pointers -- presumably
//     they must outlive this builder; confirm at the call sites.
//   try_expand: when false, ExpandOrAddLevelEvent never merges into the
//     previous event of a level.
DerivedXLineBuilder(XPlaneBuilder* plane, int64 line_id,
absl::string_view name, int64 timestamp_ns)
absl::string_view name, int64 timestamp_ns,
std::vector<DerivedXLineBuilder*> dependent_lines,
bool try_expand)
: line_(plane->GetOrCreateLine(line_id)),
group_id_stats_(plane->GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kGroupId))) {
line_.SetName(std::string(name));
group_id_stats_(
plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kGroupId))),
try_expand_(try_expand) {
line_.SetName(name);
line_.SetTimestampNs(timestamp_ns);
dependent_lines_ = std::move(dependent_lines);
}
// If the last event of the given level has the same metadata and group_id,
// expands it to include the time until (offset_ps + duration_ps). Otherwise,
// adds a new event and clears last_event_by_level_ for the levels below the
// given level. Clearing last_event_by_level_ prevents a nested event from
// growing larger than the parent event(s).
void ExpandOrAddEvent(const XEventMetadata& event_metadata,
const XEventVisitor& event,
absl::optional<int64> group_id, int level = 0) {
int64 offset_ps = event.OffsetPs(), duration_ps = event.DurationPs();
auto& last_event = last_event_by_level_[level];
DCHECK(!last_event || last_event->OffsetPs() <= offset_ps);
if (last_event && last_event->MetadataId() == event_metadata.id() &&
last_group_id_ == group_id) {
last_event->SetDurationPs((offset_ps + duration_ps) -
last_event->OffsetPs());
} else {
last_event = line_.AddEvent(event_metadata);
last_event->SetOffsetPs(offset_ps);
last_event->SetDurationPs(duration_ps);
last_group_id_ = group_id;
if (group_id) last_event->AddStatValue(*group_id_stats_, *group_id);
for (int i = level + 1; i < last_event_by_level_.size(); ++i) {
last_event_by_level_[i] = absl::nullopt;
}
// Expands or adds one event per level: metadata_per_level[i] is the metadata
// used for level i, and every level takes its offset and duration from the
// same source event (`events`) and uses the same `group_id`. Levels are
// processed in increasing order starting at 0.
void ExpandOrAddEvents(
const std::vector<const XEventMetadata*>& metadata_per_level,
const XEventVisitor& events, absl::optional<int64> group_id) {
for (int level = 0; level < metadata_per_level.size(); ++level) {
ExpandOrAddLevelEvent(metadata_per_level[level], events, group_id, level);
}
}
// Reset last events lower than the given level.
// With the default level of -1 the loop starts at 0, i.e. every level is
// reset.
// NOTE(review): the loop indexes the map with keys level+1 .. size()-1, so it
// presumably relies on levels being populated contiguously from 0 (which is
// how ExpandOrAddEvents fills them) -- confirm.
void ResetLastEvents(int level = -1) {
for (int i = level + 1; i < last_event_by_level_.size(); ++i) {
last_event_by_level_[i] = absl::nullopt;
}
}
private:
// If the last event of the given level has the same metadata and try_expand_
// is true, expands it to include the time until the given event's (offset_ps
// + duration_ps). Otherwise, adds a new event and clears last_event_by_level_
// for the levels below the given level and all levels of the dependent lines.
// Clearing last_event_by_level_ prevents a nested event from growing larger
// than the parent event(s).
// Note that the dependent lines are only reset when the new event is added at
// level 0, and that group_id is attached to new events but is not part of the
// merge condition here.
void ExpandOrAddLevelEvent(const XEventMetadata* event_metadata,
const XEventVisitor& event,
absl::optional<int64> group_id, int level) {
int64 offset_ps = event.OffsetPs();
int64 duration_ps = event.DurationPs();
auto& last_event = last_event_by_level_[level];
// If last_event is not nullptr, its offset must be less than or equal to
// the given event's offset.
DCHECK(!last_event || last_event->OffsetPs() <= offset_ps);
if (try_expand_ && last_event &&
last_event->MetadataId() == event_metadata->id()) {
// If last_event is not nullptr and metadata is same, merge the given
// event into last_event.
last_event->SetDurationPs((offset_ps + duration_ps) -
last_event->OffsetPs());
} else {
// Otherwise, create a new event for the given level.
last_event = line_.AddEvent(*event_metadata);
last_event->SetOffsetPs(offset_ps);
last_event->SetDurationPs(duration_ps);
if (group_id) last_event->AddStatValue(*group_id_stats_, *group_id);
// Reset last events lower than the given level.
ResetLastEvents(level);
if (level == 0) ResetDependentLines();
}
}
// Resets the last event of every level on each dependent line (calls
// ResetLastEvents() with its default argument).
void ResetDependentLines() {
for (DerivedXLineBuilder* line : dependent_lines_) {
line->ResetLastEvents();
}
}
// The derived line that events are added to.
XLineBuilder line_;
// Most recently added event for each level; absl::nullopt after a reset.
absl::flat_hash_map<int, absl::optional<XEventBuilder>> last_event_by_level_;
// NOTE(review): only ExpandOrAddEvent reads/writes this; ExpandOrAddLevelEvent
// does not consult it.
absl::optional<int64> last_group_id_;
// Stat metadata for StatType::kGroupId, attached to new events that carry a
// group_id.
XStatMetadata* group_id_stats_;
// Non-owned builders reset by ResetDependentLines().
std::vector<DerivedXLineBuilder*> dependent_lines_;
// Whether ExpandOrAddLevelEvent may merge into the previous event of a level.
bool try_expand_;
};
const absl::string_view kDerivedLineSteps = "Steps";
@ -98,14 +129,16 @@ void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver,
absl::c_sort(events);
XPlaneBuilder plane(device_trace);
DerivedXLineBuilder steps(&plane, kThreadIdStepInfo, kDerivedLineSteps,
start_timestamp_ns);
DerivedXLineBuilder tf_ops(&plane, kThreadIdTfOp, kDerivedLineTensorFlowOps,
start_timestamp_ns);
start_timestamp_ns, {}, /*try_expand=*/true);
DerivedXLineBuilder hlo_ops(&plane, kThreadIdHloOp, kDerivedLineXlaOps,
start_timestamp_ns);
start_timestamp_ns, {}, /*try_expand=*/true);
DerivedXLineBuilder hlo_modules(&plane, kThreadIdHloModule,
kDerivedLineXlaModules, start_timestamp_ns);
kDerivedLineXlaModules, start_timestamp_ns,
{&tf_ops, &hlo_ops}, /*try_expand=*/false);
DerivedXLineBuilder steps(&plane, kThreadIdStepInfo, kDerivedLineSteps,
start_timestamp_ns, {&tf_ops, &hlo_ops},
/*try_expand=*/true);
// Process events in order by start time.
for (const XEventVisitor& event : events) {
@ -130,39 +163,37 @@ void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver,
if (group_id) {
if (auto group_name = gtl::FindOrNull(event_group_name_map, *group_id)) {
steps.ExpandOrAddEvent(*plane.GetOrCreateEventMetadata(*group_name),
event, group_id);
steps.ExpandOrAddEvents({plane.GetOrCreateEventMetadata(*group_name)},
event, group_id);
}
}
if (!is_kernel) {
// For HLO/TF op lines, only use kernel events, (i.e. excluding memcpy or
// allocation events).
continue;
}
// For HLO/TF op lines, only use kernel events (i.e. excluding memcpy or
// allocation events).
if (!is_kernel) continue;
if (!hlo_module_name.empty()) {
hlo_modules.ExpandOrAddEvent(
*plane.GetOrCreateEventMetadata(hlo_module_name), event, group_id);
hlo_modules.ExpandOrAddEvents(
{plane.GetOrCreateEventMetadata(hlo_module_name)}, event, group_id);
}
if (!hlo_op_names.empty()) { // GPU kernel compiled by XLA
DCHECK(!hlo_module_name.empty());
int level = 0;
std::vector<const XEventMetadata*> hlo_op_metadata_per_level;
for (absl::string_view hlo_op_name : hlo_op_names) {
DCHECK(!hlo_op_name.empty());
hlo_ops.ExpandOrAddEvent(*plane.GetOrCreateEventMetadata(hlo_op_name),
event, group_id, level);
++level;
hlo_op_metadata_per_level.push_back(
plane.GetOrCreateEventMetadata(hlo_op_name));
}
hlo_ops.ExpandOrAddEvents(hlo_op_metadata_per_level, event, group_id);
auto tf_op_name = symbol_resolver(hlo_module_name, hlo_op_names.back());
if (!tf_op_name.empty()) {
tf_ops.ExpandOrAddEvent(*plane.GetOrCreateEventMetadata(tf_op_name),
event, group_id);
tf_ops.ExpandOrAddEvents({plane.GetOrCreateEventMetadata(tf_op_name)},
event, group_id);
}
} else if (!tf_op_fullname.empty()) { // GPU kernel not compiled by XLA
tf_ops.ExpandOrAddEvent(*plane.GetOrCreateEventMetadata(tf_op_fullname),
event, group_id);
tf_ops.ExpandOrAddEvents({plane.GetOrCreateEventMetadata(tf_op_fullname)},
event, group_id);
}
}
RemoveEmptyLines(device_trace);

View File

@ -26,10 +26,10 @@ typedef std::function<absl::string_view(absl::string_view hlo_module_name,
absl::string_view hlo_op)>
SymbolResolver;
// Derives "Step Info", "Tensorflow Ops", "HLO Ops" and "HLO Module" lines in
// Derives "Step Info", "Tensorflow Ops", "XLA Ops" and "XLA Module" lines in
// an NVIDIA_GPU device trace from data passed as ScopedAnnotations and stored
// as XStats in XEvents corresponding to GPU Kernels. Consecutive annotations
// with the same value are merged into a single event.
// with the same value are merged into a single event except for XLA modules.
// The device_trace is both input and output.
void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver,
const EventGroupNameMap& event_group_name_map,

View File

@ -0,0 +1,160 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/derived_timeline.h"
#include "absl/strings/match.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/group_events.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/trace_utils.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
namespace tensorflow {
namespace profiler {
namespace {
// Deriving timelines for a space that has no planes must not add any plane.
TEST(DerivedTimelineTest, EmptySpaceTest) {
  EventGroupNameMap no_group_names;
  XSpace empty_space;

  GenerateDerivedTimeLines(no_group_names, &empty_space);

  EXPECT_EQ(empty_space.planes_size(), 0);
}
// Two kernel events that share an HLO module name must yield two separate
// events on the HLO module line, i.e. HLO module events are never merged.
TEST(DerivedTimelineTest, HloModuleNameTest) {
  const absl::string_view kHloModuleName = "hlo_module";
  const absl::string_view kKernelDetails = "kernel_details";
  XSpace space;
  EventGroupNameMap event_group_name_map;
  XPlane* plane = space.add_planes();
  XPlaneBuilder plane_builder(plane);
  auto line_builder = plane_builder.GetOrCreateLine(0);

  // Adds an event to line 0 tagged with the HLO module name and kernel
  // details, in that order.
  auto add_kernel_event = [&](absl::string_view event_name, int64 offset_ps,
                              int64 duration_ps) {
    auto kernel_event = CreateXEvent(&plane_builder, &line_builder, event_name,
                                     offset_ps, duration_ps, {});
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kHloModule)),
                              kHloModuleName);
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kKernelDetails)),
                              kKernelDetails);
  };
  add_kernel_event("op1", 0, 100);
  add_kernel_event("op2", 200, 300);

  GenerateDerivedTimeLines(event_group_name_map, &space);
  XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane);

  // Expect the input line plus the HLO module line; the derived lines that
  // stayed empty are removed.
  EXPECT_EQ(plane_visitor.NumLines(), 2);
  plane_visitor.ForEachLine([&](const XLineVisitor& line) {
    if (line.Id() != 0) {
      EXPECT_EQ(line.Id(), kThreadIdHloModule);
      EXPECT_EQ(line.NumEvents(), 2);
      line.ForEachEvent([&](const XEventVisitor& derived_event) {
        EXPECT_EQ(derived_event.Name(), kHloModuleName);
      });
    }
  });
}
// Two kernel events annotated with the same TF op must be merged into a single
// event on the TF op line that spans both of them.
TEST(DerivedTimelineTest, TfOpLineTest) {
  const absl::string_view kTfOpName = "Mul";
  const absl::string_view kKernelDetails = "kernel_details";
  XSpace space;
  EventGroupNameMap event_group_name_map;
  XPlane* plane = space.add_planes();
  XPlaneBuilder plane_builder(plane);
  auto line_builder = plane_builder.GetOrCreateLine(0);

  // Adds an event to line 0 tagged with the TF op name (level 0) and kernel
  // details, in that order.
  auto add_kernel_event = [&](absl::string_view event_name, int64 offset_ps,
                              int64 duration_ps) {
    auto kernel_event = CreateXEvent(&plane_builder, &line_builder, event_name,
                                     offset_ps, duration_ps, {});
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kLevel0)),
                              kTfOpName);
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kKernelDetails)),
                              kKernelDetails);
  };
  add_kernel_event("op1", 0, 100);
  add_kernel_event("op2", 200, 300);

  GenerateDerivedTimeLines(event_group_name_map, &space);
  XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane);

  // Expect the input line plus the TF op line; the derived lines that stayed
  // empty are removed.
  EXPECT_EQ(plane_visitor.NumLines(), 2);
  plane_visitor.ForEachLine([&](const XLineVisitor& line) {
    if (line.Id() != 0) {
      EXPECT_EQ(line.Id(), kThreadIdTfOp);
      EXPECT_EQ(line.NumEvents(), 1);
      line.ForEachEvent([&](const XEventVisitor& derived_event) {
        EXPECT_EQ(derived_event.Name(), kTfOpName);
        EXPECT_EQ(derived_event.OffsetPs(), 0);
        EXPECT_EQ(derived_event.DurationPs(), 500);
      });
    }
  });
}
// Two kernel events with the same TF op but different group ids (steps) must
// stay separate on the TF op line: a new step event resets the TF op line, so
// the second TF op event is not merged into the first.
TEST(DerivedTimelineTest, DependencyTest) {
  const absl::string_view kTfOpName = "Mul";
  const absl::string_view kKernelDetails = "kernel_details";
  XSpace space;
  EventGroupNameMap event_group_name_map({{0, "train 0"}, {1, "train 1"}});
  XPlane* plane = space.add_planes();
  XPlaneBuilder plane_builder(plane);
  auto line_builder = plane_builder.GetOrCreateLine(0);

  // Adds an event to line 0 that belongs to `group_id` and is tagged with the
  // TF op name (level 0) and kernel details, in that order.
  auto add_kernel_event = [&](absl::string_view event_name, int64 offset_ps,
                              int64 duration_ps, int64 group_id) {
    auto kernel_event =
        CreateXEvent(&plane_builder, &line_builder, event_name, offset_ps,
                     duration_ps, {{StatType::kGroupId, group_id}});
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kLevel0)),
                              kTfOpName);
    kernel_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
                                  GetStatTypeStr(StatType::kKernelDetails)),
                              kKernelDetails);
  };
  add_kernel_event("op1", 0, 100, 0);
  add_kernel_event("op2", 200, 300, 1);

  GenerateDerivedTimeLines(event_group_name_map, &space);
  XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane);

  // Expect the input line plus the step line and the TF op line.
  EXPECT_EQ(plane_visitor.NumLines(), 3);
  plane_visitor.ForEachLine([&](const XLineVisitor& line) {
    if (line.Id() != 0) {
      EXPECT_TRUE(line.Id() == kThreadIdStepInfo ||
                  line.Id() == kThreadIdTfOp);
      EXPECT_EQ(line.NumEvents(), 2);
    }
  });
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -129,7 +129,7 @@ void AddOrUpdateStrStat(int64 metadata_id, absl::string_view value,
stat->set_str_value(std::string(value));
}
void CreateXEvent(
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats) {
@ -142,14 +142,16 @@ void CreateXEvent(
GetStatTypeStr(stat_type_and_value.first)),
stat_type_and_value.second);
}
return event_builder;
}
// Overload keyed by HostEventType: converts event_type to its string name with
// GetHostEventTypeStr and forwards all other arguments unchanged to the
// overload that takes the event name as an absl::string_view. The
// XEventBuilder-returning form hands back that overload's result.
void CreateXEvent(
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
HostEventType event_type, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats) {
CreateXEvent(plane_builder, line_builder, GetHostEventTypeStr(event_type),
offset_ps, duration_ps, stats);
return CreateXEvent(plane_builder, line_builder,
GetHostEventTypeStr(event_type), offset_ps, duration_ps,
stats);
}
void RemovePlaneWithName(XSpace* space, absl::string_view name) {

View File

@ -46,12 +46,12 @@ void AddOrUpdateIntStat(int64 metadata_id, int64 value,
void AddOrUpdateStrStat(int64 metadata_id, absl::string_view value,
tensorflow::profiler::XEvent* event);
// Adds an event named `event_name` through `line_builder` -- presumably at
// offset_ps with duration_ps; confirm against the definition. `stats` maps a
// StatType to the integer value to attach for it. The XEventBuilder return
// value lets callers attach further stats to the event.
void CreateXEvent(
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);
// Same as above, but the event is identified by a HostEventType instead of a
// name.
void CreateXEvent(
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
HostEventType event_type, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);