let device tracer generate a separate "host plane" and try to merge it with cpu tracer's xplane by time normalization.

PiperOrigin-RevId: 293693717
Change-Id: Ia0ffd6b17c46d38a82684cd8326dac77c0f08817
This commit is contained in:
A. Unique TensorFlower 2020-02-06 15:41:16 -08:00 committed by TensorFlower Gardener
parent d5b2c94b0f
commit f0c0b5289d
11 changed files with 319 additions and 17 deletions

View File

@ -55,13 +55,13 @@ bool IsHostEvent(const CuptiTracerEvent& event) {
return event.thread_id != CuptiTracerEvent::kInvalidThreadId;
}
void CreateXEvent(const CuptiTracerEvent& event, uint64 offset_ns,
XPlaneBuilder* plane, XLineBuilder* line) {
void CreateXEvent(const CuptiTracerEvent& event, XPlaneBuilder* plane,
XLineBuilder* line) {
std::string kernel_name = port::MaybeAbiDemangle(event.name.c_str());
XEventMetadata* event_metadata = plane->GetOrCreateEventMetadata(kernel_name);
XEventBuilder xevent = line->AddEvent(*event_metadata);
xevent.SetTimestampNs(event.start_time_ns + offset_ns);
xevent.SetEndTimestampNs(event.end_time_ns + offset_ns);
xevent.SetTimestampNs(event.start_time_ns);
xevent.SetEndTimestampNs(event.end_time_ns);
if (event.correlation_id != CuptiTracerEvent::kInvalidCorrelationId) {
xevent.AddStatValue(*plane->GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kCorrelationId)),
@ -197,17 +197,19 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
LOG(INFO) << " GpuTracer has collected " << num_callback_events_
<< " callback api events and " << num_activity_events_
<< " activity events.";
XPlaneBuilder host_plane(GetOrCreatePlane(space, kHostThreads));
host_plane.SetId(kHostPlaneId);
XPlaneBuilder host_plane(GetOrCreatePlane(space, kCuptiDriverApiPlaneName));
host_plane.SetId(kCuptiDriverApiPlaneId);
for (int device_ordinal = 0; device_ordinal < num_gpus_; ++device_ordinal) {
std::string name = absl::StrCat(kGpuPlanePrefix, device_ordinal);
XPlaneBuilder device_plane(GetOrCreatePlane(space, name));
device_plane.SetId(kGpuPlaneBaseId + device_ordinal);
per_device_collector_[device_ordinal].Flush(
start_walltime_ns_, start_gpu_ns_, &device_plane, &host_plane);
per_device_collector_[device_ordinal].Flush(start_gpu_ns_, &device_plane,
&host_plane);
per_device_collector_[device_ordinal].GetDeviceCapabilities(
device_ordinal, &device_plane);
NormalizeTimeStamps(&device_plane, start_walltime_ns_);
}
NormalizeTimeStamps(&host_plane, start_walltime_ns_);
}
private:
@ -217,6 +219,18 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
uint64 start_gpu_ns_;
int num_gpus_;
// Set the all XLines of specified XPlane to starting walltime.
// Events time in both host and device planes are CUTPI timestamps.
// We set initial CUPTI timestamp as start time for all lines to reflect
// this fact. Eventually we change line start time to corresponding
// start_walltime_ns to normalize with CPU wall time.
static void NormalizeTimeStamps(XPlaneBuilder* plane,
uint64 start_walltime_ns) {
plane->ForEachLine([&](tensorflow::profiler::XLineBuilder line) {
line.SetTimestampNs(start_walltime_ns);
});
}
struct CorrelationInfo {
CorrelationInfo(uint32 t, uint32 e) : thread_id(t), enqueue_time_ns(e) {}
uint32 thread_id;
@ -328,14 +342,13 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
events.clear();
}
void Flush(uint64 start_walltime_ns, uint64 start_gpu_ns,
XPlaneBuilder* device_plane, XPlaneBuilder* host_plane) {
void Flush(uint64 start_gpu_ns, XPlaneBuilder* device_plane,
XPlaneBuilder* host_plane) {
absl::MutexLock lock(&mutex);
// Tracking event types per line.
absl::flat_hash_map<int64, absl::flat_hash_set<CuptiTracerEventType>>
events_types_per_line;
const uint64 offset_ns = start_walltime_ns - start_gpu_ns;
for (auto& event : events) {
bool is_host_event = IsHostEvent(event);
int64 line_id = is_host_event ? static_cast<int64>(event.thread_id)
@ -345,8 +358,8 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
continue;
auto* plane = is_host_event ? host_plane : device_plane;
XLineBuilder line = plane->GetOrCreateLine(line_id);
if (!is_host_event) line.SetTimestampNs(start_gpu_ns);
CreateXEvent(event, offset_ns, plane, &line);
line.SetTimestampNs(start_gpu_ns);
CreateXEvent(event, plane, &line);
events_types_per_line[line_id].emplace(event.type);
}
device_plane->ForEachLine([&](tensorflow::profiler::XLineBuilder line) {

View File

@ -271,9 +271,9 @@ TEST_F(DeviceTracerTest, TraceToXSpace) {
XSpace space;
TF_ASSERT_OK(tracer->CollectData(&space));
// At least one gpu plane and one host plane for launching events.
const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
ASSERT_NE(host_plane, nullptr);
EXPECT_EQ(host_plane->id(), kHostPlaneId);
EXPECT_EQ(host_plane->id(), kCuptiDriverApiPlaneId);
const XPlane* device_plane =
FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));

View File

@ -159,6 +159,8 @@ cc_library(
":timespan",
":xplane_builder",
":xplane_schema",
":xplane_visitor",
"//tensorflow/core:platform_base",
"//tensorflow/core/platform:types",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/container:flat_hash_map",
@ -170,10 +172,15 @@ tf_cc_test(
name = "xplane_utils_test",
srcs = ["xplane_utils_test.cc"],
deps = [
":time_utils",
":xplane_builder",
":xplane_utils",
":xplane_visitor",
"//tensorflow/core:platform_base",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/container:flat_hash_map",
],
)

View File

@ -14,6 +14,8 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/time_utils.h"
namespace tensorflow {
namespace profiler {
@ -89,5 +91,15 @@ XEventBuilder XLineBuilder::AddEvent(const XEventMetadata& metadata) {
return XEventBuilder(line_, event);
}
void XLineBuilder::SetTimestampNsAndAdjustEventOffsets(int64 timestamp_ns) {
int64 offset_ps = NanosToPicos(line_->timestamp_ns() - timestamp_ns);
line_->set_timestamp_ns(timestamp_ns);
if (offset_ps) {
for (auto& event : *line_->mutable_events()) {
event.set_offset_ps(event.offset_ps() + offset_ps);
}
}
}
} // namespace profiler
} // namespace tensorflow

View File

@ -54,8 +54,18 @@ class XStatsBuilder {
}
void AddStat(const XStatMetadata& metadata, const XStat& stat) {
DCHECK_EQ(metadata.id(), stat.metadata_id());
*stats_owner_->add_stats() = stat;
XStat* new_stat = stats_owner_->add_stats();
*new_stat = stat;
new_stat->set_metadata_id(metadata.id());
}
XStat* FindOrAddMutableStat(int64 metadata_id) {
for (auto& stat : *stats_owner_->mutable_stats()) {
if (stat.metadata_id() == metadata_id) {
return &stat;
}
}
return stats_owner_->add_stats();
}
void ParseAndAddStatValue(const XStatMetadata& metadata,
@ -132,15 +142,23 @@ class XLineBuilder {
int64 Id() { return line_->id(); }
void SetId(int64 id) { line_->set_id(id); }
int64 NumEvents() { return line_->events_size(); }
void SetName(absl::string_view name) { line_->set_name(string(name)); }
void SetNameIfEmpty(absl::string_view name) {
if (line_->name().empty()) SetName(name);
}
int64 TimestampNs() { return line_->timestamp_ns(); }
// This will set the line start timestamp.
// WARNING: The offset_ps of existing events will not be altered.
void SetTimestampNs(int64 timestamp_ns) {
line_->set_timestamp_ns(timestamp_ns);
}
// This will set the line start timestamp to specific time, and adjust
// the offset_ps of all existing events.
void SetTimestampNsAndAdjustEventOffsets(int64 timestamp_ns);
void SetDurationPs(int64 duration_ps) { line_->set_duration_ps(duration_ps); }
@ -148,6 +166,12 @@ class XLineBuilder {
line_->mutable_events()->Reserve(num_events);
}
void SetDisplayNameIfEmpty(absl::string_view display_name) {
if (line_->display_name().empty()) {
line_->set_display_name(std::string(display_name));
}
}
XEventBuilder AddEvent(const XEventMetadata& metadata);
private:

View File

@ -24,8 +24,11 @@ namespace profiler {
const absl::string_view kHostThreads = "/host:CPU";
const absl::string_view kGpuPlanePrefix = "/device:GPU:";
const absl::string_view kCuptiDriverApiPlaneName = "/host:CUPTI";
const int32 kHostPlaneId = 49;
const int32 kGpuPlaneBaseId = 0;
const int32 kCuptiDriverApiPlaneId = 50;
namespace {

View File

@ -29,11 +29,16 @@ namespace profiler {
ABSL_CONST_INIT extern const absl::string_view kHostThreads;
// Name prefix of XPlane that contains GPU events.
ABSL_CONST_INIT extern const absl::string_view kGpuPlanePrefix;
// Name of XPlane that contains CUPTI driver API generated events.
ABSL_CONST_INIT extern const absl::string_view kCuptiDriverApiPlaneName;
// Id of XPlane that contains TraceMe events.
ABSL_CONST_INIT extern const int32 kHostPlaneId;
// Ids prefix of XPlane that contains GPU events.
ABSL_CONST_INIT extern const int32 kGpuPlaneBaseId;
// Id of XPlane that contains CUPTI driver API generated events which happens
// on CPU host threads, e.g. Kernel launch.
ABSL_CONST_INIT extern const int32 kCuptiDriverApiPlaneId;
// Interesting event types (i.e., TraceMe names).
enum HostEventType {

View File

@ -16,7 +16,11 @@ limitations under the License.
#include "absl/container/flat_hash_map.h"
#include "absl/strings/match.h"
#include "tensorflow/core/platform/env_time.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/timespan.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
namespace tensorflow {
namespace profiler {
@ -199,5 +203,65 @@ void SortXPlane(XPlane* plane) {
plane->mutable_lines()->pointer_end(), XLinesComparator());
}
void MergePlanes(const XPlane& src_plane, XPlane* dst_plane) {
XPlaneVisitor src(&src_plane);
XPlaneBuilder dst(dst_plane);
RemoveEmptyLines(dst_plane);
src.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) {
XStatMetadata* stat_metadata = dst.GetOrCreateStatMetadata(stat.Name());
XStat* new_stat = dst.FindOrAddMutableStat(stat_metadata->id());
// Add or override the existing stat value except the metadata id.
*new_stat = stat.RawStat();
new_stat->set_metadata_id(stat_metadata->id());
});
src.ForEachLine([&](const tensorflow::profiler::XLineVisitor& line) {
XLineBuilder dst_line = dst.GetOrCreateLine(line.Id());
int64 time_offset_ps = 0LL;
if (dst_line.NumEvents() == 0) {
// Since we RemoveEmptyLines above, this could only mean that current
// line only exist in src plane.
dst_line.SetTimestampNs(line.TimestampNs());
dst_line.SetName(line.Name());
dst_line.SetDisplayNameIfEmpty(line.DisplayName());
} else {
if (line.TimestampNs() <= dst_line.TimestampNs()) {
dst_line.SetTimestampNsAndAdjustEventOffsets(line.TimestampNs());
} else {
time_offset_ps = (line.TimestampNs() - dst_line.TimestampNs()) *
EnvTime::kNanosToPicos;
}
dst_line.SetNameIfEmpty(line.Name());
if (!line.DisplayName().empty()) {
dst_line.SetDisplayNameIfEmpty(line.DisplayName());
}
}
line.ForEachEvent([&](const tensorflow::profiler::XEventVisitor& event) {
const XEventMetadata* src_event_metadata = event.metadata();
XEventMetadata* dst_event_metadata =
dst.GetOrCreateEventMetadata(event.Name());
if (dst_event_metadata->display_name().empty() &&
!src_event_metadata->display_name().empty()) {
dst_event_metadata->set_display_name(
src_event_metadata->display_name());
}
if (dst_event_metadata->metadata().empty() &&
!src_event_metadata->metadata().empty()) {
dst_event_metadata->set_metadata(src_event_metadata->metadata());
}
XEventBuilder dst_event = dst_line.AddEvent(*dst_event_metadata);
dst_event.SetOffsetPs(event.OffsetPs() + time_offset_ps);
dst_event.SetDurationPs(event.DurationPs());
if (event.NumOccurrences()) {
dst_event.SetNumOccurrences(event.NumOccurrences());
}
event.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) {
dst_event.AddStat(*dst.GetOrCreateStatMetadata(stat.Name()),
stat.RawStat());
});
});
});
}
} // namespace profiler
} // namespace tensorflow

View File

@ -71,6 +71,12 @@ XPlane* FindOrAddMutablePlaneWithName(XSpace* space, absl::string_view name);
// (descending) so nested events are sorted from outer to innermost.
void SortXPlane(XPlane* plane);
// Merge Xplane src_plane into Xplane dst_plane, both plane level stats, lines,
// events and event level stats are merged; If src_plane and dst_plane both have
// the same line, which have different start timestamps, we will normalize the
// events offset timestamp correspondingly.
void MergePlanes(const XPlane& src_plane, XPlane* dst_plane);
} // namespace profiler
} // namespace tensorflow

View File

@ -15,8 +15,12 @@ limitations under the License.
#include "tensorflow/core/profiler/utils/xplane_utils.h"
#include "absl/container/flat_hash_map.h"
#include "tensorflow/core/platform/env_time.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
namespace tensorflow {
namespace profiler {
@ -134,6 +138,168 @@ TEST(XPlaneUtilsTest, SortXPlaneTest) {
EXPECT_EQ(plane.lines(0).events(3).duration_ps(), 100);
}
namespace {
XLineBuilder CreateXLine(XPlaneBuilder* plane, absl::string_view name,
absl::string_view display, int64 id,
int64 timestamp_ns) {
XLineBuilder line = plane->GetOrCreateLine(id);
line.SetName(name);
line.SetTimestampNs(timestamp_ns);
line.SetDisplayNameIfEmpty(display);
return line;
}
XEventBuilder CreateXEvent(XPlaneBuilder* plane, XLineBuilder line,
absl::string_view event_name,
absl::optional<absl::string_view> display,
int64 offset_ns, int64 duration_ns) {
XEventMetadata* event_metadata = plane->GetOrCreateEventMetadata(event_name);
if (display) event_metadata->set_display_name(std::string(*display));
XEventBuilder event = line.AddEvent(*event_metadata);
event.SetOffsetNs(offset_ns);
event.SetDurationNs(duration_ns);
return event;
}
template <typename T, typename V>
void CreateXStats(XPlaneBuilder* plane, T* stats_owner,
absl::string_view stats_name, V stats_value) {
stats_owner->AddStatValue(*plane->GetOrCreateStatMetadata(stats_name),
stats_value);
}
void CheckXLine(const XLine& line, absl::string_view name,
absl::string_view display, int64 start_time_ns,
int64 events_size) {
EXPECT_EQ(line.name(), name);
EXPECT_EQ(line.display_name(), display);
EXPECT_EQ(line.timestamp_ns(), start_time_ns);
EXPECT_EQ(line.events_size(), events_size);
}
void CheckXEvent(const XEvent& event, const XPlane& plane,
absl::string_view name, absl::string_view display,
int64 offset_ns, int64 duration_ns, int64 stats_size) {
const XEventMetadata& event_metadata =
plane.event_metadata().at(event.metadata_id());
EXPECT_EQ(event_metadata.name(), name);
EXPECT_EQ(event_metadata.display_name(), display);
EXPECT_EQ(event.offset_ps(), offset_ns * EnvTime::kNanosToPicos);
EXPECT_EQ(event.duration_ps(), duration_ns * EnvTime::kNanosToPicos);
EXPECT_EQ(event.stats_size(), stats_size);
}
} // namespace
TEST(XPlaneUtilsTest, MergeXPlaneTest) {
XPlane src_plane, dst_plane;
constexpr int64 kLineIdOnlyInSrcPlane = 1LL;
constexpr int64 kLineIdOnlyInDstPlane = 2LL;
constexpr int64 kLineIdInBothPlanes = 3LL; // src start ts < dst start ts
constexpr int64 kLineIdInBothPlanes2 = 4LL; // src start ts > dst start ts
{ // Populate the source plane.
XPlaneBuilder src(&src_plane);
CreateXStats(&src, &src, "plane_stat1", 1); // only in source.
CreateXStats(&src, &src, "plane_stat3", 3.0); // shared by source/dest.
auto l1 = CreateXLine(&src, "l1", "d1", kLineIdOnlyInSrcPlane, 100);
auto e1 = CreateXEvent(&src, l1, "event1", "display1", 1, 2);
CreateXStats(&src, &e1, "event_stat1", 2.0);
auto e2 = CreateXEvent(&src, l1, "event2", absl::nullopt, 3, 4);
CreateXStats(&src, &e2, "event_stat2", 3);
auto l2 = CreateXLine(&src, "l2", "d2", kLineIdInBothPlanes, 200);
auto e3 = CreateXEvent(&src, l2, "event3", absl::nullopt, 5, 7);
CreateXStats(&src, &e3, "event_stat3", 2.0);
auto e4 = CreateXEvent(&src, l2, "event4", absl::nullopt, 6, 8);
CreateXStats(&src, &e4, "event_stat4", 3);
CreateXStats(&src, &e4, "event_stat5", 3);
auto l5 = CreateXLine(&src, "l5", "d5", kLineIdInBothPlanes2, 700);
CreateXEvent(&src, l5, "event51", absl::nullopt, 9, 10);
CreateXEvent(&src, l5, "event52", absl::nullopt, 11, 12);
}
{ // Populate the destination plane.
XPlaneBuilder dst(&dst_plane);
CreateXStats(&dst, &dst, "plane_stat2", 2); // only in dest
CreateXStats(&dst, &dst, "plane_stat3", 4); // shared but different.
auto l3 = CreateXLine(&dst, "l3", "d3", kLineIdOnlyInDstPlane, 300);
auto e5 = CreateXEvent(&dst, l3, "event5", absl::nullopt, 11, 2);
CreateXStats(&dst, &e5, "event_stat6", 2.0);
auto e6 = CreateXEvent(&dst, l3, "event6", absl::nullopt, 13, 4);
CreateXStats(&dst, &e6, "event_stat7", 3);
auto l2 = CreateXLine(&dst, "l4", "d4", kLineIdInBothPlanes, 400);
auto e7 = CreateXEvent(&dst, l2, "event7", absl::nullopt, 15, 7);
CreateXStats(&dst, &e7, "event_stat8", 2.0);
auto e8 = CreateXEvent(&dst, l2, "event8", "display8", 16, 8);
CreateXStats(&dst, &e8, "event_stat9", 3);
auto l6 = CreateXLine(&dst, "l6", "d6", kLineIdInBothPlanes2, 300);
CreateXEvent(&dst, l6, "event61", absl::nullopt, 21, 10);
CreateXEvent(&dst, l6, "event62", absl::nullopt, 22, 12);
}
MergePlanes(src_plane, &dst_plane);
XPlaneVisitor plane(&dst_plane);
EXPECT_EQ(dst_plane.lines_size(), 4);
// Check plane level stats,
EXPECT_EQ(dst_plane.stats_size(), 3);
absl::flat_hash_map<absl::string_view, absl::string_view> plane_stats;
plane.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) {
if (stat.Name() == "plane_stat1") {
EXPECT_EQ(stat.IntValue(), 1);
} else if (stat.Name() == "plane_stat2") {
EXPECT_EQ(stat.IntValue(), 2);
} else if (stat.Name() == "plane_stat3") {
// XStat in src_plane overrides the counter-part in dst_plane.
EXPECT_EQ(stat.DoubleValue(), 3.0);
} else {
EXPECT_TRUE(false);
}
});
// 3 plane level stats, 9 event level stats.
EXPECT_EQ(dst_plane.stat_metadata_size(), 12);
{ // Old lines are untouched.
const XLine& line = dst_plane.lines(0);
CheckXLine(line, "l3", "d3", 300, 2);
CheckXEvent(line.events(0), dst_plane, "event5", "", 11, 2, 1);
CheckXEvent(line.events(1), dst_plane, "event6", "", 13, 4, 1);
}
{ // Lines with the same id are merged.
// src plane start timestamp > dst plane start timestamp
const XLine& line = dst_plane.lines(1);
// NOTE: use minimum start time of src/dst.
CheckXLine(line, "l4", "d4", 200, 4);
CheckXEvent(line.events(0), dst_plane, "event7", "", 215, 7, 1);
CheckXEvent(line.events(1), dst_plane, "event8", "display8", 216, 8, 1);
CheckXEvent(line.events(2), dst_plane, "event3", "", 5, 7, 1);
CheckXEvent(line.events(3), dst_plane, "event4", "", 6, 8, 2);
}
{ // Lines with the same id are merged.
// src plane start timestamp < dst plane start timestamp
const XLine& line = dst_plane.lines(2);
CheckXLine(line, "l6", "d6", 300, 4);
CheckXEvent(line.events(0), dst_plane, "event61", "", 21, 10, 0);
CheckXEvent(line.events(1), dst_plane, "event62", "", 22, 12, 0);
CheckXEvent(line.events(2), dst_plane, "event51", "", 409, 10, 0);
CheckXEvent(line.events(3), dst_plane, "event52", "", 411, 12, 0);
}
{ // Lines only in source are "copied".
const XLine& line = dst_plane.lines(3);
CheckXLine(line, "l1", "d1", 100, 2);
CheckXEvent(line.events(0), dst_plane, "event1", "display1", 1, 2, 1);
CheckXEvent(line.events(1), dst_plane, "event2", "", 3, 4, 1);
}
}
} // namespace
} // namespace profiler
} // namespace tensorflow

View File

@ -132,6 +132,8 @@ class XEventVisitor : public XStatsOwner<XEvent> {
return GetTimespan() < other.GetTimespan();
}
const XEventMetadata* metadata() const { return metadata_; }
private:
Timespan GetTimespan() const { return Timespan(TimestampPs(), DurationPs()); }