Remove XPlane to trace viewer PID mapping from XPlane schema
PiperOrigin-RevId: 316764981 Change-Id: If8e23e0136493dc721fc41095fd9425b8226b060
This commit is contained in:
parent
aea06cde55
commit
dd789e7783
@ -377,7 +377,9 @@ cc_library(
|
||||
"//tensorflow/core/profiler/protobuf:trace_events_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:tf_xplane_visitor",
|
||||
"//tensorflow/core/profiler/utils:trace_utils",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
"//tensorflow/core/profiler/utils:xplane_utils",
|
||||
"//tensorflow/core/profiler/utils:xplane_visitor",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
@ -390,17 +392,14 @@ tf_cc_test(
|
||||
srcs = ["xplane_to_trace_events_test.cc"],
|
||||
deps = [
|
||||
":xplane_to_trace_events",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:test",
|
||||
"//tensorflow/core:test_main",
|
||||
"//tensorflow/core:testlib",
|
||||
"//tensorflow/core/profiler/protobuf:trace_events_proto_cc",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:trace_utils",
|
||||
"//tensorflow/core/profiler/utils:xplane_builder",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
"//tensorflow/core/profiler/utils:xplane_utils",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -28,7 +28,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/protobuf/trace_events.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
||||
#include "tensorflow/core/profiler/utils/trace_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_visitor.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -36,25 +38,63 @@ namespace profiler {
|
||||
|
||||
namespace {
|
||||
|
||||
Device BuildDeviceAndResource(const XPlaneVisitor& plane) {
|
||||
Device device;
|
||||
device.set_name(std::string(plane.Name()));
|
||||
device.set_device_id(plane.Id());
|
||||
void BuildDeviceAndResources(uint32 device_id, const XPlaneVisitor& plane,
|
||||
Device* device) {
|
||||
device->set_name(std::string(plane.Name()));
|
||||
device->set_device_id(device_id);
|
||||
|
||||
bool sort_by_ordinal = (plane.Name() == kHostThreadsPlaneName);
|
||||
bool sort_by_ordinal = (device_id == kHostThreadsDeviceId);
|
||||
int ordinal = 0;
|
||||
plane.ForEachLine([&](const XLineVisitor& line) {
|
||||
Resource resource;
|
||||
resource.set_resource_id(line.Id());
|
||||
uint32 resource_id = line.DisplayId();
|
||||
Resource& resource = (*device->mutable_resources())[resource_id];
|
||||
resource.set_resource_id(resource_id);
|
||||
resource.set_name(std::string(line.DisplayName()));
|
||||
if (sort_by_ordinal) {
|
||||
// When sort_index is absent (i.e. 0), resource id will be used.
|
||||
// Therefore sort_index starts with 1.
|
||||
resource.set_sort_index(++ordinal);
|
||||
}
|
||||
(*device.mutable_resources())[line.Id()] = resource;
|
||||
});
|
||||
return device;
|
||||
}
|
||||
|
||||
void ConvertXPlaneToTraceEvents(uint32 device_id, const XPlaneVisitor& xplane,
|
||||
Trace* trace) {
|
||||
// Convert devices and resources.
|
||||
BuildDeviceAndResources(device_id, xplane,
|
||||
&(*trace->mutable_devices())[device_id]);
|
||||
|
||||
// Convert events.
|
||||
xplane.ForEachLine([device_id, trace](const XLineVisitor& xline) {
|
||||
uint32 resource_id = xline.DisplayId();
|
||||
xline.ForEachEvent(
|
||||
[device_id, resource_id, trace](const XEventVisitor& xevent) {
|
||||
int64 event_type =
|
||||
xevent.Type().value_or(HostEventType::kUnknownHostEventType);
|
||||
if (event_type == HostEventType::kMemoryAllocation ||
|
||||
event_type == HostEventType::kMemoryDeallocation) {
|
||||
return;
|
||||
}
|
||||
auto* event = trace->add_trace_events();
|
||||
auto& args = *event->mutable_args();
|
||||
event->set_device_id(device_id);
|
||||
event->set_resource_id(resource_id);
|
||||
if (xevent.HasDisplayName()) {
|
||||
event->set_name(std::string(xevent.DisplayName()));
|
||||
args["long_name"] = std::string(xevent.Name());
|
||||
} else {
|
||||
event->set_name(std::string(xevent.Name()));
|
||||
}
|
||||
event->set_timestamp_ps(xevent.TimestampPs());
|
||||
event->set_duration_ps(xevent.DurationPs());
|
||||
|
||||
xevent.ForEachStat([&](const XStatVisitor& stat) {
|
||||
if (stat.ValueCase() == XStat::VALUE_NOT_SET) return;
|
||||
if (IsInternalStat(stat.Type())) return;
|
||||
args[std::string(stat.Name())] = stat.ToString();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -81,44 +121,18 @@ void MaybeDropEventsForTraceViewer(Trace* trace, uint32 limit) {
|
||||
}
|
||||
|
||||
void ConvertXSpaceToTraceEvents(const XSpace& xspace, Trace* trace) {
|
||||
auto* trace_devices = trace->mutable_devices();
|
||||
const XPlane* host_plane = FindPlaneWithName(xspace, kHostThreadsPlaneName);
|
||||
if (host_plane != nullptr) {
|
||||
XPlaneVisitor xplane = CreateTfXPlaneVisitor(host_plane);
|
||||
ConvertXPlaneToTraceEvents(kHostThreadsDeviceId, xplane, trace);
|
||||
}
|
||||
|
||||
for (const auto& raw_plane : xspace.planes()) {
|
||||
XPlaneVisitor xplane = CreateTfXPlaneVisitor(&raw_plane);
|
||||
// Convert devices and resources.
|
||||
int64 device_id = xplane.Id();
|
||||
(*trace_devices)[device_id] = BuildDeviceAndResource(xplane);
|
||||
|
||||
// Convert events.
|
||||
xplane.ForEachLine([&](const XLineVisitor& xline) {
|
||||
int64 resource_id = xline.Id(); // Either thread id or CUDA stream id.
|
||||
xline.ForEachEvent([&](const XEventVisitor& xevent) {
|
||||
int64 event_type =
|
||||
xevent.Type().value_or(HostEventType::kUnknownHostEventType);
|
||||
if (event_type == HostEventType::kMemoryAllocation ||
|
||||
event_type == HostEventType::kMemoryDeallocation) {
|
||||
return;
|
||||
}
|
||||
auto* event = trace->add_trace_events();
|
||||
auto& args = *event->mutable_args();
|
||||
event->set_device_id(device_id);
|
||||
event->set_resource_id(resource_id);
|
||||
if (xevent.HasDisplayName()) {
|
||||
event->set_name(std::string(xevent.DisplayName()));
|
||||
args["long_name"] = std::string(xevent.Name());
|
||||
} else {
|
||||
event->set_name(std::string(xevent.Name()));
|
||||
}
|
||||
event->set_timestamp_ps(xevent.TimestampPs());
|
||||
event->set_duration_ps(xevent.DurationPs());
|
||||
|
||||
xevent.ForEachStat([&](const XStatVisitor& stat) {
|
||||
if (stat.ValueCase() == XStat::VALUE_NOT_SET) return;
|
||||
if (IsInternalStat(stat.Type())) return;
|
||||
args[std::string(stat.Name())] = stat.ToString();
|
||||
});
|
||||
});
|
||||
});
|
||||
const std::vector<const XPlane*> device_planes =
|
||||
FindPlanesWithPrefix(xspace, kGpuPlanePrefix);
|
||||
for (const XPlane* device_plane : device_planes) {
|
||||
XPlaneVisitor xplane = CreateTfXPlaneVisitor(device_plane);
|
||||
uint32 device_id = kFirstDeviceId + xplane.Id();
|
||||
ConvertXPlaneToTraceEvents(device_id, xplane, trace);
|
||||
}
|
||||
|
||||
// Trace viewer (non-streaming) has scalability issues, we need to drop
|
||||
|
@ -18,7 +18,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
#include "tensorflow/core/profiler/protobuf/trace_events.pb.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/trace_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
@ -26,10 +28,7 @@ namespace {
|
||||
|
||||
void CreateXSpace(XSpace* space) {
|
||||
XPlaneBuilder host_plane(space->add_planes());
|
||||
XPlaneBuilder device_plane(space->add_planes());
|
||||
|
||||
host_plane.SetName("cpu");
|
||||
host_plane.SetId(0);
|
||||
host_plane.SetName(kHostThreadsPlaneName);
|
||||
XLineBuilder thread1 = host_plane.GetOrCreateLine(10);
|
||||
thread1.SetName("thread1");
|
||||
XEventBuilder event1 =
|
||||
@ -47,8 +46,9 @@ void CreateXSpace(XSpace* space) {
|
||||
event2.ParseAndAddStatValue(*host_plane.GetOrCreateStatMetadata("tf_op"),
|
||||
"Conv2D");
|
||||
|
||||
device_plane.SetName("gpu:0");
|
||||
device_plane.SetId(1);
|
||||
XPlaneBuilder device_plane(space->add_planes());
|
||||
device_plane.SetName(GpuPlaneName(0));
|
||||
device_plane.SetId(0);
|
||||
XLineBuilder stream1 = device_plane.GetOrCreateLine(30);
|
||||
stream1.SetName("gpu stream 1");
|
||||
XEventBuilder event3 =
|
||||
@ -67,8 +67,8 @@ TEST(ConvertXPlaneToTraceEvents, Convert) {
|
||||
ConvertXSpaceToTraceEvents(xspace, &trace);
|
||||
|
||||
ASSERT_EQ(trace.devices_size(), 2);
|
||||
EXPECT_EQ(trace.devices().at(0).resources_size(), 2);
|
||||
EXPECT_EQ(trace.devices().at(1).resources_size(), 1);
|
||||
EXPECT_EQ(trace.devices().at(kHostThreadsDeviceId).resources_size(), 2);
|
||||
EXPECT_EQ(trace.devices().at(kFirstDeviceId).resources_size(), 1);
|
||||
EXPECT_EQ(trace.trace_events_size(), 3);
|
||||
}
|
||||
|
||||
|
@ -147,7 +147,6 @@ Status HostTracer::CollectData(XSpace* space) {
|
||||
}
|
||||
MakeCompleteEvents(&events_);
|
||||
XPlane* plane = FindOrAddMutablePlaneWithName(space, kHostThreadsPlaneName);
|
||||
plane->set_id(kHostPlaneId);
|
||||
ConvertCompleteEventsToXPlane(start_timestamp_ns_, events_, plane);
|
||||
events_.clear();
|
||||
return Status::OK();
|
||||
|
@ -66,7 +66,6 @@ class MetadataCollector : public ProfilerInterface {
|
||||
Status CollectData(XSpace* space) override {
|
||||
if (!debug_info_.empty()) {
|
||||
XPlane* plane = FindOrAddMutablePlaneWithName(space, kMetadataPlaneName);
|
||||
plane->set_id(kMetadataPlaneId);
|
||||
XPlaneBuilder xplane(plane);
|
||||
const XStatMetadata& hlo_proto_stat =
|
||||
*xplane.GetOrCreateStatMetadata(kHloProto);
|
||||
|
@ -36,17 +36,21 @@ tf_cuda_library(
|
||||
deps = [
|
||||
":cupti_utils",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core/profiler/internal:annotation_stack",
|
||||
"//tensorflow/core/profiler/internal:parse_annotation",
|
||||
"//tensorflow/core/profiler/internal:profiler_factory",
|
||||
"//tensorflow/core/profiler/internal:profiler_interface",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||
"//tensorflow/core/profiler/utils:xplane_builder",
|
||||
"//tensorflow/core/profiler/utils:xplane_schema",
|
||||
"//tensorflow/core/profiler/utils:xplane_utils",
|
||||
"@com_google_absl//absl/container:fixed_array",
|
||||
"@com_google_absl//absl/container:flat_hash_map",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
],
|
||||
alwayslink = 1,
|
||||
@ -145,4 +149,5 @@ tf_cuda_library(
|
||||
":cupti_wrapper",
|
||||
] + tf_additional_cupti_utils_cuda_deps(),
|
||||
visibility = ["//visibility:public"],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
@ -41,6 +41,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/profiler/internal/parse_annotation.h"
|
||||
#include "tensorflow/core/profiler/internal/profiler_factory.h"
|
||||
#include "tensorflow/core/profiler/internal/profiler_interface.h"
|
||||
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_builder.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||
@ -225,11 +226,10 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
|
||||
uint64 end_gpu_ns = CuptiTracer::GetTimestamp();
|
||||
XPlaneBuilder host_plane(
|
||||
FindOrAddMutablePlaneWithName(space, kCuptiDriverApiPlaneName));
|
||||
host_plane.SetId(kCuptiDriverApiPlaneId);
|
||||
for (int device_ordinal = 0; device_ordinal < num_gpus_; ++device_ordinal) {
|
||||
std::string name = GpuPlaneName(device_ordinal);
|
||||
XPlaneBuilder device_plane(FindOrAddMutablePlaneWithName(space, name));
|
||||
device_plane.SetId(kGpuPlaneBaseId + device_ordinal);
|
||||
device_plane.SetId(device_ordinal);
|
||||
per_device_collector_[device_ordinal].Flush(start_gpu_ns_, end_gpu_ns,
|
||||
&device_plane, &host_plane);
|
||||
per_device_collector_[device_ordinal].GetDeviceCapabilities(
|
||||
|
@ -263,12 +263,10 @@ TEST_F(DeviceTracerTest, TraceToXSpace) {
|
||||
// At least one gpu plane and one host plane for launching events.
|
||||
const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
|
||||
ASSERT_NE(host_plane, nullptr);
|
||||
EXPECT_EQ(host_plane->id(), kCuptiDriverApiPlaneId);
|
||||
|
||||
const XPlane* device_plane =
|
||||
FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));
|
||||
ASSERT_NE(device_plane, nullptr); // Check if device plane is serialized.
|
||||
EXPECT_EQ(device_plane->id(), kGpuPlaneBaseId);
|
||||
// one for MemcpyH2D, one for MemcpyD2H, two for Matmul (one from Eigen, one
|
||||
// from cudnn).
|
||||
EXPECT_EQ(device_plane->event_metadata_size(), 4);
|
||||
|
@ -145,6 +145,9 @@ cc_library(
|
||||
cc_library(
|
||||
name = "trace_utils",
|
||||
hdrs = ["trace_utils.h"],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
|
@ -16,11 +16,20 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_TRACE_UTILS_H_
|
||||
#define TENSORFLOW_CORE_PROFILER_UTILS_TRACE_UTILS_H_
|
||||
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace profiler {
|
||||
|
||||
// The thread id used for step information in GPU trace viewer.
|
||||
// First derived stream/thread id.
|
||||
// Constants used as trace_viewer PID (device_id in trace_events.proto).
|
||||
// PID 0 is unused.
|
||||
// Support up to 500 accelerator devices.
|
||||
constexpr uint32 kFirstDeviceId = 1;
|
||||
constexpr uint32 kLastDeviceId = 500;
|
||||
// Host threads are shown as a single fake device.
|
||||
constexpr uint32 kHostThreadsDeviceId = kLastDeviceId + 1;
|
||||
|
||||
// Constants used as trace_viewer TID (resource_id in trace_events.proto).
|
||||
constexpr int kThreadIdDerivedMin = 0xdeadbeef;
|
||||
constexpr int kThreadIdStepInfo = kThreadIdDerivedMin;
|
||||
constexpr int kThreadIdKernelLaunch = kThreadIdDerivedMin + 1;
|
||||
@ -29,8 +38,6 @@ constexpr int kThreadIdTfOp = kThreadIdDerivedMin + 3;
|
||||
constexpr int kThreadIdHloModule = kThreadIdDerivedMin + 4;
|
||||
constexpr int kThreadIdHloOp = kThreadIdDerivedMin + 5;
|
||||
constexpr int kThreadIdOverhead = kThreadIdDerivedMin + 6;
|
||||
|
||||
// Last derived stream/thread id.
|
||||
constexpr int kThreadIdDerivedMax = kThreadIdOverhead;
|
||||
|
||||
static inline bool IsDerivedThreadId(int thread_id) {
|
||||
|
@ -39,15 +39,6 @@ const absl::string_view kXlaModuleLineName = "XLA Modules";
|
||||
const absl::string_view kXlaOpLineName = "XLA Ops";
|
||||
const absl::string_view kKernelLaunchLineName = "Launch Stats";
|
||||
|
||||
const int32 kHostPlaneId = 49;
|
||||
const int32 kGpuPlaneBaseId = 0;
|
||||
const int32 kCuptiDriverApiPlaneId = 50;
|
||||
const int32 kMetadataPlaneId = 99;
|
||||
const int32 kTFStreamzPlaneId = 98;
|
||||
|
||||
const int32 kThreadGroupMinPlaneId = kCuptiDriverApiPlaneId + 1;
|
||||
const int32 kThreadGroupMaxPlaneId = kTFStreamzPlaneId - 1;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kNumHostEventTypes =
|
||||
|
@ -45,21 +45,6 @@ ABSL_CONST_INIT extern const absl::string_view kXlaModuleLineName;
|
||||
ABSL_CONST_INIT extern const absl::string_view kXlaOpLineName;
|
||||
ABSL_CONST_INIT extern const absl::string_view kKernelLaunchLineName;
|
||||
|
||||
// Id of XPlane that contains TraceMe events.
|
||||
ABSL_CONST_INIT extern const int32 kHostPlaneId;
|
||||
// Ids prefix of XPlane that contains GPU events.
|
||||
ABSL_CONST_INIT extern const int32 kGpuPlaneBaseId;
|
||||
// Id of XPlane that contains CUPTI driver API generated events which happens
|
||||
// on CPU host threads, e.g. Kernel launch.
|
||||
ABSL_CONST_INIT extern const int32 kCuptiDriverApiPlaneId;
|
||||
// Id of XPlane that contains profile metadata such as XLA debug info.
|
||||
ABSL_CONST_INIT extern const int32 kMetadataPlaneId;
|
||||
// Id of XPlane that contains kpi related metrics.
|
||||
ABSL_CONST_INIT extern const int32 kTFStreamzPlaneId;
|
||||
|
||||
ABSL_CONST_INIT extern const int32 kThreadGroupMinPlaneId;
|
||||
ABSL_CONST_INIT extern const int32 kThreadGroupMaxPlaneId;
|
||||
|
||||
// Interesting event types (i.e., TraceMe names).
|
||||
enum HostEventType {
|
||||
kFirstHostEventType = 0,
|
||||
|
Loading…
Reference in New Issue
Block a user