From dd789e77833942bf7ffccb9e2e2e93e1c6dff436 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Tue, 16 Jun 2020 15:01:16 -0700 Subject: [PATCH] Remove XPlane to trace viewer PID mapping from XPlane schema PiperOrigin-RevId: 316764981 Change-Id: If8e23e0136493dc721fc41095fd9425b8226b060 --- tensorflow/core/profiler/convert/BUILD | 7 +- .../convert/xplane_to_trace_events.cc | 106 ++++++++++-------- .../convert/xplane_to_trace_events_test.cc | 16 +-- .../core/profiler/internal/cpu/host_tracer.cc | 1 - .../internal/cpu/metadata_collector.cc | 1 - tensorflow/core/profiler/internal/gpu/BUILD | 7 +- .../profiler/internal/gpu/device_tracer.cc | 4 +- .../internal/gpu/device_tracer_test.cc | 2 - tensorflow/core/profiler/utils/BUILD | 3 + tensorflow/core/profiler/utils/trace_utils.h | 15 ++- .../core/profiler/utils/xplane_schema.cc | 9 -- .../core/profiler/utils/xplane_schema.h | 15 --- 12 files changed, 93 insertions(+), 93 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 5f287a14267..abf0176bf6f 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -377,7 +377,9 @@ cc_library( "//tensorflow/core/profiler/protobuf:trace_events_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:tf_xplane_visitor", + "//tensorflow/core/profiler/utils:trace_utils", "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", @@ -390,17 +392,14 @@ tf_cc_test( srcs = ["xplane_to_trace_events_test.cc"], deps = [ ":xplane_to_trace_events", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", "//tensorflow/core/profiler/protobuf:trace_events_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "//tensorflow/core/profiler/utils:trace_utils", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", ], ) diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc index f4a0145d8f6..882f50e6080 100644 --- a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc @@ -28,7 +28,9 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/trace_events.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" +#include "tensorflow/core/profiler/utils/trace_utils.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { @@ -36,25 +38,63 @@ namespace profiler { namespace { -Device BuildDeviceAndResource(const XPlaneVisitor& plane) { - Device device; - device.set_name(std::string(plane.Name())); - device.set_device_id(plane.Id()); +void BuildDeviceAndResources(uint32 device_id, const XPlaneVisitor& plane, + Device* device) { + device->set_name(std::string(plane.Name())); + device->set_device_id(device_id); - bool sort_by_ordinal = (plane.Name() == kHostThreadsPlaneName); + bool sort_by_ordinal = (device_id == kHostThreadsDeviceId); int ordinal = 0; plane.ForEachLine([&](const XLineVisitor& line) { - Resource resource; - resource.set_resource_id(line.Id()); + uint32 resource_id = line.DisplayId(); + Resource& resource = (*device->mutable_resources())[resource_id]; + resource.set_resource_id(resource_id); resource.set_name(std::string(line.DisplayName())); if (sort_by_ordinal) { // When sort_index is absent (i.e. 0), resource id will be used. // Therefore sort_index starts with 1. resource.set_sort_index(++ordinal); } - (*device.mutable_resources())[line.Id()] = resource; }); - return device; +} + +void ConvertXPlaneToTraceEvents(uint32 device_id, const XPlaneVisitor& xplane, + Trace* trace) { + // Convert devices and resources. + BuildDeviceAndResources(device_id, xplane, + &(*trace->mutable_devices())[device_id]); + + // Convert events. + xplane.ForEachLine([device_id, trace](const XLineVisitor& xline) { + uint32 resource_id = xline.DisplayId(); + xline.ForEachEvent( + [device_id, resource_id, trace](const XEventVisitor& xevent) { + int64 event_type = + xevent.Type().value_or(HostEventType::kUnknownHostEventType); + if (event_type == HostEventType::kMemoryAllocation || + event_type == HostEventType::kMemoryDeallocation) { + return; + } + auto* event = trace->add_trace_events(); + auto& args = *event->mutable_args(); + event->set_device_id(device_id); + event->set_resource_id(resource_id); + if (xevent.HasDisplayName()) { + event->set_name(std::string(xevent.DisplayName())); + args["long_name"] = std::string(xevent.Name()); + } else { + event->set_name(std::string(xevent.Name())); + } + event->set_timestamp_ps(xevent.TimestampPs()); + event->set_duration_ps(xevent.DurationPs()); + + xevent.ForEachStat([&](const XStatVisitor& stat) { + if (stat.ValueCase() == XStat::VALUE_NOT_SET) return; + if (IsInternalStat(stat.Type())) return; + args[std::string(stat.Name())] = stat.ToString(); + }); + }); + }); } } // namespace @@ -81,44 +121,18 @@ void MaybeDropEventsForTraceViewer(Trace* trace, uint32 limit) { } void ConvertXSpaceToTraceEvents(const XSpace& xspace, Trace* trace) { - auto* trace_devices = trace->mutable_devices(); + const XPlane* host_plane = FindPlaneWithName(xspace, kHostThreadsPlaneName); + if (host_plane != nullptr) { + XPlaneVisitor xplane = CreateTfXPlaneVisitor(host_plane); + ConvertXPlaneToTraceEvents(kHostThreadsDeviceId, xplane, trace); + } - for (const auto& raw_plane : xspace.planes()) { - XPlaneVisitor xplane = CreateTfXPlaneVisitor(&raw_plane); - // Convert devices and resources. - int64 device_id = xplane.Id(); - (*trace_devices)[device_id] = BuildDeviceAndResource(xplane); - - // Convert events. - xplane.ForEachLine([&](const XLineVisitor& xline) { - int64 resource_id = xline.Id(); // Either thread id or CUDA stream id. - xline.ForEachEvent([&](const XEventVisitor& xevent) { - int64 event_type = - xevent.Type().value_or(HostEventType::kUnknownHostEventType); - if (event_type == HostEventType::kMemoryAllocation || - event_type == HostEventType::kMemoryDeallocation) { - return; - } - auto* event = trace->add_trace_events(); - auto& args = *event->mutable_args(); - event->set_device_id(device_id); - event->set_resource_id(resource_id); - if (xevent.HasDisplayName()) { - event->set_name(std::string(xevent.DisplayName())); - args["long_name"] = std::string(xevent.Name()); - } else { - event->set_name(std::string(xevent.Name())); - } - event->set_timestamp_ps(xevent.TimestampPs()); - event->set_duration_ps(xevent.DurationPs()); - - xevent.ForEachStat([&](const XStatVisitor& stat) { - if (stat.ValueCase() == XStat::VALUE_NOT_SET) return; - if (IsInternalStat(stat.Type())) return; - args[std::string(stat.Name())] = stat.ToString(); - }); - }); - }); + const std::vector device_planes = + FindPlanesWithPrefix(xspace, kGpuPlanePrefix); + for (const XPlane* device_plane : device_planes) { + XPlaneVisitor xplane = CreateTfXPlaneVisitor(device_plane); + uint32 device_id = kFirstDeviceId + xplane.Id(); + ConvertXPlaneToTraceEvents(device_id, xplane, trace); } // Trace viewer (non-streaming) has scalability issues, we need to drop diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc b/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc index b9a9fe09981..1e0d27ae68a 100644 --- a/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events_test.cc @@ -18,7 +18,9 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #include "tensorflow/core/profiler/protobuf/trace_events.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/trace_utils.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" +#include "tensorflow/core/profiler/utils/xplane_schema.h" namespace tensorflow { namespace profiler { @@ -26,10 +28,7 @@ namespace { void CreateXSpace(XSpace* space) { XPlaneBuilder host_plane(space->add_planes()); - XPlaneBuilder device_plane(space->add_planes()); - - host_plane.SetName("cpu"); - host_plane.SetId(0); + host_plane.SetName(kHostThreadsPlaneName); XLineBuilder thread1 = host_plane.GetOrCreateLine(10); thread1.SetName("thread1"); XEventBuilder event1 = @@ -47,8 +46,9 @@ void CreateXSpace(XSpace* space) { event2.ParseAndAddStatValue(*host_plane.GetOrCreateStatMetadata("tf_op"), "Conv2D"); - device_plane.SetName("gpu:0"); - device_plane.SetId(1); + XPlaneBuilder device_plane(space->add_planes()); + device_plane.SetName(GpuPlaneName(0)); + device_plane.SetId(0); XLineBuilder stream1 = device_plane.GetOrCreateLine(30); stream1.SetName("gpu stream 1"); XEventBuilder event3 = @@ -67,8 +67,8 @@ TEST(ConvertXPlaneToTraceEvents, Convert) { ConvertXSpaceToTraceEvents(xspace, &trace); ASSERT_EQ(trace.devices_size(), 2); - EXPECT_EQ(trace.devices().at(0).resources_size(), 2); - EXPECT_EQ(trace.devices().at(1).resources_size(), 1); + EXPECT_EQ(trace.devices().at(kHostThreadsDeviceId).resources_size(), 2); + EXPECT_EQ(trace.devices().at(kFirstDeviceId).resources_size(), 1); EXPECT_EQ(trace.trace_events_size(), 3); } diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer.cc b/tensorflow/core/profiler/internal/cpu/host_tracer.cc index 37f7baca1d3..fa21df004df 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer.cc @@ -147,7 +147,6 @@ Status HostTracer::CollectData(XSpace* space) { } MakeCompleteEvents(&events_); XPlane* plane = FindOrAddMutablePlaneWithName(space, kHostThreadsPlaneName); - plane->set_id(kHostPlaneId); ConvertCompleteEventsToXPlane(start_timestamp_ns_, events_, plane); events_.clear(); return Status::OK(); diff --git a/tensorflow/core/profiler/internal/cpu/metadata_collector.cc b/tensorflow/core/profiler/internal/cpu/metadata_collector.cc index 58e6385a7ec..c9a593f101b 100644 --- a/tensorflow/core/profiler/internal/cpu/metadata_collector.cc +++ b/tensorflow/core/profiler/internal/cpu/metadata_collector.cc @@ -66,7 +66,6 @@ class MetadataCollector : public ProfilerInterface { Status CollectData(XSpace* space) override { if (!debug_info_.empty()) { XPlane* plane = FindOrAddMutablePlaneWithName(space, kMetadataPlaneName); - plane->set_id(kMetadataPlaneId); XPlaneBuilder xplane(plane); const XStatMetadata& hlo_proto_stat = *xplane.GetOrCreateStatMetadata(kHloProto); diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index 987bc5ea336..dd8ff1e9a27 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -36,17 +36,21 @@ tf_cuda_library( deps = [ ":cupti_utils", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/internal:annotation_stack", "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_factory", "//tensorflow/core/profiler/internal:profiler_interface", - "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_utils", + "@com_google_absl//absl/container:fixed_array", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/synchronization", ], alwayslink = 1, @@ -145,4 +149,5 @@ tf_cuda_library( ":cupti_wrapper", ] + tf_additional_cupti_utils_cuda_deps(), visibility = ["//visibility:public"], + alwayslink = 1, ) diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer.cc b/tensorflow/core/profiler/internal/gpu/device_tracer.cc index bc9952302e8..73d2a278ea4 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer.cc @@ -41,6 +41,7 @@ limitations under the License. #include "tensorflow/core/profiler/internal/parse_annotation.h" #include "tensorflow/core/profiler/internal/profiler_factory.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" @@ -225,11 +226,10 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector { uint64 end_gpu_ns = CuptiTracer::GetTimestamp(); XPlaneBuilder host_plane( FindOrAddMutablePlaneWithName(space, kCuptiDriverApiPlaneName)); - host_plane.SetId(kCuptiDriverApiPlaneId); for (int device_ordinal = 0; device_ordinal < num_gpus_; ++device_ordinal) { std::string name = GpuPlaneName(device_ordinal); XPlaneBuilder device_plane(FindOrAddMutablePlaneWithName(space, name)); - device_plane.SetId(kGpuPlaneBaseId + device_ordinal); + device_plane.SetId(device_ordinal); per_device_collector_[device_ordinal].Flush(start_gpu_ns_, end_gpu_ns, &device_plane, &host_plane); per_device_collector_[device_ordinal].GetDeviceCapabilities( diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc index 6fc19e776e1..973167ff51b 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc @@ -263,12 +263,10 @@ TEST_F(DeviceTracerTest, TraceToXSpace) { // At least one gpu plane and one host plane for launching events. const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName); ASSERT_NE(host_plane, nullptr); - EXPECT_EQ(host_plane->id(), kCuptiDriverApiPlaneId); const XPlane* device_plane = FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0)); ASSERT_NE(device_plane, nullptr); // Check if device plane is serialized. - EXPECT_EQ(device_plane->id(), kGpuPlaneBaseId); // one for MemcpyH2D, one for MemcpyD2H, two for Matmul (one from Eigen, one // from cudnn). EXPECT_EQ(device_plane->event_metadata_size(), 4); diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 6942f3ea306..ece58802661 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -145,6 +145,9 @@ cc_library( cc_library( name = "trace_utils", hdrs = ["trace_utils.h"], + deps = [ + "//tensorflow/core:lib", + ], ) cc_library( diff --git a/tensorflow/core/profiler/utils/trace_utils.h b/tensorflow/core/profiler/utils/trace_utils.h index 024330faa79..72e6f23feb9 100644 --- a/tensorflow/core/profiler/utils/trace_utils.h +++ b/tensorflow/core/profiler/utils/trace_utils.h @@ -16,11 +16,20 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_UTILS_TRACE_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_TRACE_UTILS_H_ +#include "tensorflow/core/platform/types.h" + namespace tensorflow { namespace profiler { -// The thread id used for step information in GPU trace viewer. -// First derived stream/thread id. +// Constants used as trace_viewer PID (device_id in trace_events.proto). +// PID 0 is unused. +// Support up to 500 accelerator devices. +constexpr uint32 kFirstDeviceId = 1; +constexpr uint32 kLastDeviceId = 500; +// Host threads are shown as a single fake device. +constexpr uint32 kHostThreadsDeviceId = kLastDeviceId + 1; + +// Constants used as trace_viewer TID (resource_id in trace_events.proto). constexpr int kThreadIdDerivedMin = 0xdeadbeef; constexpr int kThreadIdStepInfo = kThreadIdDerivedMin; constexpr int kThreadIdKernelLaunch = kThreadIdDerivedMin + 1; @@ -29,8 +38,6 @@ constexpr int kThreadIdTfOp = kThreadIdDerivedMin + 3; constexpr int kThreadIdHloModule = kThreadIdDerivedMin + 4; constexpr int kThreadIdHloOp = kThreadIdDerivedMin + 5; constexpr int kThreadIdOverhead = kThreadIdDerivedMin + 6; - -// Last derived stream/thread id. constexpr int kThreadIdDerivedMax = kThreadIdOverhead; static inline bool IsDerivedThreadId(int thread_id) { diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 2c79df7980f..197dab75d3b 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -39,15 +39,6 @@ const absl::string_view kXlaModuleLineName = "XLA Modules"; const absl::string_view kXlaOpLineName = "XLA Ops"; const absl::string_view kKernelLaunchLineName = "Launch Stats"; -const int32 kHostPlaneId = 49; -const int32 kGpuPlaneBaseId = 0; -const int32 kCuptiDriverApiPlaneId = 50; -const int32 kMetadataPlaneId = 99; -const int32 kTFStreamzPlaneId = 98; - -const int32 kThreadGroupMinPlaneId = kCuptiDriverApiPlaneId + 1; -const int32 kThreadGroupMaxPlaneId = kTFStreamzPlaneId - 1; - namespace { constexpr int kNumHostEventTypes = diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index a045e20d8de..8b999dc6f9f 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -45,21 +45,6 @@ ABSL_CONST_INIT extern const absl::string_view kXlaModuleLineName; ABSL_CONST_INIT extern const absl::string_view kXlaOpLineName; ABSL_CONST_INIT extern const absl::string_view kKernelLaunchLineName; -// Id of XPlane that contains TraceMe events. -ABSL_CONST_INIT extern const int32 kHostPlaneId; -// Ids prefix of XPlane that contains GPU events. -ABSL_CONST_INIT extern const int32 kGpuPlaneBaseId; -// Id of XPlane that contains CUPTI driver API generated events which happens -// on CPU host threads, e.g. Kernel launch. -ABSL_CONST_INIT extern const int32 kCuptiDriverApiPlaneId; -// Id of XPlane that contains profile metadata such as XLA debug info. -ABSL_CONST_INIT extern const int32 kMetadataPlaneId; -// Id of XPlane that contains kpi related metrics. -ABSL_CONST_INIT extern const int32 kTFStreamzPlaneId; - -ABSL_CONST_INIT extern const int32 kThreadGroupMinPlaneId; -ABSL_CONST_INIT extern const int32 kThreadGroupMaxPlaneId; - // Interesting event types (i.e., TraceMe names). enum HostEventType { kFirstHostEventType = 0,