diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c28ea21f4fc..ce4bdf5c8de 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -219,17 +219,13 @@ FRAMEWORK_PROTO_SRCS = [ "framework/versions.proto", ] -PROFILER_PROTO_SRCS = [ - "//tensorflow/core/profiler/protobuf:xplane.proto", -] - ERROR_CODES_PROTO_SRCS = [ "protobuf/error_codes.proto", "//tensorflow/core/lib/core:error_codes.proto", ] # LINT.ThenChange(//tensorflow/core/android_proto_config.asciipb) -CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + PROFILER_PROTO_SRCS + ERROR_CODES_PROTO_SRCS +CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + ERROR_CODES_PROTO_SRCS tf_proto_library( name = "protos_all", diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index cd6de977a04..c4d915177ba 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -1,4 +1,5 @@ load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test") +load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow:internal"], @@ -395,8 +396,11 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - ], + "//tensorflow/core/platform", + ] + if_static( + extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], + otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], + ), ) filegroup( diff --git a/tensorflow/core/profiler/internal/cpu/BUILD b/tensorflow/core/profiler/internal/cpu/BUILD index d2b3fdd1201..e0d271bcfcc 100644 --- a/tensorflow/core/profiler/internal/cpu/BUILD +++ b/tensorflow/core/profiler/internal/cpu/BUILD @@ -1,4 +1,5 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow:internal"], @@ -9,16 +10,20 @@ cc_library( name = "host_tracer", srcs = ["host_tracer.cc"], deps = [ + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/internal:traceme_recorder", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", - ], + "//tensorflow/core/profiler/utils:xplane_builder", + ] + if_static( + extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], + otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], + ), alwayslink = True, ) @@ -33,6 +38,8 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_googletest//:gtest_main", ], diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer.cc b/tensorflow/core/profiler/internal/cpu/host_tracer.cc index 3d2cd86e5a5..6f9c467fe09 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer.cc @@ -13,15 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include +#include #include "absl/container/flat_hash_map.h" #include "absl/strings/str_split.h" #include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/profiler/internal/parse_annotation.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/internal/traceme_recorder.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/env_var.h" @@ -29,6 +32,7 @@ namespace tensorflow { namespace profiler { namespace cpu { namespace { + // Controls TraceMeRecorder and converts TraceMeRecorder::Events into // RunMetadata messages. // @@ -50,17 +54,22 @@ class HostTracer : public ProfilerInterface { Status CollectData(XSpace* space) override; - profiler::DeviceType GetDeviceType() override { - return profiler::DeviceType::kCpu; - } + DeviceType GetDeviceType() override { return DeviceType::kCpu; } private: + // Combine events created by TraceMe::ActivityStart and TraceMe::ActivityEnd, + // which can be paired up by their activity_id. + void MakeCompleteEvents(); + // Level of host tracing. const int host_trace_level_; // True if currently recording. bool recording_ = false; + // Timestamp at the start of tracing. + uint64 start_timestamp_ns_ = 0; + // Container of all traced events. TraceMeRecorder::Events events_; }; @@ -78,6 +87,7 @@ Status HostTracer::Start() { if (!recording_) { return Status(error::INTERNAL, "Failed to start TraceMeRecorder"); } + start_timestamp_ns_ = EnvTime::Default()->NowNanos(); return Status::OK(); } @@ -90,33 +100,57 @@ Status HostTracer::Stop() { return Status::OK(); } -constexpr char kUserMetadataMarker = '#'; - -Status HostTracer::CollectData(RunMetadata* run_metadata) { - if (recording_) { - return Status(error::INTERNAL, "TraceMeRecorder not stopped"); - } - // Pair up start and end events, and add complete events to trace_entries. - absl::flat_hash_map end_times; - for (const auto& thread : events_) { - for (const auto& event : thread.events) { - if (event.end_time && !event.start_time) { - end_times.emplace(event.activity_id, event.end_time); +void HostTracer::MakeCompleteEvents() { + // Track events create by ActivityStart and copy their data to events created + // by ActivityEnd. TraceME records events in its destructor, so this results + // in complete events sorted by their end_time in the thread they ended. + // Within the same thread, the record created by ActivityStart must appear + // before the record created by ActivityEnd. Cross-thread events must be + // processed in a separate pass. A single map can be used because the + // activity_id is globally unique. + absl::flat_hash_map start_events; + std::vector end_events; + for (auto& thread : events_) { + for (auto& event : thread.events) { + if (event.start_time && !event.end_time) { // ActivityStart + start_events.emplace(event.activity_id, &event); + } else if (!event.start_time && event.end_time) { // ActivityEnd + auto iter = start_events.find(event.activity_id); + if (iter != start_events.end()) { // same thread + auto* start_event = iter->second; + event.name = std::move(start_event->name); + event.start_time = start_event->start_time; + start_events.erase(iter); + } else { // cross-thread + end_events.push_back(&event); + } } } } + for (auto* event : end_events) { // cross-thread + auto iter = start_events.find(event->activity_id); + if (iter != start_events.end()) { + auto* start_event = iter->second; + event->name = std::move(start_event->name); + event->start_time = start_event->start_time; + start_events.erase(iter); + } + } +} +Status HostTracer::CollectData(RunMetadata* run_metadata) { + if (recording_) { + return errors::Internal("TraceMeRecorder not stopped"); + } + MakeCompleteEvents(); StepStatsCollector step_stats_collector(run_metadata->mutable_step_stats()); + constexpr char kUserMetadataMarker = '#'; const string cpu_name = "/host:CPU"; for (auto& thread : events_) { step_stats_collector.SaveThreadName(cpu_name, thread.thread.tid, thread.thread.name); for (auto& event : thread.events) { - if (!event.end_time) { - auto it = end_times.find(event.activity_id); - if (it != end_times.end()) event.end_time = it->second; - } if (event.start_time && event.end_time) { NodeExecStats* ns = new NodeExecStats; if (event.name.back() != kUserMetadataMarker) { @@ -136,7 +170,6 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) { ns->set_all_end_rel_micros((event.end_time - event.start_time) / EnvTime::kMicrosToNanos); ns->set_thread_id(thread.thread.tid); - // TODO(fishx): Add thread name to RunMetadata step_stats_collector.Save(cpu_name, ns); } } @@ -147,7 +180,48 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) { } Status HostTracer::CollectData(XSpace* space) { - return errors::Unimplemented("Collect data into XSpace not yet implemented"); + if (recording_) { + return errors::Internal("TraceMeRecorder not stopped"); + } + MakeCompleteEvents(); + XPlaneBuilder xplane(space->add_planes()); + xplane.SetName("Host Threads"); + absl::flat_hash_map xevent_metadata_by_name; + absl::flat_hash_map xstat_metadata_by_name; + for (const auto& thread : events_) { + XLineBuilder xline = xplane.AddLine(); + xline.SetId(thread.thread.tid); + xline.SetName(thread.thread.name); + xline.SetTimestampNs(start_timestamp_ns_); + xline.ReserveEvents(thread.events.size()); + for (const auto& event : thread.events) { + if (event.start_time && event.end_time) { + Annotation annotation = ParseAnnotation(event.name); + XEventMetadata*& xevent_metadata = + xevent_metadata_by_name[annotation.name]; + if (xevent_metadata == nullptr) { + xevent_metadata = + xplane.GetOrCreateEventMetadata(xevent_metadata_by_name.size()); + xevent_metadata->set_name(string(annotation.name)); + } + XEventBuilder xevent = xline.AddEvent(*xevent_metadata); + xevent.SetTimestampNs(event.start_time); + xevent.SetEndTimestampNs(event.end_time); + xevent.ReserveStats(annotation.metadata.size()); + for (const auto& metadata : annotation.metadata) { + XStatMetadata*& xstat_metadata = xstat_metadata_by_name[metadata.key]; + if (xstat_metadata == nullptr) { + xstat_metadata = + xplane.GetOrCreateStatMetadata(xstat_metadata_by_name.size()); + xstat_metadata->set_name(string(metadata.key)); + } + xevent.ParseAndAddStatValue(*xstat_metadata, metadata.value); + } + } + } + } + events_.clear(); + return Status::OK(); } } // namespace @@ -161,7 +235,6 @@ std::unique_ptr CreateHostTracer( auto register_host_tracer_factory = [] { bool enable; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_OSS_CPU_PROFILER", true, &enable)); if (enable) { RegisterProfilerFactory(&CreateHostTracer); diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc index e83fe8a3e1a..f1b2b8560c9 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/platform/env.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -34,12 +35,6 @@ std::unique_ptr CreateHostTracer( namespace { -Status CollectData(ProfilerInterface* profiler, RunMetadata* run_metadata) { - return profiler->CollectData(run_metadata); -} - -using ::testing::ElementsAre; -using ::testing::Pair; using ::testing::UnorderedElementsAre; NodeExecStats MakeNodeStats(const string& name, uint64 thread_id, @@ -79,11 +74,10 @@ inline ::testing::PolymorphicMatcher EqualsNodeStats( return ::testing::MakePolymorphicMatcher(NodeStatsMatcher(expected)); } -TEST(HostTracerTest, CollectsTraceMeEvents) { +TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) { uint32 thread_id = Env::Default()->GetCurrentThreadId(); - const ProfilerOptions options; - auto tracer = CreateHostTracer(options); + auto tracer = CreateHostTracer(ProfilerOptions()); TF_ASSERT_OK(tracer->Start()); { TraceMe traceme("hello"); } @@ -95,7 +89,7 @@ TEST(HostTracerTest, CollectsTraceMeEvents) { TF_ASSERT_OK(tracer->Stop()); RunMetadata run_metadata; - TF_ASSERT_OK(CollectData(tracer.get(), &run_metadata)); + TF_ASSERT_OK(tracer->CollectData(&run_metadata)); EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 1); EXPECT_EQ(run_metadata.step_stats().dev_stats(0).node_stats_size(), 6); @@ -112,6 +106,68 @@ TEST(HostTracerTest, CollectsTraceMeEvents) { MakeNodeStats("incomplete", thread_id, "key1=value1,key2")))); } +TEST(HostTracerTest, CollectsTraceMeEventsAsXSpace) { + uint32 thread_id = Env::Default()->GetCurrentThreadId(); + string thread_name; + ASSERT_TRUE(Env::Default()->GetCurrentThreadName(&thread_name)); + + auto tracer = CreateHostTracer(ProfilerOptions()); + + TF_ASSERT_OK(tracer->Start()); + { TraceMe traceme("hello"); } + { TraceMe traceme("world"); } + { TraceMe traceme("contains#inside"); } + { TraceMe traceme("good#key1=value1#"); } + { TraceMe traceme("morning#key1=value1,key2=value2#"); } + { TraceMe traceme("incomplete#key1=value1,key2#"); } + TF_ASSERT_OK(tracer->Stop()); + + XSpace space; + TF_ASSERT_OK(tracer->CollectData(&space)); + + ASSERT_EQ(space.planes_size(), 1); + const auto& plane = space.planes(0); + EXPECT_EQ(plane.name(), "Host Threads"); + ASSERT_EQ(plane.lines_size(), 1); + ASSERT_EQ(plane.event_metadata_size(), 6); + ASSERT_EQ(plane.stat_metadata_size(), 2); + const auto& event_metadata = plane.event_metadata(); + const auto& stat_metadata = plane.stat_metadata(); + const auto& line = plane.lines(0); + EXPECT_EQ(line.id(), thread_id); + EXPECT_EQ(line.name(), thread_name); + ASSERT_EQ(line.events_size(), 6); + const auto& events = line.events(); + EXPECT_EQ(events[0].metadata_id(), 1); + EXPECT_EQ(event_metadata.at(1).name(), "hello"); + ASSERT_EQ(events[0].stats_size(), 0); + EXPECT_EQ(events[1].metadata_id(), 2); + EXPECT_EQ(event_metadata.at(2).name(), "world"); + ASSERT_EQ(events[1].stats_size(), 0); + EXPECT_EQ(events[2].metadata_id(), 3); + EXPECT_EQ(event_metadata.at(3).name(), "contains#inside"); + ASSERT_EQ(events[2].stats_size(), 0); + EXPECT_EQ(events[3].metadata_id(), 4); + EXPECT_EQ(event_metadata.at(4).name(), "good"); + ASSERT_EQ(events[3].stats_size(), 1); + EXPECT_EQ(events[3].stats(0).metadata_id(), 1); + EXPECT_EQ(stat_metadata.at(1).name(), "key1"); + EXPECT_EQ(events[3].stats(0).str_value(), "value1"); + EXPECT_EQ(events[4].metadata_id(), 5); + EXPECT_EQ(event_metadata.at(5).name(), "morning"); + ASSERT_EQ(events[4].stats_size(), 2); + EXPECT_EQ(events[4].stats(0).metadata_id(), 1); + EXPECT_EQ(events[4].stats(0).str_value(), "value1"); + EXPECT_EQ(events[4].stats(1).metadata_id(), 2); + EXPECT_EQ(stat_metadata.at(2).name(), "key2"); + EXPECT_EQ(events[4].stats(1).str_value(), "value2"); + EXPECT_EQ(events[5].metadata_id(), 6); + EXPECT_EQ(event_metadata.at(6).name(), "incomplete"); + ASSERT_EQ(events[5].stats_size(), 1); + EXPECT_EQ(events[5].stats(0).metadata_id(), 1); + EXPECT_EQ(events[5].stats(0).str_value(), "value1"); +} + } // namespace } // namespace cpu } // namespace profiler diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index 2e886efa45b..603d7d0fb8b 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -13,6 +13,7 @@ load( ) load( "//tensorflow/core/platform:default/build_config_root.bzl", + "if_static", "tf_cuda_tests_tags", ) @@ -38,8 +39,10 @@ tf_cuda_library( "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - ], + ] + if_static( + extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], + otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], + ), alwayslink = 1, ) diff --git a/tensorflow/core/profiler/internal/profiler_interface.h b/tensorflow/core/profiler/internal/profiler_interface.h index 6d7d456f95e..bed4e7603ae 100644 --- a/tensorflow/core/profiler/internal/profiler_interface.h +++ b/tensorflow/core/profiler/internal/profiler_interface.h @@ -19,7 +19,10 @@ limitations under the License. #include #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/platform.h" +#if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#endif #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -66,10 +69,12 @@ class ProfilerInterface { // return empty data. virtual Status CollectData(RunMetadata* run_metadata) = 0; +#if !defined(IS_MOBILE_PLATFORM) // Saves collected profile data into XSpace. // After this or the overload above are called once, subsequent calls might // return empty data. virtual Status CollectData(XSpace* space) = 0; +#endif // Which device this ProfilerInterface is used for. virtual DeviceType GetDeviceType() = 0; diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD index 115cac39c93..68317696c1d 100644 --- a/tensorflow/core/profiler/protobuf/BUILD +++ b/tensorflow/core/profiler/protobuf/BUILD @@ -16,16 +16,9 @@ tf_proto_library( name = "xplane_proto", srcs = ["xplane.proto"], cc_api_version = 2, - make_default_target_header_only = True, visibility = [":friends"], ) -# This is needed because of how tf_android_core_proto_sources parses proto paths. -exports_files( - srcs = ["xplane.proto"], - visibility = ["//tensorflow/core:__pkg__"], -) - tf_proto_library( name = "op_metrics_proto", srcs = ["op_metrics.proto"], diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 189dfb1f5b1..ede6626db88 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -1,4 +1,5 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow/core/profiler:internal"], @@ -84,10 +85,12 @@ cc_library( visibility = [":friends"], deps = [ ":time_utils", - "//tensorflow/core:lib", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/strings", - ], + "//tensorflow/core:lib", + ] + if_static( + extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], + otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], + ), ) cc_library( @@ -97,9 +100,11 @@ cc_library( deps = [ ":time_utils", ":timespan", + "@com_google_absl//absl/strings", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "@com_google_absl//absl/strings", - ], + ] + if_static( + extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], + otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], + ), )