diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ce4bdf5c8de..c28ea21f4fc 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -219,13 +219,17 @@ FRAMEWORK_PROTO_SRCS = [ "framework/versions.proto", ] +PROFILER_PROTO_SRCS = [ + "//tensorflow/core/profiler/protobuf:xplane.proto", +] + ERROR_CODES_PROTO_SRCS = [ "protobuf/error_codes.proto", "//tensorflow/core/lib/core:error_codes.proto", ] # LINT.ThenChange(//tensorflow/core/android_proto_config.asciipb) -CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + ERROR_CODES_PROTO_SRCS +CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + PROFILER_PROTO_SRCS + ERROR_CODES_PROTO_SRCS tf_proto_library( name = "protos_all", diff --git a/tensorflow/core/profiler/internal/BUILD b/tensorflow/core/profiler/internal/BUILD index c4d915177ba..cd6de977a04 100644 --- a/tensorflow/core/profiler/internal/BUILD +++ b/tensorflow/core/profiler/internal/BUILD @@ -1,5 +1,4 @@ load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test") -load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow:internal"], @@ -396,11 +395,8 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/platform", - ] + if_static( - extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], - otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], - ), + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + ], ) filegroup( diff --git a/tensorflow/core/profiler/internal/cpu/BUILD b/tensorflow/core/profiler/internal/cpu/BUILD index e0d271bcfcc..d2b3fdd1201 100644 --- a/tensorflow/core/profiler/internal/cpu/BUILD +++ b/tensorflow/core/profiler/internal/cpu/BUILD @@ -1,5 +1,4 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test") -load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow:internal"], @@ -10,20 +9,16 @@ cc_library( name = "host_tracer", srcs = ["host_tracer.cc"], deps = [ - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/internal:traceme_recorder", - "//tensorflow/core/profiler/utils:xplane_builder", - ] + if_static( - extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], - otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], - ), + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], alwayslink = True, ) @@ -38,8 +33,6 @@ tf_cc_test( "//tensorflow/core:test", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_googletest//:gtest_main", ], diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer.cc b/tensorflow/core/profiler/internal/cpu/host_tracer.cc index 6f9c467fe09..3d2cd86e5a5 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer.cc @@ -13,18 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include -#include #include "absl/container/flat_hash_map.h" #include "absl/strings/str_split.h" #include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/env_time.h" -#include "tensorflow/core/profiler/internal/parse_annotation.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/internal/traceme_recorder.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" -#include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/env_var.h" @@ -32,7 +29,6 @@ namespace tensorflow { namespace profiler { namespace cpu { namespace { - // Controls TraceMeRecorder and converts TraceMeRecorder::Events into // RunMetadata messages. // @@ -54,22 +50,17 @@ class HostTracer : public ProfilerInterface { Status CollectData(XSpace* space) override; - DeviceType GetDeviceType() override { return DeviceType::kCpu; } + profiler::DeviceType GetDeviceType() override { + return profiler::DeviceType::kCpu; + } private: - // Combine events created by TraceMe::ActivityStart and TraceMe::ActivityEnd, - // which can be paired up by their activity_id. - void MakeCompleteEvents(); - // Level of host tracing. const int host_trace_level_; // True if currently recording. bool recording_ = false; - // Timestamp at the start of tracing. - uint64 start_timestamp_ns_ = 0; - // Container of all traced events. TraceMeRecorder::Events events_; }; @@ -87,7 +78,6 @@ Status HostTracer::Start() { if (!recording_) { return Status(error::INTERNAL, "Failed to start TraceMeRecorder"); } - start_timestamp_ns_ = EnvTime::Default()->NowNanos(); return Status::OK(); } @@ -100,57 +90,33 @@ Status HostTracer::Stop() { return Status::OK(); } -void HostTracer::MakeCompleteEvents() { - // Track events create by ActivityStart and copy their data to events created - // by ActivityEnd. TraceME records events in its destructor, so this results - // in complete events sorted by their end_time in the thread they ended. - // Within the same thread, the record created by ActivityStart must appear - // before the record created by ActivityEnd. Cross-thread events must be - // processed in a separate pass. A single map can be used because the - // activity_id is globally unique. - absl::flat_hash_map start_events; - std::vector end_events; - for (auto& thread : events_) { - for (auto& event : thread.events) { - if (event.start_time && !event.end_time) { // ActivityStart - start_events.emplace(event.activity_id, &event); - } else if (!event.start_time && event.end_time) { // ActivityEnd - auto iter = start_events.find(event.activity_id); - if (iter != start_events.end()) { // same thread - auto* start_event = iter->second; - event.name = std::move(start_event->name); - event.start_time = start_event->start_time; - start_events.erase(iter); - } else { // cross-thread - end_events.push_back(&event); - } - } - } - } - for (auto* event : end_events) { // cross-thread - auto iter = start_events.find(event->activity_id); - if (iter != start_events.end()) { - auto* start_event = iter->second; - event->name = std::move(start_event->name); - event->start_time = start_event->start_time; - start_events.erase(iter); - } - } -} +constexpr char kUserMetadataMarker = '#'; Status HostTracer::CollectData(RunMetadata* run_metadata) { if (recording_) { - return errors::Internal("TraceMeRecorder not stopped"); + return Status(error::INTERNAL, "TraceMeRecorder not stopped"); } - MakeCompleteEvents(); + // Pair up start and end events, and add complete events to trace_entries. + absl::flat_hash_map end_times; + for (const auto& thread : events_) { + for (const auto& event : thread.events) { + if (event.end_time && !event.start_time) { + end_times.emplace(event.activity_id, event.end_time); + } + } + } + StepStatsCollector step_stats_collector(run_metadata->mutable_step_stats()); - constexpr char kUserMetadataMarker = '#'; const string cpu_name = "/host:CPU"; for (auto& thread : events_) { step_stats_collector.SaveThreadName(cpu_name, thread.thread.tid, thread.thread.name); for (auto& event : thread.events) { + if (!event.end_time) { + auto it = end_times.find(event.activity_id); + if (it != end_times.end()) event.end_time = it->second; + } if (event.start_time && event.end_time) { NodeExecStats* ns = new NodeExecStats; if (event.name.back() != kUserMetadataMarker) { @@ -170,6 +136,7 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) { ns->set_all_end_rel_micros((event.end_time - event.start_time) / EnvTime::kMicrosToNanos); ns->set_thread_id(thread.thread.tid); + // TODO(fishx): Add thread name to RunMetadata step_stats_collector.Save(cpu_name, ns); } } @@ -180,48 +147,7 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) { } Status HostTracer::CollectData(XSpace* space) { - if (recording_) { - return errors::Internal("TraceMeRecorder not stopped"); - } - MakeCompleteEvents(); - XPlaneBuilder xplane(space->add_planes()); - xplane.SetName("Host Threads"); - absl::flat_hash_map xevent_metadata_by_name; - absl::flat_hash_map xstat_metadata_by_name; - for (const auto& thread : events_) { - XLineBuilder xline = xplane.AddLine(); - xline.SetId(thread.thread.tid); - xline.SetName(thread.thread.name); - xline.SetTimestampNs(start_timestamp_ns_); - xline.ReserveEvents(thread.events.size()); - for (const auto& event : thread.events) { - if (event.start_time && event.end_time) { - Annotation annotation = ParseAnnotation(event.name); - XEventMetadata*& xevent_metadata = - xevent_metadata_by_name[annotation.name]; - if (xevent_metadata == nullptr) { - xevent_metadata = - xplane.GetOrCreateEventMetadata(xevent_metadata_by_name.size()); - xevent_metadata->set_name(string(annotation.name)); - } - XEventBuilder xevent = xline.AddEvent(*xevent_metadata); - xevent.SetTimestampNs(event.start_time); - xevent.SetEndTimestampNs(event.end_time); - xevent.ReserveStats(annotation.metadata.size()); - for (const auto& metadata : annotation.metadata) { - XStatMetadata*& xstat_metadata = xstat_metadata_by_name[metadata.key]; - if (xstat_metadata == nullptr) { - xstat_metadata = - xplane.GetOrCreateStatMetadata(xstat_metadata_by_name.size()); - xstat_metadata->set_name(string(metadata.key)); - } - xevent.ParseAndAddStatValue(*xstat_metadata, metadata.value); - } - } - } - } - events_.clear(); - return Status::OK(); + return errors::Unimplemented("Collect data into XSpace not yet implemented"); } } // namespace @@ -235,6 +161,7 @@ std::unique_ptr CreateHostTracer( auto register_host_tracer_factory = [] { bool enable; + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_OSS_CPU_PROFILER", true, &enable)); if (enable) { RegisterProfilerFactory(&CreateHostTracer); diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc index f1b2b8560c9..e83fe8a3e1a 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer_test.cc @@ -23,7 +23,6 @@ limitations under the License. #include "tensorflow/core/platform/env.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -35,6 +34,12 @@ std::unique_ptr CreateHostTracer( namespace { +Status CollectData(ProfilerInterface* profiler, RunMetadata* run_metadata) { + return profiler->CollectData(run_metadata); +} + +using ::testing::ElementsAre; +using ::testing::Pair; using ::testing::UnorderedElementsAre; NodeExecStats MakeNodeStats(const string& name, uint64 thread_id, @@ -74,10 +79,11 @@ inline ::testing::PolymorphicMatcher EqualsNodeStats( return ::testing::MakePolymorphicMatcher(NodeStatsMatcher(expected)); } -TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) { +TEST(HostTracerTest, CollectsTraceMeEvents) { uint32 thread_id = Env::Default()->GetCurrentThreadId(); - auto tracer = CreateHostTracer(ProfilerOptions()); + const ProfilerOptions options; + auto tracer = CreateHostTracer(options); TF_ASSERT_OK(tracer->Start()); { TraceMe traceme("hello"); } @@ -89,7 +95,7 @@ TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) { TF_ASSERT_OK(tracer->Stop()); RunMetadata run_metadata; - TF_ASSERT_OK(tracer->CollectData(&run_metadata)); + TF_ASSERT_OK(CollectData(tracer.get(), &run_metadata)); EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 1); EXPECT_EQ(run_metadata.step_stats().dev_stats(0).node_stats_size(), 6); @@ -106,68 +112,6 @@ TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) { MakeNodeStats("incomplete", thread_id, "key1=value1,key2")))); } -TEST(HostTracerTest, CollectsTraceMeEventsAsXSpace) { - uint32 thread_id = Env::Default()->GetCurrentThreadId(); - string thread_name; - ASSERT_TRUE(Env::Default()->GetCurrentThreadName(&thread_name)); - - auto tracer = CreateHostTracer(ProfilerOptions()); - - TF_ASSERT_OK(tracer->Start()); - { TraceMe traceme("hello"); } - { TraceMe traceme("world"); } - { TraceMe traceme("contains#inside"); } - { TraceMe traceme("good#key1=value1#"); } - { TraceMe traceme("morning#key1=value1,key2=value2#"); } - { TraceMe traceme("incomplete#key1=value1,key2#"); } - TF_ASSERT_OK(tracer->Stop()); - - XSpace space; - TF_ASSERT_OK(tracer->CollectData(&space)); - - ASSERT_EQ(space.planes_size(), 1); - const auto& plane = space.planes(0); - EXPECT_EQ(plane.name(), "Host Threads"); - ASSERT_EQ(plane.lines_size(), 1); - ASSERT_EQ(plane.event_metadata_size(), 6); - ASSERT_EQ(plane.stat_metadata_size(), 2); - const auto& event_metadata = plane.event_metadata(); - const auto& stat_metadata = plane.stat_metadata(); - const auto& line = plane.lines(0); - EXPECT_EQ(line.id(), thread_id); - EXPECT_EQ(line.name(), thread_name); - ASSERT_EQ(line.events_size(), 6); - const auto& events = line.events(); - EXPECT_EQ(events[0].metadata_id(), 1); - EXPECT_EQ(event_metadata.at(1).name(), "hello"); - ASSERT_EQ(events[0].stats_size(), 0); - EXPECT_EQ(events[1].metadata_id(), 2); - EXPECT_EQ(event_metadata.at(2).name(), "world"); - ASSERT_EQ(events[1].stats_size(), 0); - EXPECT_EQ(events[2].metadata_id(), 3); - EXPECT_EQ(event_metadata.at(3).name(), "contains#inside"); - ASSERT_EQ(events[2].stats_size(), 0); - EXPECT_EQ(events[3].metadata_id(), 4); - EXPECT_EQ(event_metadata.at(4).name(), "good"); - ASSERT_EQ(events[3].stats_size(), 1); - EXPECT_EQ(events[3].stats(0).metadata_id(), 1); - EXPECT_EQ(stat_metadata.at(1).name(), "key1"); - EXPECT_EQ(events[3].stats(0).str_value(), "value1"); - EXPECT_EQ(events[4].metadata_id(), 5); - EXPECT_EQ(event_metadata.at(5).name(), "morning"); - ASSERT_EQ(events[4].stats_size(), 2); - EXPECT_EQ(events[4].stats(0).metadata_id(), 1); - EXPECT_EQ(events[4].stats(0).str_value(), "value1"); - EXPECT_EQ(events[4].stats(1).metadata_id(), 2); - EXPECT_EQ(stat_metadata.at(2).name(), "key2"); - EXPECT_EQ(events[4].stats(1).str_value(), "value2"); - EXPECT_EQ(events[5].metadata_id(), 6); - EXPECT_EQ(event_metadata.at(6).name(), "incomplete"); - ASSERT_EQ(events[5].stats_size(), 1); - EXPECT_EQ(events[5].stats(0).metadata_id(), 1); - EXPECT_EQ(events[5].stats(0).str_value(), "value1"); -} - } // namespace } // namespace cpu } // namespace profiler diff --git a/tensorflow/core/profiler/internal/gpu/BUILD b/tensorflow/core/profiler/internal/gpu/BUILD index 603d7d0fb8b..2e886efa45b 100644 --- a/tensorflow/core/profiler/internal/gpu/BUILD +++ b/tensorflow/core/profiler/internal/gpu/BUILD @@ -13,7 +13,6 @@ load( ) load( "//tensorflow/core/platform:default/build_config_root.bzl", - "if_static", "tf_cuda_tests_tags", ) @@ -39,10 +38,8 @@ tf_cuda_library( "//tensorflow/core/profiler/internal:parse_annotation", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/lib:traceme", - ] + if_static( - extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], - otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], - ), + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + ], alwayslink = 1, ) diff --git a/tensorflow/core/profiler/internal/profiler_interface.h b/tensorflow/core/profiler/internal/profiler_interface.h index bed4e7603ae..6d7d456f95e 100644 --- a/tensorflow/core/profiler/internal/profiler_interface.h +++ b/tensorflow/core/profiler/internal/profiler_interface.h @@ -19,10 +19,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/platform.h" -#if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/profiler/protobuf/xplane.pb.h" -#endif #include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { @@ -69,12 +66,10 @@ class ProfilerInterface { // return empty data. virtual Status CollectData(RunMetadata* run_metadata) = 0; -#if !defined(IS_MOBILE_PLATFORM) // Saves collected profile data into XSpace. // After this or the overload above are called once, subsequent calls might // return empty data. virtual Status CollectData(XSpace* space) = 0; -#endif // Which device this ProfilerInterface is used for. virtual DeviceType GetDeviceType() = 0; diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD index 68317696c1d..115cac39c93 100644 --- a/tensorflow/core/profiler/protobuf/BUILD +++ b/tensorflow/core/profiler/protobuf/BUILD @@ -16,9 +16,16 @@ tf_proto_library( name = "xplane_proto", srcs = ["xplane.proto"], cc_api_version = 2, + make_default_target_header_only = True, visibility = [":friends"], ) +# This is needed because of how tf_android_core_proto_sources parses proto paths. +exports_files( + srcs = ["xplane.proto"], + visibility = ["//tensorflow/core:__pkg__"], +) + tf_proto_library( name = "op_metrics_proto", srcs = ["op_metrics.proto"], diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index ede6626db88..189dfb1f5b1 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -1,5 +1,4 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_test") -load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static") package( default_visibility = ["//tensorflow/core/profiler:internal"], @@ -85,12 +84,10 @@ cc_library( visibility = [":friends"], deps = [ ":time_utils", - "@com_google_absl//absl/strings", "//tensorflow/core:lib", - ] + if_static( - extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], - otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], - ), + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/strings", + ], ) cc_library( @@ -100,11 +97,9 @@ cc_library( deps = [ ":time_utils", ":timespan", - "@com_google_absl//absl/strings", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - ] + if_static( - extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"], - otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"], - ), + "//tensorflow/core/profiler/protobuf:xplane_proto_cc", + "@com_google_absl//absl/strings", + ], )