Implement CollectData(XSpace*) in HostTracer

PiperOrigin-RevId: 281325907
Change-Id: I77d89231dc59c4bc1e95e9756753bac70f286988
This commit is contained in:
Jose Baiocchi 2019-11-19 10:34:55 -08:00 committed by TensorFlower Gardener
parent ddbdb4a7fd
commit 6c092464ff
9 changed files with 200 additions and 58 deletions

View File

@ -219,17 +219,13 @@ FRAMEWORK_PROTO_SRCS = [
"framework/versions.proto",
]
PROFILER_PROTO_SRCS = [
"//tensorflow/core/profiler/protobuf:xplane.proto",
]
ERROR_CODES_PROTO_SRCS = [
"protobuf/error_codes.proto",
"//tensorflow/core/lib/core:error_codes.proto",
]
# LINT.ThenChange(//tensorflow/core/android_proto_config.asciipb)
CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + PROFILER_PROTO_SRCS + ERROR_CODES_PROTO_SRCS
CORE_PROTO_SRCS = COMMON_PROTO_SRCS + FRAMEWORK_PROTO_SRCS + ERROR_CODES_PROTO_SRCS
tf_proto_library(
name = "protos_all",

View File

@ -1,4 +1,5 @@
load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test")
load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static")
package(
default_visibility = ["//tensorflow:internal"],
@ -395,8 +396,11 @@ cc_library(
deps = [
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
],
"//tensorflow/core/platform",
] + if_static(
extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"],
otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"],
),
)
filegroup(

View File

@ -1,4 +1,5 @@
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static")
package(
default_visibility = ["//tensorflow:internal"],
@ -9,16 +10,20 @@ cc_library(
name = "host_tracer",
srcs = ["host_tracer.cc"],
deps = [
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
"//tensorflow/core:core_cpu_lib",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/internal:parse_annotation",
"//tensorflow/core/profiler/internal:profiler_interface",
"//tensorflow/core/profiler/internal:traceme_recorder",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
],
"//tensorflow/core/profiler/utils:xplane_builder",
] + if_static(
extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"],
otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"],
),
alwayslink = True,
)
@ -33,6 +38,8 @@ tf_cc_test(
"//tensorflow/core:test",
"//tensorflow/core/profiler/internal:profiler_interface",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:optional",
"@com_google_googletest//:gtest_main",
],

View File

@ -13,15 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/strings/str_split.h"
#include "tensorflow/core/common_runtime/step_stats_collector.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env_time.h"
#include "tensorflow/core/profiler/internal/parse_annotation.h"
#include "tensorflow/core/profiler/internal/profiler_interface.h"
#include "tensorflow/core/profiler/internal/traceme_recorder.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_builder.h"
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/env_var.h"
@ -29,6 +32,7 @@ namespace tensorflow {
namespace profiler {
namespace cpu {
namespace {
// Controls TraceMeRecorder and converts TraceMeRecorder::Events into
// RunMetadata messages.
//
@ -50,17 +54,22 @@ class HostTracer : public ProfilerInterface {
Status CollectData(XSpace* space) override;
profiler::DeviceType GetDeviceType() override {
return profiler::DeviceType::kCpu;
}
DeviceType GetDeviceType() override { return DeviceType::kCpu; }
private:
// Combine events created by TraceMe::ActivityStart and TraceMe::ActivityEnd,
// which can be paired up by their activity_id.
void MakeCompleteEvents();
// Level of host tracing.
const int host_trace_level_;
// True if currently recording.
bool recording_ = false;
// Timestamp at the start of tracing.
uint64 start_timestamp_ns_ = 0;
// Container of all traced events.
TraceMeRecorder::Events events_;
};
@ -78,6 +87,7 @@ Status HostTracer::Start() {
if (!recording_) {
return Status(error::INTERNAL, "Failed to start TraceMeRecorder");
}
start_timestamp_ns_ = EnvTime::Default()->NowNanos();
return Status::OK();
}
@ -90,33 +100,57 @@ Status HostTracer::Stop() {
return Status::OK();
}
constexpr char kUserMetadataMarker = '#';
Status HostTracer::CollectData(RunMetadata* run_metadata) {
if (recording_) {
return Status(error::INTERNAL, "TraceMeRecorder not stopped");
}
// Pair up start and end events, and add complete events to trace_entries.
absl::flat_hash_map<uint64, uint64> end_times;
for (const auto& thread : events_) {
for (const auto& event : thread.events) {
if (event.end_time && !event.start_time) {
end_times.emplace(event.activity_id, event.end_time);
void HostTracer::MakeCompleteEvents() {
// Track events create by ActivityStart and copy their data to events created
// by ActivityEnd. TraceME records events in its destructor, so this results
// in complete events sorted by their end_time in the thread they ended.
// Within the same thread, the record created by ActivityStart must appear
// before the record created by ActivityEnd. Cross-thread events must be
// processed in a separate pass. A single map can be used because the
// activity_id is globally unique.
absl::flat_hash_map<uint64, TraceMeRecorder::Event*> start_events;
std::vector<TraceMeRecorder::Event*> end_events;
for (auto& thread : events_) {
for (auto& event : thread.events) {
if (event.start_time && !event.end_time) { // ActivityStart
start_events.emplace(event.activity_id, &event);
} else if (!event.start_time && event.end_time) { // ActivityEnd
auto iter = start_events.find(event.activity_id);
if (iter != start_events.end()) { // same thread
auto* start_event = iter->second;
event.name = std::move(start_event->name);
event.start_time = start_event->start_time;
start_events.erase(iter);
} else { // cross-thread
end_events.push_back(&event);
}
}
}
}
for (auto* event : end_events) { // cross-thread
auto iter = start_events.find(event->activity_id);
if (iter != start_events.end()) {
auto* start_event = iter->second;
event->name = std::move(start_event->name);
event->start_time = start_event->start_time;
start_events.erase(iter);
}
}
}
Status HostTracer::CollectData(RunMetadata* run_metadata) {
if (recording_) {
return errors::Internal("TraceMeRecorder not stopped");
}
MakeCompleteEvents();
StepStatsCollector step_stats_collector(run_metadata->mutable_step_stats());
constexpr char kUserMetadataMarker = '#';
const string cpu_name = "/host:CPU";
for (auto& thread : events_) {
step_stats_collector.SaveThreadName(cpu_name, thread.thread.tid,
thread.thread.name);
for (auto& event : thread.events) {
if (!event.end_time) {
auto it = end_times.find(event.activity_id);
if (it != end_times.end()) event.end_time = it->second;
}
if (event.start_time && event.end_time) {
NodeExecStats* ns = new NodeExecStats;
if (event.name.back() != kUserMetadataMarker) {
@ -136,7 +170,6 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) {
ns->set_all_end_rel_micros((event.end_time - event.start_time) /
EnvTime::kMicrosToNanos);
ns->set_thread_id(thread.thread.tid);
// TODO(fishx): Add thread name to RunMetadata
step_stats_collector.Save(cpu_name, ns);
}
}
@ -147,7 +180,48 @@ Status HostTracer::CollectData(RunMetadata* run_metadata) {
}
Status HostTracer::CollectData(XSpace* space) {
return errors::Unimplemented("Collect data into XSpace not yet implemented");
if (recording_) {
return errors::Internal("TraceMeRecorder not stopped");
}
MakeCompleteEvents();
XPlaneBuilder xplane(space->add_planes());
xplane.SetName("Host Threads");
absl::flat_hash_map<string, XEventMetadata*> xevent_metadata_by_name;
absl::flat_hash_map<string, XStatMetadata*> xstat_metadata_by_name;
for (const auto& thread : events_) {
XLineBuilder xline = xplane.AddLine();
xline.SetId(thread.thread.tid);
xline.SetName(thread.thread.name);
xline.SetTimestampNs(start_timestamp_ns_);
xline.ReserveEvents(thread.events.size());
for (const auto& event : thread.events) {
if (event.start_time && event.end_time) {
Annotation annotation = ParseAnnotation(event.name);
XEventMetadata*& xevent_metadata =
xevent_metadata_by_name[annotation.name];
if (xevent_metadata == nullptr) {
xevent_metadata =
xplane.GetOrCreateEventMetadata(xevent_metadata_by_name.size());
xevent_metadata->set_name(string(annotation.name));
}
XEventBuilder xevent = xline.AddEvent(*xevent_metadata);
xevent.SetTimestampNs(event.start_time);
xevent.SetEndTimestampNs(event.end_time);
xevent.ReserveStats(annotation.metadata.size());
for (const auto& metadata : annotation.metadata) {
XStatMetadata*& xstat_metadata = xstat_metadata_by_name[metadata.key];
if (xstat_metadata == nullptr) {
xstat_metadata =
xplane.GetOrCreateStatMetadata(xstat_metadata_by_name.size());
xstat_metadata->set_name(string(metadata.key));
}
xevent.ParseAndAddStatValue(*xstat_metadata, metadata.value);
}
}
}
}
events_.clear();
return Status::OK();
}
} // namespace
@ -161,7 +235,6 @@ std::unique_ptr<ProfilerInterface> CreateHostTracer(
auto register_host_tracer_factory = [] {
bool enable;
TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_OSS_CPU_PROFILER", true, &enable));
if (enable) {
RegisterProfilerFactory(&CreateHostTracer);

View File

@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/profiler/internal/profiler_interface.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/protobuf/config.pb.h"
namespace tensorflow {
@ -34,12 +35,6 @@ std::unique_ptr<ProfilerInterface> CreateHostTracer(
namespace {
Status CollectData(ProfilerInterface* profiler, RunMetadata* run_metadata) {
return profiler->CollectData(run_metadata);
}
using ::testing::ElementsAre;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
NodeExecStats MakeNodeStats(const string& name, uint64 thread_id,
@ -79,11 +74,10 @@ inline ::testing::PolymorphicMatcher<NodeStatsMatcher> EqualsNodeStats(
return ::testing::MakePolymorphicMatcher(NodeStatsMatcher(expected));
}
TEST(HostTracerTest, CollectsTraceMeEvents) {
TEST(HostTracerTest, CollectsTraceMeEventsAsRunMetadata) {
uint32 thread_id = Env::Default()->GetCurrentThreadId();
const ProfilerOptions options;
auto tracer = CreateHostTracer(options);
auto tracer = CreateHostTracer(ProfilerOptions());
TF_ASSERT_OK(tracer->Start());
{ TraceMe traceme("hello"); }
@ -95,7 +89,7 @@ TEST(HostTracerTest, CollectsTraceMeEvents) {
TF_ASSERT_OK(tracer->Stop());
RunMetadata run_metadata;
TF_ASSERT_OK(CollectData(tracer.get(), &run_metadata));
TF_ASSERT_OK(tracer->CollectData(&run_metadata));
EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 1);
EXPECT_EQ(run_metadata.step_stats().dev_stats(0).node_stats_size(), 6);
@ -112,6 +106,68 @@ TEST(HostTracerTest, CollectsTraceMeEvents) {
MakeNodeStats("incomplete", thread_id, "key1=value1,key2"))));
}
TEST(HostTracerTest, CollectsTraceMeEventsAsXSpace) {
uint32 thread_id = Env::Default()->GetCurrentThreadId();
string thread_name;
ASSERT_TRUE(Env::Default()->GetCurrentThreadName(&thread_name));
auto tracer = CreateHostTracer(ProfilerOptions());
TF_ASSERT_OK(tracer->Start());
{ TraceMe traceme("hello"); }
{ TraceMe traceme("world"); }
{ TraceMe traceme("contains#inside"); }
{ TraceMe traceme("good#key1=value1#"); }
{ TraceMe traceme("morning#key1=value1,key2=value2#"); }
{ TraceMe traceme("incomplete#key1=value1,key2#"); }
TF_ASSERT_OK(tracer->Stop());
XSpace space;
TF_ASSERT_OK(tracer->CollectData(&space));
ASSERT_EQ(space.planes_size(), 1);
const auto& plane = space.planes(0);
EXPECT_EQ(plane.name(), "Host Threads");
ASSERT_EQ(plane.lines_size(), 1);
ASSERT_EQ(plane.event_metadata_size(), 6);
ASSERT_EQ(plane.stat_metadata_size(), 2);
const auto& event_metadata = plane.event_metadata();
const auto& stat_metadata = plane.stat_metadata();
const auto& line = plane.lines(0);
EXPECT_EQ(line.id(), thread_id);
EXPECT_EQ(line.name(), thread_name);
ASSERT_EQ(line.events_size(), 6);
const auto& events = line.events();
EXPECT_EQ(events[0].metadata_id(), 1);
EXPECT_EQ(event_metadata.at(1).name(), "hello");
ASSERT_EQ(events[0].stats_size(), 0);
EXPECT_EQ(events[1].metadata_id(), 2);
EXPECT_EQ(event_metadata.at(2).name(), "world");
ASSERT_EQ(events[1].stats_size(), 0);
EXPECT_EQ(events[2].metadata_id(), 3);
EXPECT_EQ(event_metadata.at(3).name(), "contains#inside");
ASSERT_EQ(events[2].stats_size(), 0);
EXPECT_EQ(events[3].metadata_id(), 4);
EXPECT_EQ(event_metadata.at(4).name(), "good");
ASSERT_EQ(events[3].stats_size(), 1);
EXPECT_EQ(events[3].stats(0).metadata_id(), 1);
EXPECT_EQ(stat_metadata.at(1).name(), "key1");
EXPECT_EQ(events[3].stats(0).str_value(), "value1");
EXPECT_EQ(events[4].metadata_id(), 5);
EXPECT_EQ(event_metadata.at(5).name(), "morning");
ASSERT_EQ(events[4].stats_size(), 2);
EXPECT_EQ(events[4].stats(0).metadata_id(), 1);
EXPECT_EQ(events[4].stats(0).str_value(), "value1");
EXPECT_EQ(events[4].stats(1).metadata_id(), 2);
EXPECT_EQ(stat_metadata.at(2).name(), "key2");
EXPECT_EQ(events[4].stats(1).str_value(), "value2");
EXPECT_EQ(events[5].metadata_id(), 6);
EXPECT_EQ(event_metadata.at(6).name(), "incomplete");
ASSERT_EQ(events[5].stats_size(), 1);
EXPECT_EQ(events[5].stats(0).metadata_id(), 1);
EXPECT_EQ(events[5].stats(0).str_value(), "value1");
}
} // namespace
} // namespace cpu
} // namespace profiler

View File

@ -13,6 +13,7 @@ load(
)
load(
"//tensorflow/core/platform:default/build_config_root.bzl",
"if_static",
"tf_cuda_tests_tags",
)
@ -38,8 +39,10 @@ tf_cuda_library(
"//tensorflow/core/profiler/internal:parse_annotation",
"//tensorflow/core/profiler/internal:profiler_interface",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
],
] + if_static(
extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"],
otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"],
),
alwayslink = 1,
)

View File

@ -19,7 +19,10 @@ limitations under the License.
#include <vector>
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/platform.h"
#if !defined(IS_MOBILE_PLATFORM)
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#endif
#include "tensorflow/core/protobuf/config.pb.h"
namespace tensorflow {
@ -66,10 +69,12 @@ class ProfilerInterface {
// return empty data.
virtual Status CollectData(RunMetadata* run_metadata) = 0;
#if !defined(IS_MOBILE_PLATFORM)
// Saves collected profile data into XSpace.
// After this or the overload above are called once, subsequent calls might
// return empty data.
virtual Status CollectData(XSpace* space) = 0;
#endif
// Which device this ProfilerInterface is used for.
virtual DeviceType GetDeviceType() = 0;

View File

@ -16,16 +16,9 @@ tf_proto_library(
name = "xplane_proto",
srcs = ["xplane.proto"],
cc_api_version = 2,
make_default_target_header_only = True,
visibility = [":friends"],
)
# This is needed because of how tf_android_core_proto_sources parses proto paths.
exports_files(
srcs = ["xplane.proto"],
visibility = ["//tensorflow/core:__pkg__"],
)
tf_proto_library(
name = "op_metrics_proto",
srcs = ["op_metrics.proto"],

View File

@ -1,4 +1,5 @@
load("//tensorflow:tensorflow.bzl", "tf_cc_test")
load("//tensorflow/core/platform:default/build_config_root.bzl", "if_static")
package(
default_visibility = ["//tensorflow/core/profiler:internal"],
@ -84,10 +85,12 @@ cc_library(
visibility = [":friends"],
deps = [
":time_utils",
"//tensorflow/core:lib",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/strings",
],
"//tensorflow/core:lib",
] + if_static(
extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"],
otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"],
),
)
cc_library(
@ -97,9 +100,11 @@ cc_library(
deps = [
":time_utils",
":timespan",
"@com_google_absl//absl/strings",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/strings",
],
] + if_static(
extra_deps = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc"],
otherwise = ["//tensorflow/core/profiler/protobuf:xplane_proto_cc_headers_only"],
),
)