From 9d0c55d9f05b7420f86392cd61a16b7993420474 Mon Sep 17 00:00:00 2001
From: Jiho Choi
Date: Thu, 16 Apr 2020 15:32:00 -0700
Subject: [PATCH] Set is_eager for CPU TF op's OpMetrics.

PiperOrigin-RevId: 306933924
Change-Id: I566f76c697e82bf67368f1e7699c7e71dc5a16bd
---
 .../convert/xplane_to_op_metrics_db.cc        | 18 ++++++--
 tensorflow/core/profiler/utils/BUILD          |  1 +
 .../core/profiler/utils/group_events.cc       | 35 +++++++++++---
 tensorflow/core/profiler/utils/group_events.h | 10 +++-
 .../core/profiler/utils/group_events_test.cc  | 46 +++++++++++++------
 tensorflow/core/profiler/utils/op_utils.cc    |  5 +-
 tensorflow/core/profiler/utils/op_utils.h     |  3 +-
 .../core/profiler/utils/xplane_schema.cc      |  1 +
 .../core/profiler/utils/xplane_schema.h       |  1 +
 9 files changed, 89 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc
index d207cf558d1..09df59e44d9 100644
--- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc
+++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc
@@ -48,6 +48,8 @@ struct TfActivity {
   TfActivityType activity_type;
   // Full TF op name and type of this activity (backed by XEvent::name).
   TfOp tf_op;
+  // Whether it is eagerly executed.
+  bool is_eager;
 };
 
 // TF Op metrics stored as element in OpStack.
@@ -83,8 +85,8 @@ void ProcessOneTfActivity(const TfActivity& activity,
       Timespan tf_op_span =
           PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
       tf_metrics_data->tf_metrics_db_builder.EnterOp(
-          activity.tf_op.name, activity.tf_op.type, tf_op_span.duration_ps(),
-          info->children_duration_ps);
+          activity.tf_op.name, activity.tf_op.type, activity.is_eager,
+          tf_op_span.duration_ps(), info->children_duration_ps);
       TfOpInfo* parent_info = tf_op_stack->Top();
       if (parent_info != nullptr) {
         parent_info->children_duration_ps += tf_op_span.duration_ps();
@@ -135,9 +137,17 @@ void CollectTfActivities(const XLineVisitor& line,
     const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
     if (tf_op != nullptr) {
       ++tf_op_id;
+      bool is_eager = false;
+      event.ForEachStat([&](const XStatVisitor& stat) {
+        if (stat.Type() == StatType::kIsEager) {
+          is_eager = stat.IntValue();
+        }
+      });
       Timespan span(event.TimestampPs(), event.DurationPs());
-      tf_activities->push_back({span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op});
-      tf_activities->push_back({span.end_ps(), tf_op_id, kTfOpEnd, *tf_op});
+      tf_activities->push_back(
+          {span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
+      tf_activities->push_back(
+          {span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
     }
   });
 }
diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD
index 76a19b7c271..6bfb7ad84a1 100644
--- a/tensorflow/core/profiler/utils/BUILD
+++ b/tensorflow/core/profiler/utils/BUILD
@@ -228,6 +228,7 @@ cc_library(
     hdrs = ["group_events.h"],
    visibility = [":friends"],
    deps = [
+        ":tf_op_utils",
        ":tf_xplane_visitor",
        ":xplane_schema",
        ":xplane_utils",
diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc
index 7e3a17bdcbe..e68cb521463 100644
--- a/tensorflow/core/profiler/utils/group_events.cc
+++ b/tensorflow/core/profiler/utils/group_events.cc
@@ -20,6 +20,7 @@ limitations under the License.
#include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/profiler/utils/tf_op_utils.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" @@ -66,6 +67,12 @@ absl::optional GetKernelEventType(const XPlaneVisitor& visitor, return absl::nullopt; } +bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) { + TfOp tf_op = + ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name()); + return tf_op.category == Category::kTensorFlow; +} + int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) { if (absl::optional event_type = visitor.GetEventType(event)) { return *event_type; @@ -76,6 +83,8 @@ int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) { // TODO(148346217): Make XPlaneVisitor support KernelLaunch and // KernelExecute event types. return *kernel_event_type; + } else if (IsTfOpEvent(visitor, event)) { + return HostEventType::kTfOpRun; } else { return HostEventType::kUnknownHostEventType; } @@ -183,6 +192,12 @@ void EventNode::SetIsEager(bool is_eager) { is_eager ? 1 : 0, event_); } +bool EventNode::IsEager() { + // It is eagerly executed if its trace context does not include the TF + // executor. + return FindParent(HostEventType::kExecutorStateProcess) == nullptr; +} + bool EventNode::IsNestedIn(EventNode* parent) { return parent && IsNested(GetEvent(), parent->GetEvent()); } @@ -290,16 +305,21 @@ void EventForest::CreateEventGroup( } } -void EventForest::MarkEagerlyExecutedKernels() { +void EventForest::MarkEagerlyExecutedGpuKernels() { auto kernel_execute_event_node_list = gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute); if (!kernel_execute_event_node_list) return; for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) { - // A kernel is eagerly executed if its trace context does not include the - // TF executor. - bool is_eager = kernel_execute_event_node->FindParent( - HostEventType::kExecutorStateProcess) == nullptr; - kernel_execute_event_node->SetIsEager(is_eager); + kernel_execute_event_node->SetIsEager(kernel_execute_event_node->IsEager()); + } +} + +void EventForest::MarkEagerlyExecutedCpuTfOps() { + auto tf_op_run_event_node_list = + gtl::FindOrNull(event_node_map_, HostEventType::kTfOpRun); + if (!tf_op_run_event_node_list) return; + for (auto& tf_op_run_event_node : *tf_op_run_event_node_list) { + tf_op_run_event_node->SetIsEager(tf_op_run_event_node->IsEager()); } } @@ -381,7 +401,8 @@ EventForest::EventForest( CreateVirtualEventsForAsyncExecutor(); } CreateEventGroup(root_event_types); - MarkEagerlyExecutedKernels(); + MarkEagerlyExecutedGpuKernels(); + MarkEagerlyExecutedCpuTfOps(); } std::vector CreateInterThreadConnectInfoList() { diff --git a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index a8e3c1a393b..1140f2dab8d 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -73,6 +73,9 @@ class EventNode { void SetIsEager(bool is_eager); + // Returns true if this event is part of eagerly executed op. + bool IsEager(); + bool IsNestedIn(EventNode* parent); // Returns the closest parent of the given event type. 
@@ -125,8 +128,11 @@ class EventForest {
   void CreateEventGroup(
       const std::vector<int64>& root_event_types);
 
-  // Sets the is_eager stat to true for the eagerly executed kernel events.
-  void MarkEagerlyExecutedKernels();
+  // Sets the is_eager stat to true for the eagerly executed GPU kernel events.
+  void MarkEagerlyExecutedGpuKernels();
+
+  // Sets the is_eager stat to true for the eagerly executed CPU TF op events.
+  void MarkEagerlyExecutedCpuTfOps();
 
   // Create virtual events of HostEventType::kHostTrainingLoopIteration and
   // event nodes for them. A virtual event is created for each iteration of the
diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc
index 13709d442af..76d9405ebda 100644
--- a/tensorflow/core/profiler/utils/group_events_test.cc
+++ b/tensorflow/core/profiler/utils/group_events_test.cc
@@ -145,7 +145,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) {
 
 TEST(GroupEventsTest, EagerOpTest) {
   XSpace space;
-  XPlaneBuilder host_plane_builder(space.add_planes());
+  XPlane* host_plane = space.add_planes();
+  XPlaneBuilder host_plane_builder(host_plane);
   host_plane_builder.SetName(kHostThreads);
   host_plane_builder.ReserveLines(1);
 
@@ -153,8 +154,8 @@ TEST(GroupEventsTest, EagerOpTest) {
   // Eagerly scheduled GPU kernel.
   CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
                {{StatType::kCorrelationId, 100}});
-  CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun,
-               110, 200, {{StatType::kStepId, 0}});
+  // Eagerly executed CPU TF op.
+  CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
 
   XPlane* device_plane = space.add_planes();
   XPlaneBuilder device_plane_builder(device_plane);
@@ -166,17 +167,24 @@ TEST(GroupEventsTest, EagerOpTest) {
                {{StatType::kCorrelationId, 100}});
 
   GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
-  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 2);
-  EXPECT_EQ(device_plane_visitor.GetStatType(
-                device_plane->lines(0).events(0).stats(1)),
+  XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
+  const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(1);
+  EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1);
+  EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)),
             StatType::kIsEager);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 1);
+  EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1);
+  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
+  const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0);
+  EXPECT_EQ(eager_gpu_kernel.stats_size(), 2);
+  EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)),
+            StatType::kIsEager);
+  EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1);
 }
 
 TEST(GroupEventsTest, FunctionOpTest) {
   XSpace space;
-  XPlaneBuilder host_plane_builder(space.add_planes());
+  XPlane* host_plane = space.add_planes();
+  XPlaneBuilder host_plane_builder(host_plane);
   host_plane_builder.SetName(kHostThreads);
   host_plane_builder.ReserveLines(2);
 
@@ -191,8 +199,10 @@ TEST(GroupEventsTest, FunctionOpTest) {
                                HostEventType::kExecutorStateProcess, 20, 80,
                                {{StatType::kStepId, 0}});
   // GPU kernel scheduled inside tf.function.
-  CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70,
+  CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
                {{StatType::kCorrelationId, 100}});
+  // CPU TF op executed inside tf.function.
+  CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
 
   XPlane* device_plane = space.add_planes();
   XPlaneBuilder device_plane_builder(device_plane);
@@ -204,12 +214,18 @@ TEST(GroupEventsTest, FunctionOpTest) {
                {{StatType::kCorrelationId, 100}});
 
   GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
-  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3);
-  EXPECT_EQ(device_plane_visitor.GetStatType(
-                device_plane->lines(0).events(0).stats(2)),
+  XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
+  const XEvent& cpu_tf_op = host_plane->lines(1).events(2);
+  EXPECT_EQ(cpu_tf_op.stats_size(), 2);
+  EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)),
             StatType::kIsEager);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats(2).int64_value(), 0);
+  EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0);
+  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
+  const XEvent& gpu_kernel = device_plane->lines(0).events(0);
+  EXPECT_EQ(gpu_kernel.stats_size(), 3);
+  EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)),
+            StatType::kIsEager);
+  EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0);
 }
 
 }  // namespace
diff --git a/tensorflow/core/profiler/utils/op_utils.cc b/tensorflow/core/profiler/utils/op_utils.cc
index cc17564c6f8..cbc16f965d7 100644
--- a/tensorflow/core/profiler/utils/op_utils.cc
+++ b/tensorflow/core/profiler/utils/op_utils.cc
@@ -34,13 +34,14 @@ double GetCappedPerf(double perf, uint64 time, double rate_limit) {
 }  // namespace
 
 void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
-                                     absl::string_view category, uint64 time_ps,
-                                     uint64 children_time_ps) {
+                                     absl::string_view category, bool is_eager,
+                                     uint64 time_ps, uint64 children_time_ps) {
   uint64 self_time_ps = time_ps - children_time_ps;
   DCHECK_GE(time_ps, self_time_ps);
   OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
   if (op_metrics->category().empty())
     op_metrics->set_category(category.data(), category.size());
+  op_metrics->set_is_eager(is_eager);
   op_metrics->set_occurrences(op_metrics->occurrences() + 1);
   op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
   op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);
diff --git a/tensorflow/core/profiler/utils/op_utils.h b/tensorflow/core/profiler/utils/op_utils.h
index aeb973705e9..8aaa0f4f5c2 100644
--- a/tensorflow/core/profiler/utils/op_utils.h
+++ b/tensorflow/core/profiler/utils/op_utils.h
@@ -35,12 +35,13 @@ class HostOpMetricsDbBuilder : public OpMetricsDbBuilder {
   // observed on a trace, where:
   //   name = the OP name.
   //   category = the OP category.
+  //   is_eager = whether this OP is eagerly executed.
   //   time_ps = the total execution time of the OP in picoseconds, including
   //             the execution time of its children.
   //   children_time_ps = the execution time of the children of this OP in
   //                      picoseconds
   void EnterOp(absl::string_view name, absl::string_view category,
-               uint64 time_ps, uint64 children_time_ps);
+               bool is_eager, uint64 time_ps, uint64 children_time_ps);
 
   // Updates total_host_infeed_enq_duration_ps_ and
   // total_host_infeed_enq_duration_ps_.
diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc
index e93bea09b91..a040f9382c6 100644
--- a/tensorflow/core/profiler/utils/xplane_schema.cc
+++ b/tensorflow/core/profiler/utils/xplane_schema.cc
@@ -53,6 +53,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
       {"SessionRun", kSessionRun},
      {"FunctionRun", kFunctionRun},
      {"RunGraph", kRunGraph},
+      {"TfOpRun", kTfOpRun},
      {"EagerKernelExecute", kEagerKernelExecute},
      {"ExecutorState::Process", kExecutorStateProcess},
      {"ExecutorDoneCallback", kExecutorDoneCallback},
diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h
index 2a8ba268825..3a741716851 100644
--- a/tensorflow/core/profiler/utils/xplane_schema.h
+++ b/tensorflow/core/profiler/utils/xplane_schema.h
@@ -52,6 +52,7 @@ enum HostEventType {
   kSessionRun,
   kFunctionRun,
   kRunGraph,
+  kTfOpRun,
   kEagerKernelExecute,
   kExecutorStateProcess,
   kExecutorDoneCallback,