Set is_eager for CPU TF op's OpMetrics.

PiperOrigin-RevId: 306933924
Change-Id: I566f76c697e82bf67368f1e7699c7e71dc5a16bd
Jiho Choi 2020-04-16 15:32:00 -07:00 committed by TensorFlower Gardener
parent 50fe4f8b12
commit 9d0c55d9f0
9 changed files with 89 additions and 31 deletions

View File

@@ -48,6 +48,8 @@ struct TfActivity {
   TfActivityType activity_type;
   // Full TF op name and type of this activity (backed by XEvent::name).
   TfOp tf_op;
+  // Whether it is eagerly executed.
+  bool is_eager;
 };
 
 // TF Op metrics stored as element in OpStack.
@@ -83,8 +85,8 @@ void ProcessOneTfActivity(const TfActivity& activity,
       Timespan tf_op_span =
          PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
       tf_metrics_data->tf_metrics_db_builder.EnterOp(
-          activity.tf_op.name, activity.tf_op.type, tf_op_span.duration_ps(),
-          info->children_duration_ps);
+          activity.tf_op.name, activity.tf_op.type, activity.is_eager,
+          tf_op_span.duration_ps(), info->children_duration_ps);
       TfOpInfo* parent_info = tf_op_stack->Top();
       if (parent_info != nullptr) {
         parent_info->children_duration_ps += tf_op_span.duration_ps();
@@ -135,9 +137,17 @@ void CollectTfActivities(const XLineVisitor& line,
     const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
     if (tf_op != nullptr) {
       ++tf_op_id;
+      bool is_eager = false;
+      event.ForEachStat([&](const XStatVisitor& stat) {
+        if (stat.Type() == StatType::kIsEager) {
+          is_eager = stat.IntValue();
+        }
+      });
       Timespan span(event.TimestampPs(), event.DurationPs());
-      tf_activities->push_back({span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op});
-      tf_activities->push_back({span.end_ps(), tf_op_id, kTfOpEnd, *tf_op});
+      tf_activities->push_back(
+          {span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
+      tf_activities->push_back(
+          {span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
     }
   });
 }

View File

@@ -228,6 +228,7 @@ cc_library(
     hdrs = ["group_events.h"],
     visibility = [":friends"],
     deps = [
+        ":tf_op_utils",
         ":tf_xplane_visitor",
         ":xplane_schema",
         ":xplane_utils",

View File

@@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/strings/str_join.h"
 #include "absl/types/optional.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/profiler/utils/tf_op_utils.h"
 #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
 #include "tensorflow/core/profiler/utils/xplane_schema.h"
 #include "tensorflow/core/profiler/utils/xplane_utils.h"
@@ -66,6 +67,12 @@ absl::optional<int64> GetKernelEventType(const XPlaneVisitor& visitor,
   return absl::nullopt;
 }
 
+bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) {
+  TfOp tf_op =
+      ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name());
+  return tf_op.category == Category::kTensorFlow;
+}
+
 int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
   if (absl::optional<int64> event_type = visitor.GetEventType(event)) {
     return *event_type;
@@ -76,6 +83,8 @@ int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
     // TODO(148346217): Make XPlaneVisitor support KernelLaunch and
     // KernelExecute event types.
     return *kernel_event_type;
+  } else if (IsTfOpEvent(visitor, event)) {
+    return HostEventType::kTfOpRun;
   } else {
     return HostEventType::kUnknownHostEventType;
   }
@@ -183,6 +192,12 @@ void EventNode::SetIsEager(bool is_eager) {
                               is_eager ? 1 : 0, event_);
 }
 
+bool EventNode::IsEager() {
+  // It is eagerly executed if its trace context does not include the TF
+  // executor.
+  return FindParent(HostEventType::kExecutorStateProcess) == nullptr;
+}
+
 bool EventNode::IsNestedIn(EventNode* parent) {
   return parent && IsNested(GetEvent(), parent->GetEvent());
 }
@@ -290,16 +305,21 @@ void EventForest::CreateEventGroup(
   }
 }
 
-void EventForest::MarkEagerlyExecutedKernels() {
+void EventForest::MarkEagerlyExecutedGpuKernels() {
   auto kernel_execute_event_node_list =
       gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute);
   if (!kernel_execute_event_node_list) return;
   for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) {
-    // A kernel is eagerly executed if its trace context does not include the
-    // TF executor.
-    bool is_eager = kernel_execute_event_node->FindParent(
-                        HostEventType::kExecutorStateProcess) == nullptr;
-    kernel_execute_event_node->SetIsEager(is_eager);
+    kernel_execute_event_node->SetIsEager(kernel_execute_event_node->IsEager());
+  }
+}
+
+void EventForest::MarkEagerlyExecutedCpuTfOps() {
+  auto tf_op_run_event_node_list =
+      gtl::FindOrNull(event_node_map_, HostEventType::kTfOpRun);
+  if (!tf_op_run_event_node_list) return;
+  for (auto& tf_op_run_event_node : *tf_op_run_event_node_list) {
+    tf_op_run_event_node->SetIsEager(tf_op_run_event_node->IsEager());
   }
 }
@@ -381,7 +401,8 @@ EventForest::EventForest(
     CreateVirtualEventsForAsyncExecutor();
   }
   CreateEventGroup(root_event_types);
-  MarkEagerlyExecutedKernels();
+  MarkEagerlyExecutedGpuKernels();
+  MarkEagerlyExecutedCpuTfOps();
 }
 
 std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {

View File

@@ -73,6 +73,9 @@ class EventNode {
 
   void SetIsEager(bool is_eager);
 
+  // Returns true if this event is part of eagerly executed op.
+  bool IsEager();
+
   bool IsNestedIn(EventNode* parent);
 
   // Returns the closest parent of the given event type.
@@ -125,8 +128,11 @@ class EventForest {
   void CreateEventGroup(
       const std::vector<int64 /*EventType*/>& root_event_types);
 
-  // Sets the is_eager stat to true for the eagerly executed kernel events.
-  void MarkEagerlyExecutedKernels();
+  // Sets the is_eager stat to true for the eagerly executed GPU kernel events.
+  void MarkEagerlyExecutedGpuKernels();
+
+  // Sets the is_eager stat to true for the eagerly executed CPU TF op events.
+  void MarkEagerlyExecutedCpuTfOps();
 
   // Create virtual events of HostEventType::kHostTrainingLoopIteration and
   // event nodes for them. A virtual event is created for each iteration of the

View File

@@ -145,7 +145,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) {
 
 TEST(GroupEventsTest, EagerOpTest) {
   XSpace space;
-  XPlaneBuilder host_plane_builder(space.add_planes());
+  XPlane* host_plane = space.add_planes();
+  XPlaneBuilder host_plane_builder(host_plane);
   host_plane_builder.SetName(kHostThreads);
   host_plane_builder.ReserveLines(1);
 
@@ -153,8 +154,8 @@ TEST(GroupEventsTest, EagerOpTest) {
   // Eagerly scheduled GPU kernel.
   CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
                {{StatType::kCorrelationId, 100}});
-  CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun,
-               110, 200, {{StatType::kStepId, 0}});
+  // Eagerly executed CPU TF op.
+  CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
 
   XPlane* device_plane = space.add_planes();
   XPlaneBuilder device_plane_builder(device_plane);
@@ -166,17 +167,24 @@ TEST(GroupEventsTest, EagerOpTest) {
                {{StatType::kCorrelationId, 100}});
 
   GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
-  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 2);
-  EXPECT_EQ(device_plane_visitor.GetStatType(
-                device_plane->lines(0).events(0).stats(1)),
-            StatType::kIsEager);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 1);
+  XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
+  const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(1);
+  EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1);
+  EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)),
+            StatType::kIsEager);
+  EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1);
+  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
+  const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0);
+  EXPECT_EQ(eager_gpu_kernel.stats_size(), 2);
+  EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)),
+            StatType::kIsEager);
+  EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1);
 }
 
 TEST(GroupEventsTest, FunctionOpTest) {
   XSpace space;
-  XPlaneBuilder host_plane_builder(space.add_planes());
+  XPlane* host_plane = space.add_planes();
+  XPlaneBuilder host_plane_builder(host_plane);
   host_plane_builder.SetName(kHostThreads);
   host_plane_builder.ReserveLines(2);
 
@@ -191,8 +199,10 @@ TEST(GroupEventsTest, FunctionOpTest) {
                          HostEventType::kExecutorStateProcess, 20, 80,
                          {{StatType::kStepId, 0}});
   // GPU kernel scheduled inside tf.function.
-  CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70,
+  CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
                {{StatType::kCorrelationId, 100}});
+  // CPU TF op executed inside tf.function.
+  CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
 
   XPlane* device_plane = space.add_planes();
   XPlaneBuilder device_plane_builder(device_plane);
@@ -204,12 +214,18 @@ TEST(GroupEventsTest, FunctionOpTest) {
                {{StatType::kCorrelationId, 100}});
 
   GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
-  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3);
-  EXPECT_EQ(device_plane_visitor.GetStatType(
-                device_plane->lines(0).events(0).stats(2)),
-            StatType::kIsEager);
-  EXPECT_EQ(device_plane->lines(0).events(0).stats(2).int64_value(), 0);
+  XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
+  const XEvent& cpu_tf_op = host_plane->lines(1).events(2);
+  EXPECT_EQ(cpu_tf_op.stats_size(), 2);
+  EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)),
+            StatType::kIsEager);
+  EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0);
+  XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
+  const XEvent& gpu_kernel = device_plane->lines(0).events(0);
+  EXPECT_EQ(gpu_kernel.stats_size(), 3);
+  EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)),
+            StatType::kIsEager);
+  EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0);
 }
 
 }  // namespace

View File

@@ -34,13 +34,14 @@ double GetCappedPerf(double perf, uint64 time, double rate_limit) {
 }  // namespace
 
 void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
-                                     absl::string_view category, uint64 time_ps,
-                                     uint64 children_time_ps) {
+                                     absl::string_view category, bool is_eager,
+                                     uint64 time_ps, uint64 children_time_ps) {
   uint64 self_time_ps = time_ps - children_time_ps;
   DCHECK_GE(time_ps, self_time_ps);
   OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
   if (op_metrics->category().empty())
     op_metrics->set_category(category.data(), category.size());
+  op_metrics->set_is_eager(is_eager);
   op_metrics->set_occurrences(op_metrics->occurrences() + 1);
   op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
   op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);

View File

@@ -35,12 +35,13 @@ class HostOpMetricsDbBuilder : public OpMetricsDbBuilder {
   // observed on a trace, where:
   //   name = the OP name.
   //   category = the OP category.
+  //   is_eager = whether this OP is eagerly executed.
   //   time_ps = the total execution time of the OP in picoseconds, including
   //             the execution time of its children.
   //   children_time_ps = the execution time of the children of this OP in
   //                      picoseconds
   void EnterOp(absl::string_view name, absl::string_view category,
-               uint64 time_ps, uint64 children_time_ps);
+               bool is_eager, uint64 time_ps, uint64 children_time_ps);
 
   // Updates total_host_infeed_enq_duration_ps_ and
   // total_host_infeed_enq_duration_ps_.

View File

@@ -53,6 +53,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
       {"SessionRun", kSessionRun},
       {"FunctionRun", kFunctionRun},
       {"RunGraph", kRunGraph},
+      {"TfOpRun", kTfOpRun},
       {"EagerKernelExecute", kEagerKernelExecute},
       {"ExecutorState::Process", kExecutorStateProcess},
       {"ExecutorDoneCallback", kExecutorDoneCallback},

View File

@@ -52,6 +52,7 @@ enum HostEventType {
   kSessionRun,
   kFunctionRun,
   kRunGraph,
+  kTfOpRun,
   kEagerKernelExecute,
   kExecutorStateProcess,
   kExecutorDoneCallback,
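
Illustrative sketch (not part of this commit): the diff threads the eager-execution flag from the kIsEager event stat into OpMetrics via HostOpMetricsDbBuilder::EnterOp(name, category, is_eager, ...). A downstream consumer could then use the new field roughly as below. EagerSelfTimePs is a hypothetical helper, and it assumes the OpMetricsDb proto exposes its entries through metrics_db() and the self-time through self_time_ps().

// Hypothetical helper, not part of this change: total self time spent in
// eagerly executed ops recorded in a host OpMetricsDb.
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"

namespace tensorflow {
namespace profiler {

uint64 EagerSelfTimePs(const OpMetricsDb& db) {
  uint64 total_ps = 0;
  for (const OpMetrics& op_metrics : db.metrics_db()) {
    // is_eager is the OpMetrics field populated by set_is_eager() above.
    if (op_metrics.is_eager()) total_ps += op_metrics.self_time_ps();
  }
  return total_ps;
}

}  // namespace profiler
}  // namespace tensorflow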