Set is_eager for CPU TF op's OpMetrics.

PiperOrigin-RevId: 306933924
Change-Id: I566f76c697e82bf67368f1e7699c7e71dc5a16bd
This commit is contained in:
Jiho Choi 2020-04-16 15:32:00 -07:00 committed by TensorFlower Gardener
parent 50fe4f8b12
commit 9d0c55d9f0
9 changed files with 89 additions and 31 deletions

View File

@@ -48,6 +48,8 @@ struct TfActivity {
TfActivityType activity_type;
// Full TF op name and type of this activity (backed by XEvent::name).
TfOp tf_op;
// Whether it is eagerly executed.
bool is_eager;
};
// TF Op metrics stored as element in OpStack.
@@ -83,8 +85,8 @@ void ProcessOneTfActivity(const TfActivity& activity,
Timespan tf_op_span =
PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
tf_metrics_data->tf_metrics_db_builder.EnterOp(
activity.tf_op.name, activity.tf_op.type, tf_op_span.duration_ps(),
info->children_duration_ps);
activity.tf_op.name, activity.tf_op.type, activity.is_eager,
tf_op_span.duration_ps(), info->children_duration_ps);
TfOpInfo* parent_info = tf_op_stack->Top();
if (parent_info != nullptr) {
parent_info->children_duration_ps += tf_op_span.duration_ps();
@@ -135,9 +137,17 @@ void CollectTfActivities(const XLineVisitor& line,
const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
if (tf_op != nullptr) {
++tf_op_id;
bool is_eager = false;
event.ForEachStat([&](const XStatVisitor& stat) {
if (stat.Type() == StatType::kIsEager) {
is_eager = stat.IntValue();
}
});
Timespan span(event.TimestampPs(), event.DurationPs());
tf_activities->push_back({span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op});
tf_activities->push_back({span.end_ps(), tf_op_id, kTfOpEnd, *tf_op});
tf_activities->push_back(
{span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
tf_activities->push_back(
{span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
}
});
}

View File

@@ -228,6 +228,7 @@ cc_library(
hdrs = ["group_events.h"],
visibility = [":friends"],
deps = [
":tf_op_utils",
":tf_xplane_visitor",
":xplane_schema",
":xplane_utils",

View File

@@ -20,6 +20,7 @@ limitations under the License.
#include "absl/strings/str_join.h"
#include "absl/types/optional.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
@@ -66,6 +67,12 @@ absl::optional<int64> GetKernelEventType(const XPlaneVisitor& visitor,
return absl::nullopt;
}
bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) {
TfOp tf_op =
ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name());
return tf_op.category == Category::kTensorFlow;
}
int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
if (absl::optional<int64> event_type = visitor.GetEventType(event)) {
return *event_type;
@@ -76,6 +83,8 @@ int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
// TODO(148346217): Make XPlaneVisitor support KernelLaunch and
// KernelExecute event types.
return *kernel_event_type;
} else if (IsTfOpEvent(visitor, event)) {
return HostEventType::kTfOpRun;
} else {
return HostEventType::kUnknownHostEventType;
}
@@ -183,6 +192,12 @@ void EventNode::SetIsEager(bool is_eager) {
is_eager ? 1 : 0, event_);
}
bool EventNode::IsEager() {
  // Eager execution leaves no TF executor (ExecutorState::Process) ancestor
  // in the trace context, so the absence of one marks the op as eager.
  auto* executor_ancestor = FindParent(HostEventType::kExecutorStateProcess);
  return executor_ancestor == nullptr;
}
// True iff `parent` is non-null and this event's span lies inside it.
bool EventNode::IsNestedIn(EventNode* parent) {
  if (parent == nullptr) return false;
  return IsNested(GetEvent(), parent->GetEvent());
}
@@ -290,16 +305,21 @@ void EventForest::CreateEventGroup(
}
}
void EventForest::MarkEagerlyExecutedKernels() {
void EventForest::MarkEagerlyExecutedGpuKernels() {
auto kernel_execute_event_node_list =
gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute);
if (!kernel_execute_event_node_list) return;
for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) {
// A kernel is eagerly executed if its trace context does not include the
// TF executor.
bool is_eager = kernel_execute_event_node->FindParent(
HostEventType::kExecutorStateProcess) == nullptr;
kernel_execute_event_node->SetIsEager(is_eager);
kernel_execute_event_node->SetIsEager(kernel_execute_event_node->IsEager());
}
}
// Sets the is_eager stat on every TfOpRun event node, based on whether the
// node's trace context includes the TF executor.
void EventForest::MarkEagerlyExecutedCpuTfOps() {
  auto* tf_op_run_nodes =
      gtl::FindOrNull(event_node_map_, HostEventType::kTfOpRun);
  // Nothing to mark if no TfOpRun events were recorded.
  if (tf_op_run_nodes == nullptr) return;
  for (auto& node : *tf_op_run_nodes) {
    node->SetIsEager(node->IsEager());
  }
}
@@ -381,7 +401,8 @@ EventForest::EventForest(
CreateVirtualEventsForAsyncExecutor();
}
CreateEventGroup(root_event_types);
MarkEagerlyExecutedKernels();
MarkEagerlyExecutedGpuKernels();
MarkEagerlyExecutedCpuTfOps();
}
std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {

View File

@@ -73,6 +73,9 @@ class EventNode {
void SetIsEager(bool is_eager);
// Returns true if this event is part of eagerly executed op.
bool IsEager();
bool IsNestedIn(EventNode* parent);
// Returns the closest parent of the given event type.
@@ -125,8 +128,11 @@ class EventForest {
void CreateEventGroup(
const std::vector<int64 /*EventType*/>& root_event_types);
// Sets the is_eager stat to true for the eagerly executed kernel events.
void MarkEagerlyExecutedKernels();
// Sets the is_eager stat to true for the eagerly executed GPU kernel events.
void MarkEagerlyExecutedGpuKernels();
// Sets the is_eager stat to true for the eagerly executed CPU TF op events.
void MarkEagerlyExecutedCpuTfOps();
// Create virtual events of HostEventType::kHostTrainingLoopIteration and
// event nodes for them. A virtual event is created for each iteration of the

View File

@@ -145,7 +145,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) {
TEST(GroupEventsTest, EagerOpTest) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
XPlane* host_plane = space.add_planes();
XPlaneBuilder host_plane_builder(host_plane);
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(1);
@@ -153,8 +154,8 @@ TEST(GroupEventsTest, EagerOpTest) {
// Eagerly scheduled GPU kernel.
CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
{{StatType::kCorrelationId, 100}});
CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun,
110, 200, {{StatType::kStepId, 0}});
// Eagerly executed CPU TF op.
CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);
@@ -166,17 +167,24 @@ TEST(GroupEventsTest, EagerOpTest) {
{{StatType::kCorrelationId, 100}});
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 2);
EXPECT_EQ(device_plane_visitor.GetStatType(
device_plane->lines(0).events(0).stats(1)),
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(1);
EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1);
EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)),
StatType::kIsEager);
EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 1);
EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1);
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0);
EXPECT_EQ(eager_gpu_kernel.stats_size(), 2);
EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)),
StatType::kIsEager);
EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1);
}
TEST(GroupEventsTest, FunctionOpTest) {
XSpace space;
XPlaneBuilder host_plane_builder(space.add_planes());
XPlane* host_plane = space.add_planes();
XPlaneBuilder host_plane_builder(host_plane);
host_plane_builder.SetName(kHostThreads);
host_plane_builder.ReserveLines(2);
@@ -191,8 +199,10 @@ TEST(GroupEventsTest, FunctionOpTest) {
HostEventType::kExecutorStateProcess, 20, 80,
{{StatType::kStepId, 0}});
// GPU kernel scheduled inside tf.function.
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70,
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
{{StatType::kCorrelationId, 100}});
// CPU TF op executed inside tf.function.
CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);
@@ -204,12 +214,18 @@ TEST(GroupEventsTest, FunctionOpTest) {
{{StatType::kCorrelationId, 100}});
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3);
EXPECT_EQ(device_plane_visitor.GetStatType(
device_plane->lines(0).events(0).stats(2)),
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
const XEvent& cpu_tf_op = host_plane->lines(1).events(2);
EXPECT_EQ(cpu_tf_op.stats_size(), 2);
EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)),
StatType::kIsEager);
EXPECT_EQ(device_plane->lines(0).events(0).stats(2).int64_value(), 0);
EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0);
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
const XEvent& gpu_kernel = device_plane->lines(0).events(0);
EXPECT_EQ(gpu_kernel.stats_size(), 3);
EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)),
StatType::kIsEager);
EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0);
}
} // namespace

View File

@@ -34,13 +34,14 @@ double GetCappedPerf(double perf, uint64 time, double rate_limit) {
} // namespace
void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
absl::string_view category, uint64 time_ps,
uint64 children_time_ps) {
absl::string_view category, bool is_eager,
uint64 time_ps, uint64 children_time_ps) {
uint64 self_time_ps = time_ps - children_time_ps;
DCHECK_GE(time_ps, self_time_ps);
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
if (op_metrics->category().empty())
op_metrics->set_category(category.data(), category.size());
op_metrics->set_is_eager(is_eager);
op_metrics->set_occurrences(op_metrics->occurrences() + 1);
op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);

View File

@@ -35,12 +35,13 @@ class HostOpMetricsDbBuilder : public OpMetricsDbBuilder {
// observed on a trace, where:
// name = the OP name.
// category = the OP category.
// is_eager = whether this OP is eagerly executed.
// time_ps = the total execution time of the OP in picoseconds, including
// the execution time of its children.
// children_time_ps = the execution time of the children of this OP in
// picoseconds
void EnterOp(absl::string_view name, absl::string_view category,
uint64 time_ps, uint64 children_time_ps);
bool is_eager, uint64 time_ps, uint64 children_time_ps);
// Updates total_host_infeed_enq_duration_ps_ and
// total_host_infeed_enq_duration_ps_.

View File

@@ -53,6 +53,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
{"SessionRun", kSessionRun},
{"FunctionRun", kFunctionRun},
{"RunGraph", kRunGraph},
{"TfOpRun", kTfOpRun},
{"EagerKernelExecute", kEagerKernelExecute},
{"ExecutorState::Process", kExecutorStateProcess},
{"ExecutorDoneCallback", kExecutorDoneCallback},

View File

@@ -52,6 +52,7 @@ enum HostEventType {
kSessionRun,
kFunctionRun,
kRunGraph,
kTfOpRun,
kEagerKernelExecute,
kExecutorStateProcess,
kExecutorDoneCallback,