Set is_eager for CPU TF op's OpMetrics.
PiperOrigin-RevId: 306933924 Change-Id: I566f76c697e82bf67368f1e7699c7e71dc5a16bd
This commit is contained in:
parent
50fe4f8b12
commit
9d0c55d9f0
@ -48,6 +48,8 @@ struct TfActivity {
|
|||||||
TfActivityType activity_type;
|
TfActivityType activity_type;
|
||||||
// Full TF op name and type of this activity (backed by XEvent::name).
|
// Full TF op name and type of this activity (backed by XEvent::name).
|
||||||
TfOp tf_op;
|
TfOp tf_op;
|
||||||
|
// Whether it is eagerly executed.
|
||||||
|
bool is_eager;
|
||||||
};
|
};
|
||||||
|
|
||||||
// TF Op metrics stored as element in OpStack.
|
// TF Op metrics stored as element in OpStack.
|
||||||
@ -83,8 +85,8 @@ void ProcessOneTfActivity(const TfActivity& activity,
|
|||||||
Timespan tf_op_span =
|
Timespan tf_op_span =
|
||||||
PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
|
PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
|
||||||
tf_metrics_data->tf_metrics_db_builder.EnterOp(
|
tf_metrics_data->tf_metrics_db_builder.EnterOp(
|
||||||
activity.tf_op.name, activity.tf_op.type, tf_op_span.duration_ps(),
|
activity.tf_op.name, activity.tf_op.type, activity.is_eager,
|
||||||
info->children_duration_ps);
|
tf_op_span.duration_ps(), info->children_duration_ps);
|
||||||
TfOpInfo* parent_info = tf_op_stack->Top();
|
TfOpInfo* parent_info = tf_op_stack->Top();
|
||||||
if (parent_info != nullptr) {
|
if (parent_info != nullptr) {
|
||||||
parent_info->children_duration_ps += tf_op_span.duration_ps();
|
parent_info->children_duration_ps += tf_op_span.duration_ps();
|
||||||
@ -135,9 +137,17 @@ void CollectTfActivities(const XLineVisitor& line,
|
|||||||
const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
|
const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
|
||||||
if (tf_op != nullptr) {
|
if (tf_op != nullptr) {
|
||||||
++tf_op_id;
|
++tf_op_id;
|
||||||
|
bool is_eager = false;
|
||||||
|
event.ForEachStat([&](const XStatVisitor& stat) {
|
||||||
|
if (stat.Type() == StatType::kIsEager) {
|
||||||
|
is_eager = stat.IntValue();
|
||||||
|
}
|
||||||
|
});
|
||||||
Timespan span(event.TimestampPs(), event.DurationPs());
|
Timespan span(event.TimestampPs(), event.DurationPs());
|
||||||
tf_activities->push_back({span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op});
|
tf_activities->push_back(
|
||||||
tf_activities->push_back({span.end_ps(), tf_op_id, kTfOpEnd, *tf_op});
|
{span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
|
||||||
|
tf_activities->push_back(
|
||||||
|
{span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -228,6 +228,7 @@ cc_library(
|
|||||||
hdrs = ["group_events.h"],
|
hdrs = ["group_events.h"],
|
||||||
visibility = [":friends"],
|
visibility = [":friends"],
|
||||||
deps = [
|
deps = [
|
||||||
|
":tf_op_utils",
|
||||||
":tf_xplane_visitor",
|
":tf_xplane_visitor",
|
||||||
":xplane_schema",
|
":xplane_schema",
|
||||||
":xplane_utils",
|
":xplane_utils",
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
#include "absl/strings/str_join.h"
|
#include "absl/strings/str_join.h"
|
||||||
#include "absl/types/optional.h"
|
#include "absl/types/optional.h"
|
||||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||||
|
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||||
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
||||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||||
@ -66,6 +67,12 @@ absl::optional<int64> GetKernelEventType(const XPlaneVisitor& visitor,
|
|||||||
return absl::nullopt;
|
return absl::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) {
|
||||||
|
TfOp tf_op =
|
||||||
|
ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name());
|
||||||
|
return tf_op.category == Category::kTensorFlow;
|
||||||
|
}
|
||||||
|
|
||||||
int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
|
int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
|
||||||
if (absl::optional<int64> event_type = visitor.GetEventType(event)) {
|
if (absl::optional<int64> event_type = visitor.GetEventType(event)) {
|
||||||
return *event_type;
|
return *event_type;
|
||||||
@ -76,6 +83,8 @@ int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
|
|||||||
// TODO(148346217): Make XPlaneVisitor support KernelLaunch and
|
// TODO(148346217): Make XPlaneVisitor support KernelLaunch and
|
||||||
// KernelExecute event types.
|
// KernelExecute event types.
|
||||||
return *kernel_event_type;
|
return *kernel_event_type;
|
||||||
|
} else if (IsTfOpEvent(visitor, event)) {
|
||||||
|
return HostEventType::kTfOpRun;
|
||||||
} else {
|
} else {
|
||||||
return HostEventType::kUnknownHostEventType;
|
return HostEventType::kUnknownHostEventType;
|
||||||
}
|
}
|
||||||
@ -183,6 +192,12 @@ void EventNode::SetIsEager(bool is_eager) {
|
|||||||
is_eager ? 1 : 0, event_);
|
is_eager ? 1 : 0, event_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool EventNode::IsEager() {
|
||||||
|
// It is eagerly executed if its trace context does not include the TF
|
||||||
|
// executor.
|
||||||
|
return FindParent(HostEventType::kExecutorStateProcess) == nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
bool EventNode::IsNestedIn(EventNode* parent) {
|
bool EventNode::IsNestedIn(EventNode* parent) {
|
||||||
return parent && IsNested(GetEvent(), parent->GetEvent());
|
return parent && IsNested(GetEvent(), parent->GetEvent());
|
||||||
}
|
}
|
||||||
@ -290,16 +305,21 @@ void EventForest::CreateEventGroup(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EventForest::MarkEagerlyExecutedKernels() {
|
void EventForest::MarkEagerlyExecutedGpuKernels() {
|
||||||
auto kernel_execute_event_node_list =
|
auto kernel_execute_event_node_list =
|
||||||
gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute);
|
gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute);
|
||||||
if (!kernel_execute_event_node_list) return;
|
if (!kernel_execute_event_node_list) return;
|
||||||
for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) {
|
for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) {
|
||||||
// A kernel is eagerly executed if its trace context does not include the
|
kernel_execute_event_node->SetIsEager(kernel_execute_event_node->IsEager());
|
||||||
// TF executor.
|
}
|
||||||
bool is_eager = kernel_execute_event_node->FindParent(
|
}
|
||||||
HostEventType::kExecutorStateProcess) == nullptr;
|
|
||||||
kernel_execute_event_node->SetIsEager(is_eager);
|
void EventForest::MarkEagerlyExecutedCpuTfOps() {
|
||||||
|
auto tf_op_run_event_node_list =
|
||||||
|
gtl::FindOrNull(event_node_map_, HostEventType::kTfOpRun);
|
||||||
|
if (!tf_op_run_event_node_list) return;
|
||||||
|
for (auto& tf_op_run_event_node : *tf_op_run_event_node_list) {
|
||||||
|
tf_op_run_event_node->SetIsEager(tf_op_run_event_node->IsEager());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -381,7 +401,8 @@ EventForest::EventForest(
|
|||||||
CreateVirtualEventsForAsyncExecutor();
|
CreateVirtualEventsForAsyncExecutor();
|
||||||
}
|
}
|
||||||
CreateEventGroup(root_event_types);
|
CreateEventGroup(root_event_types);
|
||||||
MarkEagerlyExecutedKernels();
|
MarkEagerlyExecutedGpuKernels();
|
||||||
|
MarkEagerlyExecutedCpuTfOps();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {
|
std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {
|
||||||
|
@ -73,6 +73,9 @@ class EventNode {
|
|||||||
|
|
||||||
void SetIsEager(bool is_eager);
|
void SetIsEager(bool is_eager);
|
||||||
|
|
||||||
|
// Returns true if this event is part of eagerly executed op.
|
||||||
|
bool IsEager();
|
||||||
|
|
||||||
bool IsNestedIn(EventNode* parent);
|
bool IsNestedIn(EventNode* parent);
|
||||||
|
|
||||||
// Returns the closest parent of the given event type.
|
// Returns the closest parent of the given event type.
|
||||||
@ -125,8 +128,11 @@ class EventForest {
|
|||||||
void CreateEventGroup(
|
void CreateEventGroup(
|
||||||
const std::vector<int64 /*EventType*/>& root_event_types);
|
const std::vector<int64 /*EventType*/>& root_event_types);
|
||||||
|
|
||||||
// Sets the is_eager stat to true for the eagerly executed kernel events.
|
// Sets the is_eager stat to true for the eagerly executed GPU kernel events.
|
||||||
void MarkEagerlyExecutedKernels();
|
void MarkEagerlyExecutedGpuKernels();
|
||||||
|
|
||||||
|
// Sets the is_eager stat to true for the eagerly executed CPU TF op events.
|
||||||
|
void MarkEagerlyExecutedCpuTfOps();
|
||||||
|
|
||||||
// Create virtual events of HostEventType::kHostTrainingLoopIteration and
|
// Create virtual events of HostEventType::kHostTrainingLoopIteration and
|
||||||
// event nodes for them. A virtual event is created for each iteration of the
|
// event nodes for them. A virtual event is created for each iteration of the
|
||||||
|
@ -145,7 +145,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) {
|
|||||||
|
|
||||||
TEST(GroupEventsTest, EagerOpTest) {
|
TEST(GroupEventsTest, EagerOpTest) {
|
||||||
XSpace space;
|
XSpace space;
|
||||||
XPlaneBuilder host_plane_builder(space.add_planes());
|
XPlane* host_plane = space.add_planes();
|
||||||
|
XPlaneBuilder host_plane_builder(host_plane);
|
||||||
host_plane_builder.SetName(kHostThreads);
|
host_plane_builder.SetName(kHostThreads);
|
||||||
host_plane_builder.ReserveLines(1);
|
host_plane_builder.ReserveLines(1);
|
||||||
|
|
||||||
@ -153,8 +154,8 @@ TEST(GroupEventsTest, EagerOpTest) {
|
|||||||
// Eagerly scheduled GPU kernel.
|
// Eagerly scheduled GPU kernel.
|
||||||
CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
|
CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
|
||||||
{{StatType::kCorrelationId, 100}});
|
{{StatType::kCorrelationId, 100}});
|
||||||
CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun,
|
// Eagerly executed CPU TF op.
|
||||||
110, 200, {{StatType::kStepId, 0}});
|
CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
|
||||||
|
|
||||||
XPlane* device_plane = space.add_planes();
|
XPlane* device_plane = space.add_planes();
|
||||||
XPlaneBuilder device_plane_builder(device_plane);
|
XPlaneBuilder device_plane_builder(device_plane);
|
||||||
@ -166,17 +167,24 @@ TEST(GroupEventsTest, EagerOpTest) {
|
|||||||
{{StatType::kCorrelationId, 100}});
|
{{StatType::kCorrelationId, 100}});
|
||||||
|
|
||||||
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
||||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
|
||||||
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 2);
|
const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(1);
|
||||||
EXPECT_EQ(device_plane_visitor.GetStatType(
|
EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1);
|
||||||
device_plane->lines(0).events(0).stats(1)),
|
EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)),
|
||||||
StatType::kIsEager);
|
StatType::kIsEager);
|
||||||
EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 1);
|
EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1);
|
||||||
|
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||||
|
const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0);
|
||||||
|
EXPECT_EQ(eager_gpu_kernel.stats_size(), 2);
|
||||||
|
EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)),
|
||||||
|
StatType::kIsEager);
|
||||||
|
EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(GroupEventsTest, FunctionOpTest) {
|
TEST(GroupEventsTest, FunctionOpTest) {
|
||||||
XSpace space;
|
XSpace space;
|
||||||
XPlaneBuilder host_plane_builder(space.add_planes());
|
XPlane* host_plane = space.add_planes();
|
||||||
|
XPlaneBuilder host_plane_builder(host_plane);
|
||||||
host_plane_builder.SetName(kHostThreads);
|
host_plane_builder.SetName(kHostThreads);
|
||||||
host_plane_builder.ReserveLines(2);
|
host_plane_builder.ReserveLines(2);
|
||||||
|
|
||||||
@ -191,8 +199,10 @@ TEST(GroupEventsTest, FunctionOpTest) {
|
|||||||
HostEventType::kExecutorStateProcess, 20, 80,
|
HostEventType::kExecutorStateProcess, 20, 80,
|
||||||
{{StatType::kStepId, 0}});
|
{{StatType::kStepId, 0}});
|
||||||
// GPU kernel scheduled inside tf.function.
|
// GPU kernel scheduled inside tf.function.
|
||||||
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70,
|
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
|
||||||
{{StatType::kCorrelationId, 100}});
|
{{StatType::kCorrelationId, 100}});
|
||||||
|
// CPU TF op executed inside tf.function.
|
||||||
|
CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
|
||||||
|
|
||||||
XPlane* device_plane = space.add_planes();
|
XPlane* device_plane = space.add_planes();
|
||||||
XPlaneBuilder device_plane_builder(device_plane);
|
XPlaneBuilder device_plane_builder(device_plane);
|
||||||
@ -204,12 +214,18 @@ TEST(GroupEventsTest, FunctionOpTest) {
|
|||||||
{{StatType::kCorrelationId, 100}});
|
{{StatType::kCorrelationId, 100}});
|
||||||
|
|
||||||
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
||||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
|
||||||
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3);
|
const XEvent& cpu_tf_op = host_plane->lines(1).events(2);
|
||||||
EXPECT_EQ(device_plane_visitor.GetStatType(
|
EXPECT_EQ(cpu_tf_op.stats_size(), 2);
|
||||||
device_plane->lines(0).events(0).stats(2)),
|
EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)),
|
||||||
StatType::kIsEager);
|
StatType::kIsEager);
|
||||||
EXPECT_EQ(device_plane->lines(0).events(0).stats(2).int64_value(), 0);
|
EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0);
|
||||||
|
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||||
|
const XEvent& gpu_kernel = device_plane->lines(0).events(0);
|
||||||
|
EXPECT_EQ(gpu_kernel.stats_size(), 3);
|
||||||
|
EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)),
|
||||||
|
StatType::kIsEager);
|
||||||
|
EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -34,13 +34,14 @@ double GetCappedPerf(double perf, uint64 time, double rate_limit) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
|
void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
|
||||||
absl::string_view category, uint64 time_ps,
|
absl::string_view category, bool is_eager,
|
||||||
uint64 children_time_ps) {
|
uint64 time_ps, uint64 children_time_ps) {
|
||||||
uint64 self_time_ps = time_ps - children_time_ps;
|
uint64 self_time_ps = time_ps - children_time_ps;
|
||||||
DCHECK_GE(time_ps, self_time_ps);
|
DCHECK_GE(time_ps, self_time_ps);
|
||||||
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
|
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
|
||||||
if (op_metrics->category().empty())
|
if (op_metrics->category().empty())
|
||||||
op_metrics->set_category(category.data(), category.size());
|
op_metrics->set_category(category.data(), category.size());
|
||||||
|
op_metrics->set_is_eager(is_eager);
|
||||||
op_metrics->set_occurrences(op_metrics->occurrences() + 1);
|
op_metrics->set_occurrences(op_metrics->occurrences() + 1);
|
||||||
op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
|
op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
|
||||||
op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);
|
op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);
|
||||||
|
@ -35,12 +35,13 @@ class HostOpMetricsDbBuilder : public OpMetricsDbBuilder {
|
|||||||
// observed on a trace, where:
|
// observed on a trace, where:
|
||||||
// name = the OP name.
|
// name = the OP name.
|
||||||
// category = the OP category.
|
// category = the OP category.
|
||||||
|
// is_eager = whether this OP is eagerly executed.
|
||||||
// time_ps = the total execution time of the OP in picoseconds, including
|
// time_ps = the total execution time of the OP in picoseconds, including
|
||||||
// the execution time of its children.
|
// the execution time of its children.
|
||||||
// children_time_ps = the execution time of the children of this OP in
|
// children_time_ps = the execution time of the children of this OP in
|
||||||
// picoseconds
|
// picoseconds
|
||||||
void EnterOp(absl::string_view name, absl::string_view category,
|
void EnterOp(absl::string_view name, absl::string_view category,
|
||||||
uint64 time_ps, uint64 children_time_ps);
|
bool is_eager, uint64 time_ps, uint64 children_time_ps);
|
||||||
|
|
||||||
// Updates total_host_infeed_enq_duration_ps_ and
|
// Updates total_host_infeed_enq_duration_ps_ and
|
||||||
// total_host_infeed_enq_duration_ps_.
|
// total_host_infeed_enq_duration_ps_.
|
||||||
|
@ -53,6 +53,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
|
|||||||
{"SessionRun", kSessionRun},
|
{"SessionRun", kSessionRun},
|
||||||
{"FunctionRun", kFunctionRun},
|
{"FunctionRun", kFunctionRun},
|
||||||
{"RunGraph", kRunGraph},
|
{"RunGraph", kRunGraph},
|
||||||
|
{"TfOpRun", kTfOpRun},
|
||||||
{"EagerKernelExecute", kEagerKernelExecute},
|
{"EagerKernelExecute", kEagerKernelExecute},
|
||||||
{"ExecutorState::Process", kExecutorStateProcess},
|
{"ExecutorState::Process", kExecutorStateProcess},
|
||||||
{"ExecutorDoneCallback", kExecutorDoneCallback},
|
{"ExecutorDoneCallback", kExecutorDoneCallback},
|
||||||
|
@ -52,6 +52,7 @@ enum HostEventType {
|
|||||||
kSessionRun,
|
kSessionRun,
|
||||||
kFunctionRun,
|
kFunctionRun,
|
||||||
kRunGraph,
|
kRunGraph,
|
||||||
|
kTfOpRun,
|
||||||
kEagerKernelExecute,
|
kEagerKernelExecute,
|
||||||
kExecutorStateProcess,
|
kExecutorStateProcess,
|
||||||
kExecutorDoneCallback,
|
kExecutorDoneCallback,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user