Set is_eager for CPU TF op's OpMetrics.
PiperOrigin-RevId: 306933924 Change-Id: I566f76c697e82bf67368f1e7699c7e71dc5a16bd
This commit is contained in:
parent
50fe4f8b12
commit
9d0c55d9f0
@ -48,6 +48,8 @@ struct TfActivity {
|
||||
TfActivityType activity_type;
|
||||
// Full TF op name and type of this activity (backed by XEvent::name).
|
||||
TfOp tf_op;
|
||||
// Whether it is eagerly executed.
|
||||
bool is_eager;
|
||||
};
|
||||
|
||||
// TF Op metrics stored as element in OpStack.
|
||||
@ -83,8 +85,8 @@ void ProcessOneTfActivity(const TfActivity& activity,
|
||||
Timespan tf_op_span =
|
||||
PicoSpan(info->start_timestamp_ps, activity.timestamp_ps);
|
||||
tf_metrics_data->tf_metrics_db_builder.EnterOp(
|
||||
activity.tf_op.name, activity.tf_op.type, tf_op_span.duration_ps(),
|
||||
info->children_duration_ps);
|
||||
activity.tf_op.name, activity.tf_op.type, activity.is_eager,
|
||||
tf_op_span.duration_ps(), info->children_duration_ps);
|
||||
TfOpInfo* parent_info = tf_op_stack->Top();
|
||||
if (parent_info != nullptr) {
|
||||
parent_info->children_duration_ps += tf_op_span.duration_ps();
|
||||
@ -135,9 +137,17 @@ void CollectTfActivities(const XLineVisitor& line,
|
||||
const TfOp* tf_op = gtl::FindOrNull(tf_ops, event.Id());
|
||||
if (tf_op != nullptr) {
|
||||
++tf_op_id;
|
||||
bool is_eager = false;
|
||||
event.ForEachStat([&](const XStatVisitor& stat) {
|
||||
if (stat.Type() == StatType::kIsEager) {
|
||||
is_eager = stat.IntValue();
|
||||
}
|
||||
});
|
||||
Timespan span(event.TimestampPs(), event.DurationPs());
|
||||
tf_activities->push_back({span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op});
|
||||
tf_activities->push_back({span.end_ps(), tf_op_id, kTfOpEnd, *tf_op});
|
||||
tf_activities->push_back(
|
||||
{span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, is_eager});
|
||||
tf_activities->push_back(
|
||||
{span.end_ps(), tf_op_id, kTfOpEnd, *tf_op, is_eager});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -228,6 +228,7 @@ cc_library(
|
||||
hdrs = ["group_events.h"],
|
||||
visibility = [":friends"],
|
||||
deps = [
|
||||
":tf_op_utils",
|
||||
":tf_xplane_visitor",
|
||||
":xplane_schema",
|
||||
":xplane_utils",
|
||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
||||
#include "absl/strings/str_join.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/core/lib/gtl/map_util.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_op_utils.h"
|
||||
#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_schema.h"
|
||||
#include "tensorflow/core/profiler/utils/xplane_utils.h"
|
||||
@ -66,6 +67,12 @@ absl::optional<int64> GetKernelEventType(const XPlaneVisitor& visitor,
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) {
|
||||
TfOp tf_op =
|
||||
ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name());
|
||||
return tf_op.category == Category::kTensorFlow;
|
||||
}
|
||||
|
||||
int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
|
||||
if (absl::optional<int64> event_type = visitor.GetEventType(event)) {
|
||||
return *event_type;
|
||||
@ -76,6 +83,8 @@ int64 GetEventType(const XPlaneVisitor& visitor, const XEvent& event) {
|
||||
// TODO(148346217): Make XPlaneVisitor support KernelLaunch and
|
||||
// KernelExecute event types.
|
||||
return *kernel_event_type;
|
||||
} else if (IsTfOpEvent(visitor, event)) {
|
||||
return HostEventType::kTfOpRun;
|
||||
} else {
|
||||
return HostEventType::kUnknownHostEventType;
|
||||
}
|
||||
@ -183,6 +192,12 @@ void EventNode::SetIsEager(bool is_eager) {
|
||||
is_eager ? 1 : 0, event_);
|
||||
}
|
||||
|
||||
bool EventNode::IsEager() {
|
||||
// It is eagerly executed if its trace context does not include the TF
|
||||
// executor.
|
||||
return FindParent(HostEventType::kExecutorStateProcess) == nullptr;
|
||||
}
|
||||
|
||||
bool EventNode::IsNestedIn(EventNode* parent) {
|
||||
return parent && IsNested(GetEvent(), parent->GetEvent());
|
||||
}
|
||||
@ -290,16 +305,21 @@ void EventForest::CreateEventGroup(
|
||||
}
|
||||
}
|
||||
|
||||
void EventForest::MarkEagerlyExecutedKernels() {
|
||||
void EventForest::MarkEagerlyExecutedGpuKernels() {
|
||||
auto kernel_execute_event_node_list =
|
||||
gtl::FindOrNull(event_node_map_, HostEventType::kKernelExecute);
|
||||
if (!kernel_execute_event_node_list) return;
|
||||
for (auto& kernel_execute_event_node : *kernel_execute_event_node_list) {
|
||||
// A kernel is eagerly executed if its trace context does not include the
|
||||
// TF executor.
|
||||
bool is_eager = kernel_execute_event_node->FindParent(
|
||||
HostEventType::kExecutorStateProcess) == nullptr;
|
||||
kernel_execute_event_node->SetIsEager(is_eager);
|
||||
kernel_execute_event_node->SetIsEager(kernel_execute_event_node->IsEager());
|
||||
}
|
||||
}
|
||||
|
||||
void EventForest::MarkEagerlyExecutedCpuTfOps() {
|
||||
auto tf_op_run_event_node_list =
|
||||
gtl::FindOrNull(event_node_map_, HostEventType::kTfOpRun);
|
||||
if (!tf_op_run_event_node_list) return;
|
||||
for (auto& tf_op_run_event_node : *tf_op_run_event_node_list) {
|
||||
tf_op_run_event_node->SetIsEager(tf_op_run_event_node->IsEager());
|
||||
}
|
||||
}
|
||||
|
||||
@ -381,7 +401,8 @@ EventForest::EventForest(
|
||||
CreateVirtualEventsForAsyncExecutor();
|
||||
}
|
||||
CreateEventGroup(root_event_types);
|
||||
MarkEagerlyExecutedKernels();
|
||||
MarkEagerlyExecutedGpuKernels();
|
||||
MarkEagerlyExecutedCpuTfOps();
|
||||
}
|
||||
|
||||
std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {
|
||||
|
@ -73,6 +73,9 @@ class EventNode {
|
||||
|
||||
void SetIsEager(bool is_eager);
|
||||
|
||||
// Returns true if this event is part of eagerly executed op.
|
||||
bool IsEager();
|
||||
|
||||
bool IsNestedIn(EventNode* parent);
|
||||
|
||||
// Returns the closest parent of the given event type.
|
||||
@ -125,8 +128,11 @@ class EventForest {
|
||||
void CreateEventGroup(
|
||||
const std::vector<int64 /*EventType*/>& root_event_types);
|
||||
|
||||
// Sets the is_eager stat to true for the eagerly executed kernel events.
|
||||
void MarkEagerlyExecutedKernels();
|
||||
// Sets the is_eager stat to true for the eagerly executed GPU kernel events.
|
||||
void MarkEagerlyExecutedGpuKernels();
|
||||
|
||||
// Sets the is_eager stat to true for the eagerly executed CPU TF op events.
|
||||
void MarkEagerlyExecutedCpuTfOps();
|
||||
|
||||
// Create virtual events of HostEventType::kHostTrainingLoopIteration and
|
||||
// event nodes for them. A virtual event is created for each iteration of the
|
||||
|
@ -145,7 +145,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) {
|
||||
|
||||
TEST(GroupEventsTest, EagerOpTest) {
|
||||
XSpace space;
|
||||
XPlaneBuilder host_plane_builder(space.add_planes());
|
||||
XPlane* host_plane = space.add_planes();
|
||||
XPlaneBuilder host_plane_builder(host_plane);
|
||||
host_plane_builder.SetName(kHostThreads);
|
||||
host_plane_builder.ReserveLines(1);
|
||||
|
||||
@ -153,8 +154,8 @@ TEST(GroupEventsTest, EagerOpTest) {
|
||||
// Eagerly scheduled GPU kernel.
|
||||
CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100,
|
||||
{{StatType::kCorrelationId, 100}});
|
||||
CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun,
|
||||
110, 200, {{StatType::kStepId, 0}});
|
||||
// Eagerly executed CPU TF op.
|
||||
CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
|
||||
|
||||
XPlane* device_plane = space.add_planes();
|
||||
XPlaneBuilder device_plane_builder(device_plane);
|
||||
@ -166,17 +167,24 @@ TEST(GroupEventsTest, EagerOpTest) {
|
||||
{{StatType::kCorrelationId, 100}});
|
||||
|
||||
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 2);
|
||||
EXPECT_EQ(device_plane_visitor.GetStatType(
|
||||
device_plane->lines(0).events(0).stats(1)),
|
||||
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
|
||||
const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(1);
|
||||
EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1);
|
||||
EXPECT_EQ(host_plane_visitor.GetStatType(eager_cpu_tf_op.stats(0)),
|
||||
StatType::kIsEager);
|
||||
EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 1);
|
||||
EXPECT_EQ(eager_cpu_tf_op.stats(0).int64_value(), 1);
|
||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||
const XEvent& eager_gpu_kernel = device_plane->lines(0).events(0);
|
||||
EXPECT_EQ(eager_gpu_kernel.stats_size(), 2);
|
||||
EXPECT_EQ(device_plane_visitor.GetStatType(eager_gpu_kernel.stats(1)),
|
||||
StatType::kIsEager);
|
||||
EXPECT_EQ(eager_gpu_kernel.stats(1).int64_value(), 1);
|
||||
}
|
||||
|
||||
TEST(GroupEventsTest, FunctionOpTest) {
|
||||
XSpace space;
|
||||
XPlaneBuilder host_plane_builder(space.add_planes());
|
||||
XPlane* host_plane = space.add_planes();
|
||||
XPlaneBuilder host_plane_builder(host_plane);
|
||||
host_plane_builder.SetName(kHostThreads);
|
||||
host_plane_builder.ReserveLines(2);
|
||||
|
||||
@ -191,8 +199,10 @@ TEST(GroupEventsTest, FunctionOpTest) {
|
||||
HostEventType::kExecutorStateProcess, 20, 80,
|
||||
{{StatType::kStepId, 0}});
|
||||
// GPU kernel scheduled inside tf.function.
|
||||
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70,
|
||||
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
|
||||
{{StatType::kCorrelationId, 100}});
|
||||
// CPU TF op executed inside tf.function.
|
||||
CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
|
||||
|
||||
XPlane* device_plane = space.add_planes();
|
||||
XPlaneBuilder device_plane_builder(device_plane);
|
||||
@ -204,12 +214,18 @@ TEST(GroupEventsTest, FunctionOpTest) {
|
||||
{{StatType::kCorrelationId, 100}});
|
||||
|
||||
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
|
||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||
EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3);
|
||||
EXPECT_EQ(device_plane_visitor.GetStatType(
|
||||
device_plane->lines(0).events(0).stats(2)),
|
||||
XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane);
|
||||
const XEvent& cpu_tf_op = host_plane->lines(1).events(2);
|
||||
EXPECT_EQ(cpu_tf_op.stats_size(), 2);
|
||||
EXPECT_EQ(host_plane_visitor.GetStatType(cpu_tf_op.stats(1)),
|
||||
StatType::kIsEager);
|
||||
EXPECT_EQ(device_plane->lines(0).events(0).stats(2).int64_value(), 0);
|
||||
EXPECT_EQ(cpu_tf_op.stats(1).int64_value(), 0);
|
||||
XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane);
|
||||
const XEvent& gpu_kernel = device_plane->lines(0).events(0);
|
||||
EXPECT_EQ(gpu_kernel.stats_size(), 3);
|
||||
EXPECT_EQ(device_plane_visitor.GetStatType(gpu_kernel.stats(2)),
|
||||
StatType::kIsEager);
|
||||
EXPECT_EQ(gpu_kernel.stats(2).int64_value(), 0);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -34,13 +34,14 @@ double GetCappedPerf(double perf, uint64 time, double rate_limit) {
|
||||
} // namespace
|
||||
|
||||
void HostOpMetricsDbBuilder::EnterOp(absl::string_view name,
|
||||
absl::string_view category, uint64 time_ps,
|
||||
uint64 children_time_ps) {
|
||||
absl::string_view category, bool is_eager,
|
||||
uint64 time_ps, uint64 children_time_ps) {
|
||||
uint64 self_time_ps = time_ps - children_time_ps;
|
||||
DCHECK_GE(time_ps, self_time_ps);
|
||||
OpMetrics* op_metrics = LookupOrInsertNewOpMetrics(/*hlo_module_id=*/0, name);
|
||||
if (op_metrics->category().empty())
|
||||
op_metrics->set_category(category.data(), category.size());
|
||||
op_metrics->set_is_eager(is_eager);
|
||||
op_metrics->set_occurrences(op_metrics->occurrences() + 1);
|
||||
op_metrics->set_time_ps(op_metrics->time_ps() + time_ps);
|
||||
op_metrics->set_self_time_ps(op_metrics->self_time_ps() + self_time_ps);
|
||||
|
@ -35,12 +35,13 @@ class HostOpMetricsDbBuilder : public OpMetricsDbBuilder {
|
||||
// observed on a trace, where:
|
||||
// name = the OP name.
|
||||
// category = the OP category.
|
||||
// is_eager = whether this OP is eagerly executed.
|
||||
// time_ps = the total execution time of the OP in picoseconds, including
|
||||
// the execution time of its children.
|
||||
// children_time_ps = the execution time of the children of this OP in
|
||||
// picoseconds
|
||||
void EnterOp(absl::string_view name, absl::string_view category,
|
||||
uint64 time_ps, uint64 children_time_ps);
|
||||
bool is_eager, uint64 time_ps, uint64 children_time_ps);
|
||||
|
||||
// Updates total_host_infeed_enq_duration_ps_ and
|
||||
// total_host_infeed_enq_duration_ps_.
|
||||
|
@ -53,6 +53,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
|
||||
{"SessionRun", kSessionRun},
|
||||
{"FunctionRun", kFunctionRun},
|
||||
{"RunGraph", kRunGraph},
|
||||
{"TfOpRun", kTfOpRun},
|
||||
{"EagerKernelExecute", kEagerKernelExecute},
|
||||
{"ExecutorState::Process", kExecutorStateProcess},
|
||||
{"ExecutorDoneCallback", kExecutorDoneCallback},
|
||||
|
@ -52,6 +52,7 @@ enum HostEventType {
|
||||
kSessionRun,
|
||||
kFunctionRun,
|
||||
kRunGraph,
|
||||
kTfOpRun,
|
||||
kEagerKernelExecute,
|
||||
kExecutorStateProcess,
|
||||
kExecutorDoneCallback,
|
||||
|
Loading…
x
Reference in New Issue
Block a user