Improve the handling of remote worker threads. Also, sort root events before grouping.
PiperOrigin-RevId: 316133384 Change-Id: Ie8272331135b1d14d52e66a9933e3d7037ac2eb6
This commit is contained in:
parent
eb4af4527b
commit
795362accd
@ -285,6 +285,7 @@ cc_library(
|
|||||||
"//tensorflow/core:lib_internal",
|
"//tensorflow/core:lib_internal",
|
||||||
"//tensorflow/core/profiler/lib:connected_traceme",
|
"//tensorflow/core/profiler/lib:connected_traceme",
|
||||||
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
|
||||||
|
"@com_google_absl//absl/algorithm:container",
|
||||||
"@com_google_absl//absl/container:flat_hash_map",
|
"@com_google_absl//absl/container:flat_hash_map",
|
||||||
"@com_google_absl//absl/container:flat_hash_set",
|
"@com_google_absl//absl/container:flat_hash_set",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
|
@ -24,6 +24,7 @@ limitations under the License.
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/algorithm/container.h"
|
||||||
#include "absl/container/flat_hash_map.h"
|
#include "absl/container/flat_hash_map.h"
|
||||||
#include "absl/container/flat_hash_set.h"
|
#include "absl/container/flat_hash_set.h"
|
||||||
#include "absl/strings/str_cat.h"
|
#include "absl/strings/str_cat.h"
|
||||||
@ -128,13 +129,6 @@ std::unique_ptr<XEvent> CreateVirtualEvent(const XStat& step_id_stat,
|
|||||||
return virtual_event;
|
return virtual_event;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool NeedsVirtualEventsForAsyncExecutor(
|
|
||||||
const std::vector<int64 /*EventType*/>& root_event_types) {
|
|
||||||
return std::find(root_event_types.begin(), root_event_types.end(),
|
|
||||||
HostEventType::kAsyncExecutorTraceContext) !=
|
|
||||||
root_event_types.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool HasFunctionRun(EventNode* event_node) {
|
bool HasFunctionRun(EventNode* event_node) {
|
||||||
for (EventNode* child : event_node->GetChildren()) {
|
for (EventNode* child : event_node->GetChildren()) {
|
||||||
if (child->GetEventVisitor().Type() == HostEventType::kFunctionRun) {
|
if (child->GetEventVisitor().Type() == HostEventType::kFunctionRun) {
|
||||||
@ -254,6 +248,13 @@ bool IsTopRoot(const EventNode* event) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SortEventList(EventList* event_list) {
|
||||||
|
absl::c_sort(*event_list, [](const EventNode* e1, const EventNode* e2) {
|
||||||
|
return e1->GetEventVisitor().TimestampPs() <
|
||||||
|
e2->GetEventVisitor().TimestampPs();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line,
|
EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line,
|
||||||
@ -488,6 +489,7 @@ void EventForest::CreateEventGroup() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SortEventList(&root_events_);
|
||||||
for (EventNode* root_event : root_events_) {
|
for (EventNode* root_event : root_events_) {
|
||||||
if (IsTopRoot(root_event)) {
|
if (IsTopRoot(root_event)) {
|
||||||
ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_);
|
ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_);
|
||||||
@ -579,23 +581,21 @@ void EventForest::ProcessTensorFlowLoop() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EventForest::CreateVirtualEventsForAsyncExecutor() {
|
void EventForest::ProcessWorker() {
|
||||||
auto eager_kernel_execute_event_node_list =
|
auto eager_kernel_execute_event_list =
|
||||||
gtl::FindOrNull(event_node_map_, HostEventType::kEagerKernelExecute);
|
gtl::FindOrNull(event_node_map_, HostEventType::kEagerKernelExecute);
|
||||||
if (!eager_kernel_execute_event_node_list) return;
|
if (!eager_kernel_execute_event_list) return;
|
||||||
EventNode* virtual_event_node = nullptr;
|
// The last EagerKernelExecute with a FunctionRun child.
|
||||||
for (auto& eager_kernel_execute_event_node :
|
EventNode* root_event = nullptr;
|
||||||
*eager_kernel_execute_event_node_list) {
|
for (auto& eager_kernel_execute_event : *eager_kernel_execute_event_list) {
|
||||||
if (HasFunctionRun(eager_kernel_execute_event_node.get())) {
|
if (HasFunctionRun(eager_kernel_execute_event.get())) {
|
||||||
auto new_virtual_event_node =
|
// A function op becomes a new root.
|
||||||
absl::make_unique<EventNode>(*eager_kernel_execute_event_node);
|
root_event = eager_kernel_execute_event.get();
|
||||||
virtual_event_node = new_virtual_event_node.get();
|
root_event->SetIsRoot(true);
|
||||||
// event_node_map_ keeps new_virtual_event_node alive.
|
root_events_.push_back(root_event);
|
||||||
event_node_map_[HostEventType::kAsyncExecutorTraceContext].push_back(
|
} else if (root_event) {
|
||||||
std::move(new_virtual_event_node));
|
// Add non-function eager ops as child.
|
||||||
}
|
root_event->AddChild(eager_kernel_execute_event.get());
|
||||||
if (virtual_event_node) {
|
|
||||||
virtual_event_node->AddChild(eager_kernel_execute_event_node.get());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -615,9 +615,7 @@ EventForest::EventForest(
|
|||||||
ConnectInterThread(connect_info_list);
|
ConnectInterThread(connect_info_list);
|
||||||
ConnectContextGroups(context_groups);
|
ConnectContextGroups(context_groups);
|
||||||
ProcessTensorFlowLoop();
|
ProcessTensorFlowLoop();
|
||||||
if (NeedsVirtualEventsForAsyncExecutor(root_event_types)) {
|
ProcessWorker();
|
||||||
CreateVirtualEventsForAsyncExecutor();
|
|
||||||
}
|
|
||||||
ProcessLegacyRootEvents(root_event_types);
|
ProcessLegacyRootEvents(root_event_types);
|
||||||
CreateEventGroup();
|
CreateEventGroup();
|
||||||
MarkEagerlyExecutedGpuKernels();
|
MarkEagerlyExecutedGpuKernels();
|
||||||
|
@ -186,10 +186,9 @@ class EventForest {
|
|||||||
// iteration to `tf_loop_root_events_`.
|
// iteration to `tf_loop_root_events_`.
|
||||||
void ProcessTensorFlowLoop();
|
void ProcessTensorFlowLoop();
|
||||||
|
|
||||||
// Creates virtual events of HostEventType::kAsyncExecutorTraceContext. A
|
// Processes the worker thread by grouping a FunctionRun with the following
|
||||||
// virtual event is created for every FunctionRun and the following eager ops
|
// eager ops (e.g., for Keras callback).
|
||||||
// (e.g., for Keras callback).
|
void ProcessWorker();
|
||||||
void CreateVirtualEventsForAsyncExecutor();
|
|
||||||
|
|
||||||
EventNodeMap event_node_map_;
|
EventNodeMap event_node_map_;
|
||||||
std::vector<XPlaneVisitor> visitors_;
|
std::vector<XPlaneVisitor> visitors_;
|
||||||
|
@ -470,6 +470,63 @@ TEST(GroupEventsTest, AsyncEventTest) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(GroupEventsTest, WorkerTest) {
|
||||||
|
constexpr uint64 kEagerKernelExecuteDuration = 100;
|
||||||
|
constexpr uint64 kFunctionRunDuration = 50;
|
||||||
|
constexpr uint64 kFirstEagerKernelExecuteStartTime = 0;
|
||||||
|
constexpr uint64 kSecondEagerKernelExecuteStartTime = 200;
|
||||||
|
constexpr uint64 kThirdEagerKernelExecuteStartTime = 400;
|
||||||
|
constexpr uint64 kFourthEagerKernelExecuteStartTime = 600;
|
||||||
|
constexpr uint64 kFirstFunctionRunStartTime = 210;
|
||||||
|
constexpr uint64 kSecondFunctionRunStartTime = 610;
|
||||||
|
|
||||||
|
XSpace raw_space;
|
||||||
|
XPlane* raw_plane = raw_space.add_planes();
|
||||||
|
XPlaneBuilder plane(raw_plane);
|
||||||
|
plane.ReserveLines(1);
|
||||||
|
auto line = plane.GetOrCreateLine(0);
|
||||||
|
// Eager op. It doesn't belong to any group.
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
|
||||||
|
kFirstEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
|
||||||
|
// First function. It creates the first group.
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
|
||||||
|
kSecondEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kFunctionRun,
|
||||||
|
kFirstFunctionRunStartTime, kFunctionRunDuration);
|
||||||
|
// Eager op. It belongs to the first group.
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
|
||||||
|
kThirdEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
|
||||||
|
// Second function. It creates the second group.
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
|
||||||
|
kFourthEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
|
||||||
|
CreateXEvent(&plane, &line, HostEventType::kFunctionRun,
|
||||||
|
kSecondFunctionRunStartTime, kFunctionRunDuration);
|
||||||
|
|
||||||
|
GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr);
|
||||||
|
CreateTfXPlaneVisitor(raw_plane).ForEachLine(
|
||||||
|
[&](const tensorflow::profiler::XLineVisitor& line) {
|
||||||
|
EXPECT_EQ(line.NumEvents(), 6);
|
||||||
|
line.ForEachEvent(
|
||||||
|
[&](const tensorflow::profiler::XEventVisitor& event) {
|
||||||
|
absl::optional<int64> group_id;
|
||||||
|
if (absl::optional<XStatVisitor> stat =
|
||||||
|
event.GetStat(StatType::kGroupId)) {
|
||||||
|
group_id = stat->IntValue();
|
||||||
|
}
|
||||||
|
if (event.TimestampPs() < kSecondEagerKernelExecuteStartTime) {
|
||||||
|
EXPECT_FALSE(group_id.has_value());
|
||||||
|
} else if (event.TimestampPs() <
|
||||||
|
kFourthEagerKernelExecuteStartTime) {
|
||||||
|
EXPECT_TRUE(group_id.has_value());
|
||||||
|
EXPECT_EQ(*group_id, 0);
|
||||||
|
} else {
|
||||||
|
EXPECT_TRUE(group_id.has_value());
|
||||||
|
EXPECT_EQ(*group_id, 1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
@ -107,8 +107,6 @@ const HostEventTypeMap& GetHostEventTypeMap() {
|
|||||||
// tf.data related.
|
// tf.data related.
|
||||||
{"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
|
{"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
|
||||||
{"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
|
{"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
|
||||||
// Virtual events for grouping.
|
|
||||||
{"AsyncExecutorTraceContext", kAsyncExecutorTraceContext},
|
|
||||||
// GPU related.
|
// GPU related.
|
||||||
{"KernelLaunch", kKernelLaunch},
|
{"KernelLaunch", kKernelLaunch},
|
||||||
{"KernelExecute", kKernelExecute},
|
{"KernelExecute", kKernelExecute},
|
||||||
|
@ -100,8 +100,6 @@ enum HostEventType {
|
|||||||
// tf.data related.
|
// tf.data related.
|
||||||
kIteratorGetNextOp,
|
kIteratorGetNextOp,
|
||||||
kIteratorGetNextAsOptionalOp,
|
kIteratorGetNextAsOptionalOp,
|
||||||
// Virtual events for grouping.
|
|
||||||
kAsyncExecutorTraceContext,
|
|
||||||
// GPU related.
|
// GPU related.
|
||||||
kKernelLaunch,
|
kKernelLaunch,
|
||||||
kKernelExecute,
|
kKernelExecute,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user