Improve the handling of remote worker threads. Also, sort root events before grouping.

PiperOrigin-RevId: 316133384
Change-Id: Ie8272331135b1d14d52e66a9933e3d7037ac2eb6
This commit is contained in:
Jiho Choi 2020-06-12 10:48:21 -07:00 committed by TensorFlower Gardener
parent eb4af4527b
commit 795362accd
6 changed files with 85 additions and 34 deletions

View File

@ -285,6 +285,7 @@ cc_library(
"//tensorflow/core:lib_internal", "//tensorflow/core:lib_internal",
"//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/strings", "@com_google_absl//absl/strings",

View File

@ -24,6 +24,7 @@ limitations under the License.
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "absl/algorithm/container.h"
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h" #include "absl/container/flat_hash_set.h"
#include "absl/strings/str_cat.h" #include "absl/strings/str_cat.h"
@ -128,13 +129,6 @@ std::unique_ptr<XEvent> CreateVirtualEvent(const XStat& step_id_stat,
return virtual_event; return virtual_event;
} }
bool NeedsVirtualEventsForAsyncExecutor(
const std::vector<int64 /*EventType*/>& root_event_types) {
return std::find(root_event_types.begin(), root_event_types.end(),
HostEventType::kAsyncExecutorTraceContext) !=
root_event_types.end();
}
bool HasFunctionRun(EventNode* event_node) { bool HasFunctionRun(EventNode* event_node) {
for (EventNode* child : event_node->GetChildren()) { for (EventNode* child : event_node->GetChildren()) {
if (child->GetEventVisitor().Type() == HostEventType::kFunctionRun) { if (child->GetEventVisitor().Type() == HostEventType::kFunctionRun) {
@ -254,6 +248,13 @@ bool IsTopRoot(const EventNode* event) {
return true; return true;
} }
// Sorts `event_list` in place so that events appear in ascending order of the
// timestamp reported by their event visitor (TimestampPs()). `event_list` must
// not be null. The relative order of events with equal timestamps is not
// guaranteed to be preserved.
void SortEventList(EventList* event_list) {
  auto starts_earlier = [](const EventNode* lhs, const EventNode* rhs) {
    const auto lhs_start_ps = lhs->GetEventVisitor().TimestampPs();
    const auto rhs_start_ps = rhs->GetEventVisitor().TimestampPs();
    return lhs_start_ps < rhs_start_ps;
  };
  absl::c_sort(*event_list, starts_earlier);
}
} // namespace } // namespace
EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line, EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line,
@ -488,6 +489,7 @@ void EventForest::CreateEventGroup() {
return; return;
} }
SortEventList(&root_events_);
for (EventNode* root_event : root_events_) { for (EventNode* root_event : root_events_) {
if (IsTopRoot(root_event)) { if (IsTopRoot(root_event)) {
ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_); ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_);
@ -579,23 +581,21 @@ void EventForest::ProcessTensorFlowLoop() {
} }
} }
void EventForest::CreateVirtualEventsForAsyncExecutor() { void EventForest::ProcessWorker() {
auto eager_kernel_execute_event_node_list = auto eager_kernel_execute_event_list =
gtl::FindOrNull(event_node_map_, HostEventType::kEagerKernelExecute); gtl::FindOrNull(event_node_map_, HostEventType::kEagerKernelExecute);
if (!eager_kernel_execute_event_node_list) return; if (!eager_kernel_execute_event_list) return;
EventNode* virtual_event_node = nullptr; // The last EagerKernelExecute with a FunctionRun child.
for (auto& eager_kernel_execute_event_node : EventNode* root_event = nullptr;
*eager_kernel_execute_event_node_list) { for (auto& eager_kernel_execute_event : *eager_kernel_execute_event_list) {
if (HasFunctionRun(eager_kernel_execute_event_node.get())) { if (HasFunctionRun(eager_kernel_execute_event.get())) {
auto new_virtual_event_node = // A function op becomes a new root.
absl::make_unique<EventNode>(*eager_kernel_execute_event_node); root_event = eager_kernel_execute_event.get();
virtual_event_node = new_virtual_event_node.get(); root_event->SetIsRoot(true);
// event_node_map_ keeps new_virtual_event_node alive. root_events_.push_back(root_event);
event_node_map_[HostEventType::kAsyncExecutorTraceContext].push_back( } else if (root_event) {
std::move(new_virtual_event_node)); // Add non-function eager ops as child.
} root_event->AddChild(eager_kernel_execute_event.get());
if (virtual_event_node) {
virtual_event_node->AddChild(eager_kernel_execute_event_node.get());
} }
} }
} }
@ -615,9 +615,7 @@ EventForest::EventForest(
ConnectInterThread(connect_info_list); ConnectInterThread(connect_info_list);
ConnectContextGroups(context_groups); ConnectContextGroups(context_groups);
ProcessTensorFlowLoop(); ProcessTensorFlowLoop();
if (NeedsVirtualEventsForAsyncExecutor(root_event_types)) { ProcessWorker();
CreateVirtualEventsForAsyncExecutor();
}
ProcessLegacyRootEvents(root_event_types); ProcessLegacyRootEvents(root_event_types);
CreateEventGroup(); CreateEventGroup();
MarkEagerlyExecutedGpuKernels(); MarkEagerlyExecutedGpuKernels();

View File

@ -186,10 +186,9 @@ class EventForest {
// iteration to `tf_loop_root_events_`. // iteration to `tf_loop_root_events_`.
void ProcessTensorFlowLoop(); void ProcessTensorFlowLoop();
// Creates virtual events of HostEventType::kAsyncExecutorTraceContext. A // Processes the worker thread by grouping a FunctionRun with the following
// virtual event is created for every FunctionRun and the following eager ops // eager ops (e.g., for Keras callback).
// (e.g., for Keras callback). void ProcessWorker();
void CreateVirtualEventsForAsyncExecutor();
EventNodeMap event_node_map_; EventNodeMap event_node_map_;
std::vector<XPlaneVisitor> visitors_; std::vector<XPlaneVisitor> visitors_;

View File

@ -470,6 +470,63 @@ TEST(GroupEventsTest, AsyncEventTest) {
}); });
} }
// Verifies worker-thread grouping on a single line: an EagerKernelExecute that
// contains a FunctionRun starts a new group, eager ops that follow it join that
// group, and eager ops that precede the first function stay ungrouped.
TEST(GroupEventsTest, WorkerTest) {
  constexpr uint64 kEagerKernelExecuteDuration = 100;
  constexpr uint64 kFunctionRunDuration = 50;
  constexpr uint64 kFirstEagerKernelExecuteStartTime = 0;
  constexpr uint64 kSecondEagerKernelExecuteStartTime = 200;
  constexpr uint64 kThirdEagerKernelExecuteStartTime = 400;
  constexpr uint64 kFourthEagerKernelExecuteStartTime = 600;
  // Each FunctionRun starts shortly after, and ends before, the
  // EagerKernelExecute it is paired with below.
  constexpr uint64 kFirstFunctionRunStartTime = 210;
  constexpr uint64 kSecondFunctionRunStartTime = 610;

  XSpace raw_space;
  XPlane* raw_plane = raw_space.add_planes();
  XPlaneBuilder plane(raw_plane);
  plane.ReserveLines(1);
  auto line = plane.GetOrCreateLine(0);
  // Eager op. It doesn't belong to any group.
  CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
               kFirstEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
  // First function. It creates the first group.
  CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
               kSecondEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
  CreateXEvent(&plane, &line, HostEventType::kFunctionRun,
               kFirstFunctionRunStartTime, kFunctionRunDuration);
  // Eager op. It belongs to the first group.
  CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
               kThirdEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
  // Second function. It creates the second group.
  CreateXEvent(&plane, &line, HostEventType::kEagerKernelExecute,
               kFourthEagerKernelExecuteStartTime, kEagerKernelExecuteDuration);
  CreateXEvent(&plane, &line, HostEventType::kFunctionRun,
               kSecondFunctionRunStartTime, kFunctionRunDuration);

  GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr);
  CreateTfXPlaneVisitor(raw_plane).ForEachLine(
      [&](const tensorflow::profiler::XLineVisitor& line) {
        EXPECT_EQ(line.NumEvents(), 6);
        line.ForEachEvent(
            [&](const tensorflow::profiler::XEventVisitor& event) {
              absl::optional<int64> group_id;
              if (absl::optional<XStatVisitor> stat =
                      event.GetStat(StatType::kGroupId)) {
                group_id = stat->IntValue();
              }
              // The expected group is determined by which start-time window
              // the event falls into.
              if (event.TimestampPs() < kSecondEagerKernelExecuteStartTime) {
                EXPECT_FALSE(group_id.has_value());
              } else if (event.TimestampPs() <
                         kFourthEagerKernelExecuteStartTime) {
                // Fatal assertion: leave this per-event callback on failure so
                // that an empty optional is never dereferenced below.
                ASSERT_TRUE(group_id.has_value());
                EXPECT_EQ(*group_id, 0);
              } else {
                // Fatal for the same reason as above.
                ASSERT_TRUE(group_id.has_value());
                EXPECT_EQ(*group_id, 1);
              }
            });
      });
}
} // namespace } // namespace
} // namespace profiler } // namespace profiler
} // namespace tensorflow } // namespace tensorflow

View File

@ -107,8 +107,6 @@ const HostEventTypeMap& GetHostEventTypeMap() {
// tf.data related. // tf.data related.
{"IteratorGetNextOp::DoCompute", kIteratorGetNextOp}, {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp},
{"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp}, {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp},
// Virtual events for grouping.
{"AsyncExecutorTraceContext", kAsyncExecutorTraceContext},
// GPU related. // GPU related.
{"KernelLaunch", kKernelLaunch}, {"KernelLaunch", kKernelLaunch},
{"KernelExecute", kKernelExecute}, {"KernelExecute", kKernelExecute},

View File

@ -100,8 +100,6 @@ enum HostEventType {
// tf.data related. // tf.data related.
kIteratorGetNextOp, kIteratorGetNextOp,
kIteratorGetNextAsOptionalOp, kIteratorGetNextAsOptionalOp,
// Virtual events for grouping.
kAsyncExecutorTraceContext,
// GPU related. // GPU related.
kKernelLaunch, kKernelLaunch,
kKernelExecute, kKernelExecute,