From b46d4e1c09823253c876fcb914fac77bd70c491a Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Wed, 10 Jun 2020 21:42:44 -0700 Subject: [PATCH] Fix FunctionRun's TraceMe and apply the new TraceMe APIs. PiperOrigin-RevId: 315830899 Change-Id: Ic16e3e98efa6bbcb702f3a59c9236eb35e3f5e6f --- tensorflow/core/common_runtime/BUILD | 6 ++- .../core/common_runtime/direct_session.cc | 19 +++++--- tensorflow/core/common_runtime/eager/BUILD | 2 + .../common_runtime/eager/kernel_and_device.cc | 13 +++--- tensorflow/core/common_runtime/executor.cc | 45 +++++++++++++------ tensorflow/core/distributed_runtime/BUILD | 3 +- .../core/distributed_runtime/graph_mgr.cc | 18 +++++--- tensorflow/core/profiler/lib/BUILD | 1 + 8 files changed, 74 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index ec9fe0ef688..e8f1dd1c5b5 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -582,8 +582,9 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", + "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:scoped_annotation", - "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", "@com_google_absl//absl/memory", ], alwayslink = 1, @@ -1696,9 +1697,10 @@ tf_cuda_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/debug:debug_graph_utils", "//tensorflow/core/kernels:function_ops", + "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:profiler_backends", "//tensorflow/core/profiler/lib:profiler_session", - "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", "@com_google_absl//absl/container:flat_hash_set", ], alwayslink = 1, diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 8093795b498..2c4e4c40e95 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -71,8 +71,9 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/profiler_session.h" -#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/env_var.h" @@ -500,18 +501,24 @@ Status DirectSession::RunInternal( RunState run_state(step_id, &devices_); const size_t num_executors = executors_and_keys->items.size(); - profiler::TraceMe activity( + profiler::TraceMeProducer activity( + // To TraceMeConsumers in ExecutorState::Process/Finish. [&] { if (options_.config.experimental().has_session_metadata()) { const auto& model_metadata = options_.config.experimental().session_metadata(); - return strings::StrCat("SessionRun#id=", step_id, - ",model_id=", model_metadata.name(), ":", - model_metadata.version(), "#"); + string model_id = strings::StrCat(model_metadata.name(), ":", + model_metadata.version()); + return profiler::TraceMeEncode("SessionRun", + {{"id", step_id}, + {"$r", 1} /*root_event*/, + {"model_id", model_id}}); } else { - return strings::StrCat("SessionRun#id=", step_id, "#"); + return profiler::TraceMeEncode( + "SessionRun", {{"id", step_id}, {"$r", 1} /*root_event*/}); } }, + profiler::ContextType::kTfExecutor, step_id, profiler::TraceMeLevel::kInfo); std::unique_ptr debugger_state; diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 625468b39d5..6698e41ccb0 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -290,7 +290,9 @@ KERNEL_AND_DEVICE_DEPS = [ "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", + "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", "//tensorflow/core/grappler/optimizers:meta_optimizer", ] diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 48dd6a718e8..c3d3d76fac5 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -40,7 +40,9 @@ limitations under the License. #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/setround.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" #if !defined(IS_MOBILE_PLATFORM) @@ -381,16 +383,17 @@ void KernelAndDeviceFunc::RunAsync( outputs->clear(); - profiler::TraceMe* activity = new profiler::TraceMe( + profiler::TraceMeProducer activity( + // To TraceMeConsumers in ExecutorState::Process/Finish. [&] { - return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id, - "#"); + return profiler::TraceMeEncode( + "FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/}); }, + profiler::ContextType::kTfExecutor, opts->step_id, profiler::TraceMeLevel::kInfo); pflr_->Run(*opts, handle_, inputs, outputs, - [opts, rendezvous, local_cm, step_container, this, activity, + [opts, rendezvous, local_cm, step_container, this, done = std::move(done)](const Status& s) { - delete activity; rendezvous->Unref(); if (step_container == nullptr) { this->step_container_.CleanUp(); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index af011ac95d8..b7998e9882c 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -65,8 +65,9 @@ limitations under the License. #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/scoped_annotation.h" -#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" @@ -627,16 +628,20 @@ void ExecutorState::ProcessConstTensor( template void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { - profiler::TraceMe activity( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync, + // DirectSession::RunInternal or GraphMgr::ExecuteAsync. [&] { // NOTE: This tracing uses the iteration number from the first tagged // node that executes during this call to `Process()`. In principle, // subsequent nodes could have different values of `iter_num` that // will not be traced. - return absl::StrCat("ExecutorState::Process#id=", step_id_, - ",iter_num=", tagged_node.get_iter_num(), "#"); + return profiler::TraceMeEncode( + "ExecutorState::Process", + {{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}}); }, - 2); + profiler::ContextType::kTfExecutor, step_id_, + profiler::TraceMeLevel::kInfo); WithContext wc(context_); TaggedNodeSeq ready; TaggedNodeReadyQueue inline_ready; @@ -1240,11 +1245,15 @@ void ExecutorState::Finish() { } delete this; runner([step_id, status, done_cb = std::move(done_cb)]() { - profiler::TraceMe traceme( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync, + // DirectSession::RunInternal or GraphMgr::ExecuteAsync. [&] { - return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); + return profiler::TraceMeEncode("ExecutorDoneCallback", + {{"id", step_id}}); }, - 2); + profiler::ContextType::kTfExecutor, step_id, + profiler::TraceMeLevel::kInfo); done_cb(status); }); return; @@ -1259,22 +1268,30 @@ void ExecutorState::Finish() { done_cb = std::move(done_cb)](const Status& status) mutable { delete this; runner([step_id, status, done_cb = std::move(done_cb)]() { - profiler::TraceMe traceme( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync, + // DirectSession::RunInternal or GraphMgr::ExecuteAsync. [&] { - return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); + return profiler::TraceMeEncode("ExecutorDoneCallback", + {{"id", step_id}}); }, - 2); + profiler::ContextType::kTfExecutor, step_id, + profiler::TraceMeLevel::kInfo); done_cb(status); }); }); } else { delete this; runner([step_id, status, done_cb = std::move(done_cb)]() { - profiler::TraceMe traceme( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync, + // DirectSession::RunInternal or GraphMgr::ExecuteAsync. [&] { - return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); + return profiler::TraceMeEncode("ExecutorDoneCallback", + {{"id", step_id}}); }, - 2); + profiler::ContextType::kTfExecutor, step_id, + profiler::TraceMeLevel::kInfo); done_cb(status); }); } diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 02e8ba550a8..ae1253e7dc6 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -439,7 +439,8 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core:worker_proto_cc", "//tensorflow/core/debug", - "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:connected_traceme", + "//tensorflow/core/profiler/lib:traceme_encode", ], ) diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index fe353d7d76c..de26990e0e5 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -49,7 +49,8 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/protobuf/worker.pb.h" #include "tensorflow/core/util/env_var.h" @@ -419,8 +420,13 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, CancellationManager* cancellation_manager, const NamedTensors& in, StatusCallback done) { const uint64 start_time_usecs = Env::Default()->NowMicros(); - profiler::TraceMe activity( - [step_id] { return absl::StrCat("RunGraph#id=", step_id, "#"); }, + profiler::TraceMeProducer activity( + // To TraceMeConsumers in ExecutorState::Process/Finish or RunGraphDone. + [step_id] { + return profiler::TraceMeEncode( + "RunGraph", {{"id", step_id}, {"$r", 1} /*root_event*/}); + }, + profiler::ContextType::kTfExecutor, step_id, profiler::TraceMeLevel::kInfo); // Lookup an item. Holds one ref while executing. Item* item = nullptr; @@ -486,10 +492,12 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, cancellation_manager, session, [item, rendezvous, ce_handle, done, start_time_usecs, input_size, step_id](const Status& s) { - profiler::TraceMe activity( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in GraphMgr::ExecuteAsync. [step_id] { - return absl::StrCat("RunGraphDone#id=", step_id, "#"); + return profiler::TraceMeEncode("RunGraphDone", {{"id", step_id}}); }, + profiler::ContextType::kTfExecutor, step_id, profiler::TraceMeLevel::kInfo); done(s); metrics::RecordGraphInputTensors(input_size); diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 2e32552e076..0f92ffd5a70 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -175,6 +175,7 @@ filegroup( name = "mobile_srcs", srcs = [ "annotated_traceme.h", + "connected_traceme.h", "profiler_session.cc", "profiler_session.h", "scoped_annotation.h",