Fix FunctionRun's TraceMe and apply the new TraceMe APIs.

PiperOrigin-RevId: 315830899
Change-Id: Ic16e3e98efa6bbcb702f3a59c9236eb35e3f5e6f
This commit is contained in:
Jiho Choi 2020-06-10 21:42:44 -07:00 committed by TensorFlower Gardener
parent a529dbd7d4
commit b46d4e1c09
8 changed files with 74 additions and 33 deletions

View File

@ -582,8 +582,9 @@ cc_library(
"//tensorflow/core:lib_internal", "//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/core/profiler/lib:annotated_traceme",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/core/profiler/lib:scoped_annotation",
"//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/lib:traceme_encode",
"@com_google_absl//absl/memory", "@com_google_absl//absl/memory",
], ],
alwayslink = 1, alwayslink = 1,
@ -1696,9 +1697,10 @@ tf_cuda_library(
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core/debug:debug_graph_utils", "//tensorflow/core/debug:debug_graph_utils",
"//tensorflow/core/kernels:function_ops", "//tensorflow/core/kernels:function_ops",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:profiler_backends", "//tensorflow/core/profiler/lib:profiler_backends",
"//tensorflow/core/profiler/lib:profiler_session", "//tensorflow/core/profiler/lib:profiler_session",
"//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/lib:traceme_encode",
"@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:flat_hash_set",
], ],
alwayslink = 1, alwayslink = 1,

View File

@ -71,8 +71,9 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/profiler_session.h" #include "tensorflow/core/profiler/lib/profiler_session.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/device_name_utils.h"
#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/env_var.h"
@ -500,18 +501,24 @@ Status DirectSession::RunInternal(
RunState run_state(step_id, &devices_); RunState run_state(step_id, &devices_);
const size_t num_executors = executors_and_keys->items.size(); const size_t num_executors = executors_and_keys->items.size();
profiler::TraceMe activity( profiler::TraceMeProducer activity(
// Connects, via ContextType::kTfExecutor and step_id, to the
// TraceMeConsumers in ExecutorState::Process/Finish.
[&] { [&] {
if (options_.config.experimental().has_session_metadata()) { if (options_.config.experimental().has_session_metadata()) {
const auto& model_metadata = const auto& model_metadata =
options_.config.experimental().session_metadata(); options_.config.experimental().session_metadata();
return strings::StrCat("SessionRun#id=", step_id, string model_id = strings::StrCat(model_metadata.name(), ":",
",model_id=", model_metadata.name(), ":", model_metadata.version());
model_metadata.version(), "#"); return profiler::TraceMeEncode("SessionRun",
{{"id", step_id},
{"$r", 1} /*root_event*/,
{"model_id", model_id}});
} else { } else {
return strings::StrCat("SessionRun#id=", step_id, "#"); return profiler::TraceMeEncode(
"SessionRun", {{"id", step_id}, {"$r", 1} /*root_event*/});
} }
}, },
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo); profiler::TraceMeLevel::kInfo);
std::unique_ptr<DebuggerStateInterface> debugger_state; std::unique_ptr<DebuggerStateInterface> debugger_state;

View File

@ -290,7 +290,9 @@ KERNEL_AND_DEVICE_DEPS = [
"//tensorflow/core:lib_internal", "//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/core/profiler/lib:annotated_traceme",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
"//tensorflow/core/grappler/optimizers:meta_optimizer", "//tensorflow/core/grappler/optimizers:meta_optimizer",
] ]

View File

@ -40,7 +40,9 @@ limitations under the License.
#include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/public/version.h" #include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h"
#if !defined(IS_MOBILE_PLATFORM) #if !defined(IS_MOBILE_PLATFORM)
@ -381,16 +383,17 @@ void KernelAndDeviceFunc::RunAsync(
outputs->clear(); outputs->clear();
profiler::TraceMe* activity = new profiler::TraceMe( profiler::TraceMeProducer activity(
// Connects, via ContextType::kTfExecutor and opts->step_id, to the
// TraceMeConsumers in ExecutorState::Process/Finish.
[&] { [&] {
return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id, return profiler::TraceMeEncode(
"#"); "FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/});
}, },
profiler::ContextType::kTfExecutor, opts->step_id,
profiler::TraceMeLevel::kInfo); profiler::TraceMeLevel::kInfo);
pflr_->Run(*opts, handle_, inputs, outputs, pflr_->Run(*opts, handle_, inputs, outputs,
[opts, rendezvous, local_cm, step_container, this, activity, [opts, rendezvous, local_cm, step_container, this,
done = std::move(done)](const Status& s) { done = std::move(done)](const Status& s) {
delete activity;
rendezvous->Unref(); rendezvous->Unref();
if (step_container == nullptr) { if (step_container == nullptr) {
this->step_container_.CleanUp(); this->step_container_.CleanUp();

View File

@ -65,8 +65,9 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/scoped_annotation.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/protobuf/error_codes.pb.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h"
@ -627,16 +628,20 @@ void ExecutorState<PropagatorStateType>::ProcessConstTensor(
template <class PropagatorStateType> template <class PropagatorStateType>
void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node, void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node,
int64 scheduled_nsec) { int64 scheduled_nsec) {
profiler::TraceMe activity( profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] { [&] {
// NOTE: This tracing uses the iteration number from the first tagged // NOTE: This tracing uses the iteration number from the first tagged
// node that executes during this call to `Process()`. In principle, // node that executes during this call to `Process()`. In principle,
// subsequent nodes could have different values of `iter_num` that // subsequent nodes could have different values of `iter_num` that
// will not be traced. // will not be traced.
return absl::StrCat("ExecutorState::Process#id=", step_id_, return profiler::TraceMeEncode(
",iter_num=", tagged_node.get_iter_num(), "#"); "ExecutorState::Process",
{{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}});
}, },
2); profiler::ContextType::kTfExecutor, step_id_,
profiler::TraceMeLevel::kInfo);
WithContext wc(context_); WithContext wc(context_);
TaggedNodeSeq ready; TaggedNodeSeq ready;
TaggedNodeReadyQueue inline_ready; TaggedNodeReadyQueue inline_ready;
@ -1240,11 +1245,15 @@ void ExecutorState<PropagatorStateType>::Finish() {
} }
delete this; delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() { runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme( profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] { [&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
}, },
2); profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status); done_cb(status);
}); });
return; return;
@ -1259,22 +1268,30 @@ void ExecutorState<PropagatorStateType>::Finish() {
done_cb = std::move(done_cb)](const Status& status) mutable { done_cb = std::move(done_cb)](const Status& status) mutable {
delete this; delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() { runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme( profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] { [&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
}, },
2); profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status); done_cb(status);
}); });
}); });
} else { } else {
delete this; delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() { runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme( profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] { [&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
}, },
2); profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status); done_cb(status);
}); });
} }

View File

@ -439,7 +439,8 @@ cc_library(
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core:worker_proto_cc", "//tensorflow/core:worker_proto_cc",
"//tensorflow/core/debug", "//tensorflow/core/debug",
"//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
], ],
) )

View File

@ -49,7 +49,8 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/worker.pb.h" #include "tensorflow/core/protobuf/worker.pb.h"
#include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/env_var.h"
@ -419,8 +420,13 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
CancellationManager* cancellation_manager, CancellationManager* cancellation_manager,
const NamedTensors& in, StatusCallback done) { const NamedTensors& in, StatusCallback done) {
const uint64 start_time_usecs = Env::Default()->NowMicros(); const uint64 start_time_usecs = Env::Default()->NowMicros();
profiler::TraceMe activity( profiler::TraceMeProducer activity(
[step_id] { return absl::StrCat("RunGraph#id=", step_id, "#"); }, // To TraceMeConsumers in ExecutorState::Process/Finish or RunGraphDone.
[step_id] {
return profiler::TraceMeEncode(
"RunGraph", {{"id", step_id}, {"$r", 1} /*root_event*/});
},
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo); profiler::TraceMeLevel::kInfo);
// Lookup an item. Holds one ref while executing. // Lookup an item. Holds one ref while executing.
Item* item = nullptr; Item* item = nullptr;
@ -486,10 +492,12 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
cancellation_manager, session, cancellation_manager, session,
[item, rendezvous, ce_handle, done, start_time_usecs, input_size, [item, rendezvous, ce_handle, done, start_time_usecs, input_size,
step_id](const Status& s) { step_id](const Status& s) {
profiler::TraceMe activity( profiler::TraceMeConsumer activity(
// From TraceMeProducer in GraphMgr::ExecuteAsync.
[step_id] { [step_id] {
return absl::StrCat("RunGraphDone#id=", step_id, "#"); return profiler::TraceMeEncode("RunGraphDone", {{"id", step_id}});
}, },
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo); profiler::TraceMeLevel::kInfo);
done(s); done(s);
metrics::RecordGraphInputTensors(input_size); metrics::RecordGraphInputTensors(input_size);

View File

@ -175,6 +175,7 @@ filegroup(
name = "mobile_srcs", name = "mobile_srcs",
srcs = [ srcs = [
"annotated_traceme.h", "annotated_traceme.h",
"connected_traceme.h",
"profiler_session.cc", "profiler_session.cc",
"profiler_session.h", "profiler_session.h",
"scoped_annotation.h", "scoped_annotation.h",