Fix FunctionRun's TraceMe and apply the new TraceMe APIs.

PiperOrigin-RevId: 315830899
Change-Id: Ic16e3e98efa6bbcb702f3a59c9236eb35e3f5e6f
This commit is contained in:
Jiho Choi 2020-06-10 21:42:44 -07:00 committed by TensorFlower Gardener
parent a529dbd7d4
commit b46d4e1c09
8 changed files with 74 additions and 33 deletions

View File

@ -582,8 +582,9 @@ cc_library(
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/lib:annotated_traceme",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:scoped_annotation",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
@ -1696,9 +1697,10 @@ tf_cuda_library(
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/debug:debug_graph_utils",
"//tensorflow/core/kernels:function_ops",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:profiler_backends",
"//tensorflow/core/profiler/lib:profiler_session",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
"@com_google_absl//absl/container:flat_hash_set",
],
alwayslink = 1,

View File

@ -71,8 +71,9 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/profiler_session.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/util/device_name_utils.h"
#include "tensorflow/core/util/env_var.h"
@ -500,18 +501,24 @@ Status DirectSession::RunInternal(
RunState run_state(step_id, &devices_);
const size_t num_executors = executors_and_keys->items.size();
profiler::TraceMe activity(
profiler::TraceMeProducer activity(
// To TraceMeConsumers in ExecutorState::Process/Finish.
[&] {
if (options_.config.experimental().has_session_metadata()) {
const auto& model_metadata =
options_.config.experimental().session_metadata();
return strings::StrCat("SessionRun#id=", step_id,
",model_id=", model_metadata.name(), ":",
model_metadata.version(), "#");
string model_id = strings::StrCat(model_metadata.name(), ":",
model_metadata.version());
return profiler::TraceMeEncode("SessionRun",
{{"id", step_id},
{"$r", 1} /*root_event*/,
{"model_id", model_id}});
} else {
return strings::StrCat("SessionRun#id=", step_id, "#");
return profiler::TraceMeEncode(
"SessionRun", {{"id", step_id}, {"$r", 1} /*root_event*/});
}
},
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
std::unique_ptr<DebuggerStateInterface> debugger_state;

View File

@ -290,7 +290,9 @@ KERNEL_AND_DEVICE_DEPS = [
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler/lib:annotated_traceme",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
"//tensorflow/core/grappler/optimizers:meta_optimizer",
]

View File

@ -40,7 +40,9 @@ limitations under the License.
#include "tensorflow/core/platform/fingerprint.h"
#include "tensorflow/core/platform/setround.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
#if !defined(IS_MOBILE_PLATFORM)
@ -381,16 +383,17 @@ void KernelAndDeviceFunc::RunAsync(
outputs->clear();
profiler::TraceMe* activity = new profiler::TraceMe(
profiler::TraceMeProducer activity(
// To TraceMeConsumers in ExecutorState::Process/Finish.
[&] {
return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id,
"#");
return profiler::TraceMeEncode(
"FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/});
},
profiler::ContextType::kTfExecutor, opts->step_id,
profiler::TraceMeLevel::kInfo);
pflr_->Run(*opts, handle_, inputs, outputs,
[opts, rendezvous, local_cm, step_container, this, activity,
[opts, rendezvous, local_cm, step_container, this,
done = std::move(done)](const Status& s) {
delete activity;
rendezvous->Unref();
if (step_container == nullptr) {
this->step_container_.CleanUp();

View File

@ -65,8 +65,9 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/scoped_annotation.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/error_codes.pb.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
@ -627,16 +628,20 @@ void ExecutorState<PropagatorStateType>::ProcessConstTensor(
template <class PropagatorStateType>
void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node,
int64 scheduled_nsec) {
profiler::TraceMe activity(
profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] {
// NOTE: This tracing uses the iteration number from the first tagged
// node that executes during this call to `Process()`. In principle,
// subsequent nodes could have different values of `iter_num` that
// will not be traced.
return absl::StrCat("ExecutorState::Process#id=", step_id_,
",iter_num=", tagged_node.get_iter_num(), "#");
return profiler::TraceMeEncode(
"ExecutorState::Process",
{{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}});
},
2);
profiler::ContextType::kTfExecutor, step_id_,
profiler::TraceMeLevel::kInfo);
WithContext wc(context_);
TaggedNodeSeq ready;
TaggedNodeReadyQueue inline_ready;
@ -1240,11 +1245,15 @@ void ExecutorState<PropagatorStateType>::Finish() {
}
delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme(
profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
},
2);
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status);
});
return;
@ -1259,22 +1268,30 @@ void ExecutorState<PropagatorStateType>::Finish() {
done_cb = std::move(done_cb)](const Status& status) mutable {
delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme(
profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
},
2);
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status);
});
});
} else {
delete this;
runner([step_id, status, done_cb = std::move(done_cb)]() {
profiler::TraceMe traceme(
profiler::TraceMeConsumer activity(
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
[&] {
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
return profiler::TraceMeEncode("ExecutorDoneCallback",
{{"id", step_id}});
},
2);
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done_cb(status);
});
}

View File

@ -439,7 +439,8 @@ cc_library(
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:worker_proto_cc",
"//tensorflow/core/debug",
"//tensorflow/core/profiler/lib:traceme",
"//tensorflow/core/profiler/lib:connected_traceme",
"//tensorflow/core/profiler/lib:traceme_encode",
],
)

View File

@ -49,7 +49,8 @@ limitations under the License.
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/tracing.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/profiler/lib/connected_traceme.h"
#include "tensorflow/core/profiler/lib/traceme_encode.h"
#include "tensorflow/core/protobuf/worker.pb.h"
#include "tensorflow/core/util/env_var.h"
@ -419,8 +420,13 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
CancellationManager* cancellation_manager,
const NamedTensors& in, StatusCallback done) {
const uint64 start_time_usecs = Env::Default()->NowMicros();
profiler::TraceMe activity(
[step_id] { return absl::StrCat("RunGraph#id=", step_id, "#"); },
profiler::TraceMeProducer activity(
// To TraceMeConsumers in ExecutorState::Process/Finish or RunGraphDone.
[step_id] {
return profiler::TraceMeEncode(
"RunGraph", {{"id", step_id}, {"$r", 1} /*root_event*/});
},
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
// Lookup an item. Holds one ref while executing.
Item* item = nullptr;
@ -486,10 +492,12 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
cancellation_manager, session,
[item, rendezvous, ce_handle, done, start_time_usecs, input_size,
step_id](const Status& s) {
profiler::TraceMe activity(
profiler::TraceMeConsumer activity(
// From TraceMeProducer in GraphMgr::ExecuteAsync.
[step_id] {
return absl::StrCat("RunGraphDone#id=", step_id, "#");
return profiler::TraceMeEncode("RunGraphDone", {{"id", step_id}});
},
profiler::ContextType::kTfExecutor, step_id,
profiler::TraceMeLevel::kInfo);
done(s);
metrics::RecordGraphInputTensors(input_size);

View File

@ -175,6 +175,7 @@ filegroup(
name = "mobile_srcs",
srcs = [
"annotated_traceme.h",
"connected_traceme.h",
"profiler_session.cc",
"profiler_session.h",
"scoped_annotation.h",