Fix FunctionRun's TraceMe and apply the new TraceMe APIs.
PiperOrigin-RevId: 315830899 Change-Id: Ic16e3e98efa6bbcb702f3a59c9236eb35e3f5e6f
This commit is contained in:
parent
a529dbd7d4
commit
b46d4e1c09
@ -582,8 +582,9 @@ cc_library(
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core/profiler/lib:annotated_traceme",
|
||||
"//tensorflow/core/profiler/lib:connected_traceme",
|
||||
"//tensorflow/core/profiler/lib:scoped_annotation",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme_encode",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
alwayslink = 1,
|
||||
@ -1696,9 +1697,10 @@ tf_cuda_library(
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core/debug:debug_graph_utils",
|
||||
"//tensorflow/core/kernels:function_ops",
|
||||
"//tensorflow/core/profiler/lib:connected_traceme",
|
||||
"//tensorflow/core/profiler/lib:profiler_backends",
|
||||
"//tensorflow/core/profiler/lib:profiler_session",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme_encode",
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
],
|
||||
alwayslink = 1,
|
||||
|
@ -71,8 +71,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/connected_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/profiler_session.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme_encode.h"
|
||||
#include "tensorflow/core/protobuf/config.pb.h"
|
||||
#include "tensorflow/core/util/device_name_utils.h"
|
||||
#include "tensorflow/core/util/env_var.h"
|
||||
@ -500,18 +501,24 @@ Status DirectSession::RunInternal(
|
||||
RunState run_state(step_id, &devices_);
|
||||
const size_t num_executors = executors_and_keys->items.size();
|
||||
|
||||
profiler::TraceMe activity(
|
||||
profiler::TraceMeProducer activity(
|
||||
// To TraceMeConsumers in ExecutorState::Process/Finish.
|
||||
[&] {
|
||||
if (options_.config.experimental().has_session_metadata()) {
|
||||
const auto& model_metadata =
|
||||
options_.config.experimental().session_metadata();
|
||||
return strings::StrCat("SessionRun#id=", step_id,
|
||||
",model_id=", model_metadata.name(), ":",
|
||||
model_metadata.version(), "#");
|
||||
string model_id = strings::StrCat(model_metadata.name(), ":",
|
||||
model_metadata.version());
|
||||
return profiler::TraceMeEncode("SessionRun",
|
||||
{{"id", step_id},
|
||||
{"$r", 1} /*root_event*/,
|
||||
{"model_id", model_id}});
|
||||
} else {
|
||||
return strings::StrCat("SessionRun#id=", step_id, "#");
|
||||
return profiler::TraceMeEncode(
|
||||
"SessionRun", {{"id", step_id}, {"$r", 1} /*root_event*/});
|
||||
}
|
||||
},
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
|
||||
std::unique_ptr<DebuggerStateInterface> debugger_state;
|
||||
|
@ -290,7 +290,9 @@ KERNEL_AND_DEVICE_DEPS = [
|
||||
"//tensorflow/core:lib_internal",
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core/profiler/lib:annotated_traceme",
|
||||
"//tensorflow/core/profiler/lib:connected_traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme_encode",
|
||||
"//tensorflow/core/grappler/optimizers:meta_optimizer",
|
||||
]
|
||||
|
||||
|
@ -40,7 +40,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/fingerprint.h"
|
||||
#include "tensorflow/core/platform/setround.h"
|
||||
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/connected_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme_encode.h"
|
||||
#include "tensorflow/core/public/version.h"
|
||||
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
|
||||
#if !defined(IS_MOBILE_PLATFORM)
|
||||
@ -381,16 +383,17 @@ void KernelAndDeviceFunc::RunAsync(
|
||||
|
||||
outputs->clear();
|
||||
|
||||
profiler::TraceMe* activity = new profiler::TraceMe(
|
||||
profiler::TraceMeProducer activity(
|
||||
// To TraceMeConsumers in ExecutorState::Process/Finish.
|
||||
[&] {
|
||||
return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id,
|
||||
"#");
|
||||
return profiler::TraceMeEncode(
|
||||
"FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/});
|
||||
},
|
||||
profiler::ContextType::kTfExecutor, opts->step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
pflr_->Run(*opts, handle_, inputs, outputs,
|
||||
[opts, rendezvous, local_cm, step_container, this, activity,
|
||||
[opts, rendezvous, local_cm, step_container, this,
|
||||
done = std::move(done)](const Status& s) {
|
||||
delete activity;
|
||||
rendezvous->Unref();
|
||||
if (step_container == nullptr) {
|
||||
this->step_container_.CleanUp();
|
||||
|
@ -65,8 +65,9 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/annotated_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/connected_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/scoped_annotation.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme_encode.h"
|
||||
#include "tensorflow/core/protobuf/error_codes.pb.h"
|
||||
#include "tensorflow/core/util/tensor_slice_reader_cache.h"
|
||||
|
||||
@ -627,16 +628,20 @@ void ExecutorState<PropagatorStateType>::ProcessConstTensor(
|
||||
template <class PropagatorStateType>
|
||||
void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node,
|
||||
int64 scheduled_nsec) {
|
||||
profiler::TraceMe activity(
|
||||
profiler::TraceMeConsumer activity(
|
||||
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
|
||||
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
|
||||
[&] {
|
||||
// NOTE: This tracing uses the iteration number from the first tagged
|
||||
// node that executes during this call to `Process()`. In principle,
|
||||
// subsequent nodes could have different values of `iter_num` that
|
||||
// will not be traced.
|
||||
return absl::StrCat("ExecutorState::Process#id=", step_id_,
|
||||
",iter_num=", tagged_node.get_iter_num(), "#");
|
||||
return profiler::TraceMeEncode(
|
||||
"ExecutorState::Process",
|
||||
{{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}});
|
||||
},
|
||||
2);
|
||||
profiler::ContextType::kTfExecutor, step_id_,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
WithContext wc(context_);
|
||||
TaggedNodeSeq ready;
|
||||
TaggedNodeReadyQueue inline_ready;
|
||||
@ -1240,11 +1245,15 @@ void ExecutorState<PropagatorStateType>::Finish() {
|
||||
}
|
||||
delete this;
|
||||
runner([step_id, status, done_cb = std::move(done_cb)]() {
|
||||
profiler::TraceMe traceme(
|
||||
profiler::TraceMeConsumer activity(
|
||||
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
|
||||
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
|
||||
[&] {
|
||||
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
|
||||
return profiler::TraceMeEncode("ExecutorDoneCallback",
|
||||
{{"id", step_id}});
|
||||
},
|
||||
2);
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
done_cb(status);
|
||||
});
|
||||
return;
|
||||
@ -1259,22 +1268,30 @@ void ExecutorState<PropagatorStateType>::Finish() {
|
||||
done_cb = std::move(done_cb)](const Status& status) mutable {
|
||||
delete this;
|
||||
runner([step_id, status, done_cb = std::move(done_cb)]() {
|
||||
profiler::TraceMe traceme(
|
||||
profiler::TraceMeConsumer activity(
|
||||
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
|
||||
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
|
||||
[&] {
|
||||
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
|
||||
return profiler::TraceMeEncode("ExecutorDoneCallback",
|
||||
{{"id", step_id}});
|
||||
},
|
||||
2);
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
done_cb(status);
|
||||
});
|
||||
});
|
||||
} else {
|
||||
delete this;
|
||||
runner([step_id, status, done_cb = std::move(done_cb)]() {
|
||||
profiler::TraceMe traceme(
|
||||
profiler::TraceMeConsumer activity(
|
||||
// From TraceMeProducer in KernelAndDeviceFunc::RunAsync,
|
||||
// DirectSession::RunInternal or GraphMgr::ExecuteAsync.
|
||||
[&] {
|
||||
return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#");
|
||||
return profiler::TraceMeEncode("ExecutorDoneCallback",
|
||||
{{"id", step_id}});
|
||||
},
|
||||
2);
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
done_cb(status);
|
||||
});
|
||||
}
|
||||
|
@ -439,7 +439,8 @@ cc_library(
|
||||
"//tensorflow/core:protos_all_cc",
|
||||
"//tensorflow/core:worker_proto_cc",
|
||||
"//tensorflow/core/debug",
|
||||
"//tensorflow/core/profiler/lib:traceme",
|
||||
"//tensorflow/core/profiler/lib:connected_traceme",
|
||||
"//tensorflow/core/profiler/lib:traceme_encode",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -49,7 +49,8 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/mutex.h"
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/connected_traceme.h"
|
||||
#include "tensorflow/core/profiler/lib/traceme_encode.h"
|
||||
#include "tensorflow/core/protobuf/worker.pb.h"
|
||||
#include "tensorflow/core/util/env_var.h"
|
||||
|
||||
@ -419,8 +420,13 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
|
||||
CancellationManager* cancellation_manager,
|
||||
const NamedTensors& in, StatusCallback done) {
|
||||
const uint64 start_time_usecs = Env::Default()->NowMicros();
|
||||
profiler::TraceMe activity(
|
||||
[step_id] { return absl::StrCat("RunGraph#id=", step_id, "#"); },
|
||||
profiler::TraceMeProducer activity(
|
||||
// To TraceMeConsumers in ExecutorState::Process/Finish or RunGraphDone.
|
||||
[step_id] {
|
||||
return profiler::TraceMeEncode(
|
||||
"RunGraph", {{"id", step_id}, {"$r", 1} /*root_event*/});
|
||||
},
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
// Lookup an item. Holds one ref while executing.
|
||||
Item* item = nullptr;
|
||||
@ -486,10 +492,12 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
|
||||
cancellation_manager, session,
|
||||
[item, rendezvous, ce_handle, done, start_time_usecs, input_size,
|
||||
step_id](const Status& s) {
|
||||
profiler::TraceMe activity(
|
||||
profiler::TraceMeConsumer activity(
|
||||
// From TraceMeProducer in GraphMgr::ExecuteAsync.
|
||||
[step_id] {
|
||||
return absl::StrCat("RunGraphDone#id=", step_id, "#");
|
||||
return profiler::TraceMeEncode("RunGraphDone", {{"id", step_id}});
|
||||
},
|
||||
profiler::ContextType::kTfExecutor, step_id,
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
done(s);
|
||||
metrics::RecordGraphInputTensors(input_size);
|
||||
|
@ -175,6 +175,7 @@ filegroup(
|
||||
name = "mobile_srcs",
|
||||
srcs = [
|
||||
"annotated_traceme.h",
|
||||
"connected_traceme.h",
|
||||
"profiler_session.cc",
|
||||
"profiler_session.h",
|
||||
"scoped_annotation.h",
|
||||
|
Loading…
x
Reference in New Issue
Block a user