This change make sure that schedule activities are always recorded during profiling. This Cl also fixed a bug in trace event converter. Now it filters out events whose end time is bigger than the end time of profiler session.

PiperOrigin-RevId: 240899395
This commit is contained in:
Xiao Yu 2019-03-28 18:51:22 -07:00 committed by TensorFlower Gardener
parent d54eb295a6
commit d75adcad0d
2 changed files with 16 additions and 14 deletions

View File

@ -1792,24 +1792,21 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
if (TF_PREDICT_FALSE(
MightTrace(item, event_collector_, trace_using_annotations_))) {
const string& op_name = op_kernel->name();
const string kernel_label = strings::StrCat(
op_name, ":", op_kernel->type_string(), "#id=", step_id_, "#");
tracing::ScopedRegion region(tracing::EventCategory::kCompute,
op_name);
if (trace_using_annotations_) {
// The OpKernel may create child activities (such as GPU kernel
// launches), so use a `ScopedAnnotation` to relate these activities
// in the trace.
tracing::ScopedAnnotation activity(
op_name, strings::StrCat(op_kernel->type_string(),
"#id=", step_id_, "#"));
// 'ScopedActivity' will trace the OpKernel scheduling time.
tracing::ScopedActivity activity(kernel_label);
// 'ScopedAnnotation' will trace the OpKernel execution time.
tracing::ScopedAnnotation annotation(kernel_label);
device->Compute(op_kernel, &ctx);
} else {
// Use the cheaper `ScopedActivity` to trace just the OpKernel
// execution.
tracing::ScopedActivity activity(
op_name,
strings::StrCat(op_kernel->type_string(), "#id=", step_id_,
"#"),
item.kernel_is_expensive);
tracing::ScopedActivity activity(kernel_label,
item.kernel_is_expensive);
device->Compute(op_kernel, &ctx);
}
} else {

View File

@ -68,7 +68,8 @@ void AssignLanes(RunMetadata* run_metadata) {
void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
profiler::Trace* trace,
const uint64 profile_start_time_micros) {
const uint64 profile_start_time_micros,
const uint64 profile_end_time_micros) {
AssignLanes(run_metadata);
auto trace_devices = trace->mutable_devices();
@ -95,7 +96,9 @@ void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
// Emit events.
for (auto node :
run_metadata->step_stats().dev_stats(device_id).node_stats()) {
if (node.all_start_micros() < profile_start_time_micros) {
if (node.all_start_micros() < profile_start_time_micros ||
node.all_start_micros() + node.all_end_rel_micros() >
profile_end_time_micros) {
continue;
}
auto* event = trace->add_trace_events();
@ -146,7 +149,9 @@ Status ProfilerSession::SerializeToString(string* content) {
profiler::Trace trace;
ConvertRunMetadataToTraceEvent(&run_metadata, &trace, start_time_micros_);
ConvertRunMetadataToTraceEvent(
&run_metadata, &trace, start_time_micros_,
Env::Default()->NowNanos() / EnvTime::kMicrosToNanos);
trace.SerializeToString(content);
return Status::OK();