This change make sure that schedule activities are always recorded during profiling. This Cl also fixed a bug in trace event converter. Now it filters out events whose end time is bigger than the end time of profiler session.
PiperOrigin-RevId: 240899395
This commit is contained in:
parent
d54eb295a6
commit
d75adcad0d
@ -1792,24 +1792,21 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
|
||||
if (TF_PREDICT_FALSE(
|
||||
MightTrace(item, event_collector_, trace_using_annotations_))) {
|
||||
const string& op_name = op_kernel->name();
|
||||
const string kernel_label = strings::StrCat(
|
||||
op_name, ":", op_kernel->type_string(), "#id=", step_id_, "#");
|
||||
tracing::ScopedRegion region(tracing::EventCategory::kCompute,
|
||||
op_name);
|
||||
if (trace_using_annotations_) {
|
||||
// The OpKernel may create child activities (such as GPU kernel
|
||||
// launches), so use a `ScopedAnnotation` to relate these activities
|
||||
// in the trace.
|
||||
tracing::ScopedAnnotation activity(
|
||||
op_name, strings::StrCat(op_kernel->type_string(),
|
||||
"#id=", step_id_, "#"));
|
||||
// 'ScopedActivity' will trace the OpKernel scheduling time.
|
||||
tracing::ScopedActivity activity(kernel_label);
|
||||
// 'ScopedAnnotation' will trace the OpKernel execution time.
|
||||
tracing::ScopedAnnotation annotation(kernel_label);
|
||||
device->Compute(op_kernel, &ctx);
|
||||
} else {
|
||||
// Use the cheaper `ScopedActivity` to trace just the OpKernel
|
||||
// execution.
|
||||
tracing::ScopedActivity activity(
|
||||
op_name,
|
||||
strings::StrCat(op_kernel->type_string(), "#id=", step_id_,
|
||||
"#"),
|
||||
item.kernel_is_expensive);
|
||||
tracing::ScopedActivity activity(kernel_label,
|
||||
item.kernel_is_expensive);
|
||||
device->Compute(op_kernel, &ctx);
|
||||
}
|
||||
} else {
|
||||
|
@ -68,7 +68,8 @@ void AssignLanes(RunMetadata* run_metadata) {
|
||||
|
||||
void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
|
||||
profiler::Trace* trace,
|
||||
const uint64 profile_start_time_micros) {
|
||||
const uint64 profile_start_time_micros,
|
||||
const uint64 profile_end_time_micros) {
|
||||
AssignLanes(run_metadata);
|
||||
auto trace_devices = trace->mutable_devices();
|
||||
|
||||
@ -95,7 +96,9 @@ void ConvertRunMetadataToTraceEvent(RunMetadata* run_metadata,
|
||||
// Emit events.
|
||||
for (auto node :
|
||||
run_metadata->step_stats().dev_stats(device_id).node_stats()) {
|
||||
if (node.all_start_micros() < profile_start_time_micros) {
|
||||
if (node.all_start_micros() < profile_start_time_micros ||
|
||||
node.all_start_micros() + node.all_end_rel_micros() >
|
||||
profile_end_time_micros) {
|
||||
continue;
|
||||
}
|
||||
auto* event = trace->add_trace_events();
|
||||
@ -146,7 +149,9 @@ Status ProfilerSession::SerializeToString(string* content) {
|
||||
|
||||
profiler::Trace trace;
|
||||
|
||||
ConvertRunMetadataToTraceEvent(&run_metadata, &trace, start_time_micros_);
|
||||
ConvertRunMetadataToTraceEvent(
|
||||
&run_metadata, &trace, start_time_micros_,
|
||||
Env::Default()->NowNanos() / EnvTime::kMicrosToNanos);
|
||||
|
||||
trace.SerializeToString(content);
|
||||
return Status::OK();
|
||||
|
Loading…
Reference in New Issue
Block a user