Add different patterns ("schedule", "gpu", "all") for showing op execution time in the timeline
This commit is contained in:
parent
42e6af1ac7
commit
6c372f244b
@ -359,7 +359,8 @@ class Timeline(object):
|
|||||||
graph: (Optional) The 'Graph' that was executed.
|
graph: (Optional) The 'Graph' that was executed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._step_stats = step_stats
|
self._origin_step_stats = step_stats
|
||||||
|
self._step_stats = None
|
||||||
self._graph = graph
|
self._graph = graph
|
||||||
self._chrome_trace = _ChromeTraceFormatter()
|
self._chrome_trace = _ChromeTraceFormatter()
|
||||||
self._next_pid = 0
|
self._next_pid = 0
|
||||||
@ -396,6 +397,17 @@ class Timeline(object):
|
|||||||
inputs = inputs.split(', ')
|
inputs = inputs.split(', ')
|
||||||
return nn, op, inputs
|
return nn, op, inputs
|
||||||
|
|
||||||
|
def _parse_kernel_label(self, label, node_name):
|
||||||
|
"""Parses the fields in a node timeline label."""
|
||||||
|
# Expects labels of the form: retval (arg) detail @@annotation
|
||||||
|
match = re.match(r'.*@@(.*)\#id.*', label)
|
||||||
|
if match is not None:
|
||||||
|
node_name = match.group(1)
|
||||||
|
# Node names should always have the form 'name:op'.
|
||||||
|
fields = node_name.split(':') + ['unknown']
|
||||||
|
name, op = fields[:2]
|
||||||
|
return name, op
|
||||||
|
|
||||||
def _assign_lanes(self):
|
def _assign_lanes(self):
|
||||||
"""Assigns non-overlapping lanes for the activities on each device."""
|
"""Assigns non-overlapping lanes for the activities on each device."""
|
||||||
for device_stats in self._step_stats.dev_stats:
|
for device_stats in self._step_stats.dev_stats:
|
||||||
@ -427,9 +439,7 @@ class Timeline(object):
|
|||||||
tid = nodestats.thread_id
|
tid = nodestats.thread_id
|
||||||
inputs = []
|
inputs = []
|
||||||
if is_gputrace:
|
if is_gputrace:
|
||||||
# Node names should always have the form 'name:op'.
|
node_name, op = self._parse_kernel_label(nodestats.timeline_label, node_name)
|
||||||
fields = node_name.split(':') + ['unknown']
|
|
||||||
node_name, op = fields[:2]
|
|
||||||
elif node_name == 'RecvTensor':
|
elif node_name == 'RecvTensor':
|
||||||
# RPC tracing does not use the standard timeline_label format.
|
# RPC tracing does not use the standard timeline_label format.
|
||||||
op = 'RecvTensor'
|
op = 'RecvTensor'
|
||||||
@ -607,7 +617,47 @@ class Timeline(object):
|
|||||||
total_bytes)
|
total_bytes)
|
||||||
self._allocator_maximums = alloc_maxes
|
self._allocator_maximums = alloc_maxes
|
||||||
|
|
||||||
def analyze_step_stats(self, show_dataflow=True, show_memory=True):
|
def _preprocess_op_time(self, op_time):
    """Rewrites the start and end times of ops in the step stats.

    Args:
      op_time: How the execution time of an op is shown in the timeline.
        Possible values are "schedule", "gpu" and "all".
        "schedule" shows an op from the time it is scheduled to the end of
        the scheduling (the unmodified values from step_stats); note that by
        the end of its scheduling its async kernels may not have started yet.
        "gpu" shows an op spanning the execution time of its GPU kernels.
        "all" shows an op from the start of its scheduling to the end of its
        last kernel.
    """
    if op_time == "schedule":
        # Fix: even in "schedule" mode the stats must be published, since
        # __init__ leaves self._step_stats as None; returning without this
        # assignment would crash every later pass over self._step_stats.
        self._step_stats = self._origin_step_stats
        return
    # Work on a deep copy so repeated calls with different op_time values
    # each start from the original, unmodified step stats.
    self._step_stats = copy.deepcopy(self._origin_step_stats)

    # Separate the GPU tracer streams ("/stream:all") from the job/task
    # device stats that hold the op scheduling entries.
    stream_all_stats = []
    job_stats = []
    for stats in self._step_stats.dev_stats:
        if "/stream:all" in stats.device:
            stream_all_stats.append(stats)
        elif "/job" in stats.device:
            job_stats.append(stats)

    # Record the start time of the first kernel and the end time of
    # the last kernel for all ops.
    op_gpu_start = {}
    op_gpu_end = {}
    for stats in stream_all_stats:
        for kernel in stats.node_stats:
            name, _ = self._parse_kernel_label(kernel.timeline_label,
                                               kernel.node_name)
            start = kernel.all_start_micros
            end = kernel.all_start_micros + kernel.all_end_rel_micros
            if name in op_gpu_start:
                op_gpu_start[name] = min(op_gpu_start[name], start)
                op_gpu_end[name] = max(op_gpu_end[name], end)
            else:
                op_gpu_start[name] = start
                op_gpu_end[name] = end

    # Update the start and end time of each op according to op_time.
    for stats in job_stats:
        for op in stats.node_stats:
            if op.node_name in op_gpu_start:
                # The op ends at the later of its last kernel and its own
                # scheduling end; "gpu" additionally moves the start to the
                # first kernel, while "all" keeps the scheduling start.
                end = max(op_gpu_end[op.node_name],
                          op.all_start_micros + op.all_end_rel_micros)
                if op_time == "gpu":
                    op.all_start_micros = op_gpu_start[op.node_name]
                op.all_end_rel_micros = end - op.all_start_micros
|
||||||
|
|
||||||
|
def analyze_step_stats(self, show_dataflow=True, show_memory=True, op_time="schedule"):
|
||||||
|
self._preprocess_op_time(op_time)
|
||||||
self._allocate_pids()
|
self._allocate_pids()
|
||||||
self._assign_lanes()
|
self._assign_lanes()
|
||||||
self._analyze_tensors(show_memory)
|
self._analyze_tensors(show_memory)
|
||||||
@ -618,7 +668,7 @@ class Timeline(object):
|
|||||||
chrome_trace=self._chrome_trace,
|
chrome_trace=self._chrome_trace,
|
||||||
allocator_maximums=self._allocator_maximums)
|
allocator_maximums=self._allocator_maximums)
|
||||||
|
|
||||||
def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False):
|
def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False, op_time="schedule"):
    """Produces a trace in Chrome Trace Format.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot events to the
        trace showing the sizes and lifetimes of tensors.
      op_time: (Optional.) How the execution time of an op is shown in the
        timeline. Possible values are "schedule", "gpu" and "all".
        "schedule" will show an op from the time it is scheduled to the end
        of the scheduling. Notice by the end of its scheduling its async
        kernels may not have started yet; it is shown using the default
        values from step_stats. "gpu" will show an op with the execution
        time of its kernels. "all" will show an op from the start of its
        scheduling to the end of its last kernel.

    Returns:
      A JSON formatted string in Chrome Trace format.
    """
    analysis = self.analyze_step_stats(show_dataflow=show_dataflow,
                                       show_memory=show_memory,
                                       op_time=op_time)
    return analysis.chrome_trace.format_to_string(pretty=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user