From 6c372f244b37a1dc4dbd735aa1fc981611be5877 Mon Sep 17 00:00:00 2001
From: zilinzhu
Date: Wed, 26 Feb 2020 10:10:14 +0800
Subject: [PATCH] add different patterns to show ops in the timeline

---
 tensorflow/python/client/timeline.py | 72 ++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/client/timeline.py b/tensorflow/python/client/timeline.py
index c3f38294b5e..760b1747f73 100644
--- a/tensorflow/python/client/timeline.py
+++ b/tensorflow/python/client/timeline.py
@@ -359,7 +359,8 @@ class Timeline(object):
       graph: (Optional) The 'Graph' that was executed.
     """
 
-    self._step_stats = step_stats
+    self._origin_step_stats = step_stats
+    self._step_stats = None
     self._graph = graph
     self._chrome_trace = _ChromeTraceFormatter()
     self._next_pid = 0
@@ -396,6 +397,17 @@ class Timeline(object):
       inputs = inputs.split(', ')
     return nn, op, inputs
 
+  def _parse_kernel_label(self, label, node_name):
+    """Parses the fields in a kernel timeline label."""
+    # Expects labels of the form: retval (arg) detail @@annotation
+    match = re.match(r'.*@@(.*)#id.*', label)
+    if match is not None:
+      node_name = match.group(1)
+    # Node names should always have the form 'name:op'.
+    fields = node_name.split(':') + ['unknown']
+    name, op = fields[:2]
+    return name, op
+
   def _assign_lanes(self):
     """Assigns non-overlapping lanes for the activities on each device."""
     for device_stats in self._step_stats.dev_stats:
@@ -427,9 +439,7 @@ class Timeline(object):
       tid = nodestats.thread_id
       inputs = []
       if is_gputrace:
-        # Node names should always have the form 'name:op'.
-        fields = node_name.split(':') + ['unknown']
-        node_name, op = fields[:2]
+        node_name, op = self._parse_kernel_label(nodestats.timeline_label, node_name)
       elif node_name == 'RecvTensor':
         # RPC tracing does not use the standard timeline_label format.
         op = 'RecvTensor'
@@ -607,7 +617,48 @@ class Timeline(object):
                                         total_bytes)
     self._allocator_maximums = alloc_maxes
 
-  def analyze_step_stats(self, show_dataflow=True, show_memory=True):
+  def _preprocess_op_time(self, op_time):
+    """Updates op start/end times in step_stats according to op_time."""
+    if op_time == "schedule":
+      self._step_stats = self._origin_step_stats
+      return
+    self._step_stats = copy.deepcopy(self._origin_step_stats)
+    # Separate the job/task stats from the GPU tracer "stream:all" stats.
+    stream_all_stats = []
+    job_stats = []
+    for stats in self._step_stats.dev_stats:
+      if "/stream:all" in stats.device:
+        stream_all_stats.append(stats)
+      elif "/job" in stats.device:
+        job_stats.append(stats)
+
+    # Record the start time of the first kernel and the end time of
+    # the last kernel for each op.
+    op_gpu_start = {}
+    op_gpu_end = {}
+    for stats in stream_all_stats:
+      for kernel in stats.node_stats:
+        name, _ = self._parse_kernel_label(kernel.timeline_label, kernel.node_name)
+        start = kernel.all_start_micros
+        end = kernel.all_start_micros + kernel.all_end_rel_micros
+        if name in op_gpu_start:
+          op_gpu_start[name] = min(op_gpu_start[name], start)
+          op_gpu_end[name] = max(op_gpu_end[name], end)
+        else:
+          op_gpu_start[name] = start
+          op_gpu_end[name] = end
+
+    # Update the start and end time of each op according to op_time.
+    for stats in job_stats:
+      for op in stats.node_stats:
+        if op.node_name in op_gpu_start:
+          end = max(op_gpu_end[op.node_name], op.all_start_micros + op.all_end_rel_micros)
+          if op_time == "gpu":
+            op.all_start_micros = op_gpu_start[op.node_name]
+          op.all_end_rel_micros = end - op.all_start_micros
+
+  def analyze_step_stats(self, show_dataflow=True, show_memory=True, op_time="schedule"):
+    self._preprocess_op_time(op_time)
     self._allocate_pids()
     self._assign_lanes()
     self._analyze_tensors(show_memory)
@@ -618,7 +669,7 @@ class Timeline(object):
         chrome_trace=self._chrome_trace,
         allocator_maximums=self._allocator_maximums)
 
-  def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False):
+  def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False, op_time="schedule"):
     """Produces a trace in Chrome Trace Format.
 
     Args:
@@ -626,11 +677,18 @@ class Timeline(object):
         connecting producers and consumers of tensors.
       show_memory: (Optional.) If True, add object snapshot events to the trace
         showing the sizes and lifetimes of tensors.
+      op_time: (Optional.) How the execution time of an op is shown in the
+        timeline. Possible values are "schedule", "gpu" and "all". "schedule"
+        shows an op from the time it is scheduled to the end of the scheduling,
+        i.e. the default times recorded in step_stats; note that by then its
+        asynchronous kernels may not have started yet. "gpu" shows an op
+        spanning the execution time of its GPU kernels. "all" shows an op from
+        the start of its scheduling to the end of its last kernel.
 
     Returns:
       A JSON formatted string in Chrome Trace format.
     """
     step_stats_analysis = self.analyze_step_stats(
-        show_dataflow=show_dataflow, show_memory=show_memory)
+        show_dataflow=show_dataflow, show_memory=show_memory, op_time=op_time)
 
     return step_stats_analysis.chrome_trace.format_to_string(pretty=True)
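
For reference, the parsing in _parse_kernel_label can be exercised on its own.
Below is a minimal sketch of the same logic outside the Timeline class; the
sample labels and kernel name are hypothetical, assuming only the
'@@annotation#id' shape the regex targets:

    import re

    def parse_kernel_label(label, node_name):
      # If the label carries an '@@name:op#id...' annotation, prefer it
      # over the raw kernel node name.
      match = re.match(r'.*@@(.*)#id.*', label)
      if match is not None:
        node_name = match.group(1)
      # Pad so a bare name still unpacks, with 'unknown' as the op.
      fields = node_name.split(':') + ['unknown']
      name, op = fields[:2]
      return name, op

    print(parse_kernel_label('sgemm_128x64@@MatMul_1:MatMul#id=7#', 'fallback'))
    # ('MatMul_1', 'MatMul')
    print(parse_kernel_label('no annotation here', 'AddN_1:AddN'))
    # ('AddN_1', 'AddN')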
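
The rewriting in _preprocess_op_time reduces to per-op interval arithmetic:
take the minimum start and maximum end over the op's kernels on the
'/stream:all' device, then move or stretch the op's scheduled interval
depending on op_time. A toy illustration with made-up microsecond values
instead of real StepStats protos:

    # One op scheduled on the CPU at [100, 105) whose two GPU kernels run
    # at [112, 120) and [121, 140).
    schedule = {'MatMul_1': (100, 105)}
    kernels = {'MatMul_1': [(112, 120), (121, 140)]}

    op_gpu_start = {op: min(s for s, _ in ks) for op, ks in kernels.items()}
    op_gpu_end = {op: max(e for _, e in ks) for op, ks in kernels.items()}

    def shown_interval(op, op_time):
      sched_start, sched_end = schedule[op]
      if op_time == 'schedule' or op not in op_gpu_start:
        return sched_start, sched_end  # default times from step_stats
      end = max(op_gpu_end[op], sched_end)
      start = op_gpu_start[op] if op_time == 'gpu' else sched_start
      return start, end

    print(shown_interval('MatMul_1', 'schedule'))  # (100, 105)
    print(shown_interval('MatMul_1', 'gpu'))       # (112, 140)
    print(shown_interval('MatMul_1', 'all'))       # (100, 140)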
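
Callers reach the new behavior through generate_chrome_trace_format. A minimal
usage sketch, assuming this patch is applied and a TF 1.x-style session on a
machine with a GPU ("gpu" and "all" only change the trace when '/stream:all'
kernel stats were captured):

    import tensorflow.compat.v1 as tf
    from tensorflow.python.client import timeline

    tf.disable_eager_execution()
    a = tf.random.normal([1024, 1024])
    b = tf.random.normal([1024, 1024])
    c = tf.matmul(a, b)

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    with tf.Session() as sess:
      sess.run(c, options=run_options, run_metadata=run_metadata)

    # op_time="gpu" draws each op over its kernels' execution; "all" spans
    # schedule start to last kernel end; "schedule" is the old behavior.
    tl = timeline.Timeline(run_metadata.step_stats)
    with open('timeline_gpu.json', 'w') as f:
      f.write(tl.generate_chrome_trace_format(op_time='gpu'))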