Add different patterns ("schedule", "gpu", "all") for showing op execution time in the timeline
This commit is contained in:
parent
42e6af1ac7
commit
6c372f244b
@ -359,7 +359,8 @@ class Timeline(object):
|
|||||||
graph: (Optional) The 'Graph' that was executed.
|
graph: (Optional) The 'Graph' that was executed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._step_stats = step_stats
|
self._origin_step_stats = step_stats
|
||||||
|
self._step_stats = None
|
||||||
self._graph = graph
|
self._graph = graph
|
||||||
self._chrome_trace = _ChromeTraceFormatter()
|
self._chrome_trace = _ChromeTraceFormatter()
|
||||||
self._next_pid = 0
|
self._next_pid = 0
|
||||||
@ -396,6 +397,17 @@ class Timeline(object):
|
|||||||
inputs = inputs.split(', ')
|
inputs = inputs.split(', ')
|
||||||
return nn, op, inputs
|
return nn, op, inputs
|
||||||
|
|
||||||
|
def _parse_kernel_label(self, label, node_name):
|
||||||
|
"""Parses the fields in a node timeline label."""
|
||||||
|
# Expects labels of the form: retval (arg) detail @@annotation
|
||||||
|
match = re.match(r'.*@@(.*)\#id.*', label)
|
||||||
|
if match is not None:
|
||||||
|
node_name = match.group(1)
|
||||||
|
# Node names should always have the form 'name:op'.
|
||||||
|
fields = node_name.split(':') + ['unknown']
|
||||||
|
name, op = fields[:2]
|
||||||
|
return name, op
|
||||||
|
|
||||||
def _assign_lanes(self):
|
def _assign_lanes(self):
|
||||||
"""Assigns non-overlapping lanes for the activities on each device."""
|
"""Assigns non-overlapping lanes for the activities on each device."""
|
||||||
for device_stats in self._step_stats.dev_stats:
|
for device_stats in self._step_stats.dev_stats:
|
||||||
@ -427,9 +439,7 @@ class Timeline(object):
|
|||||||
tid = nodestats.thread_id
|
tid = nodestats.thread_id
|
||||||
inputs = []
|
inputs = []
|
||||||
if is_gputrace:
|
if is_gputrace:
|
||||||
# Node names should always have the form 'name:op'.
|
node_name, op = self._parse_kernel_label(nodestats.timeline_label, node_name)
|
||||||
fields = node_name.split(':') + ['unknown']
|
|
||||||
node_name, op = fields[:2]
|
|
||||||
elif node_name == 'RecvTensor':
|
elif node_name == 'RecvTensor':
|
||||||
# RPC tracing does not use the standard timeline_label format.
|
# RPC tracing does not use the standard timeline_label format.
|
||||||
op = 'RecvTensor'
|
op = 'RecvTensor'
|
||||||
@ -607,7 +617,47 @@ class Timeline(object):
|
|||||||
total_bytes)
|
total_bytes)
|
||||||
self._allocator_maximums = alloc_maxes
|
self._allocator_maximums = alloc_maxes
|
||||||
|
|
||||||
def analyze_step_stats(self, show_dataflow=True, show_memory=True):
|
def _preprocess_op_time(self, op_time):
    """Rewrites the start and end times of ops in the step stats.

    Args:
      op_time: How the execution time of an op is shown in the timeline.
        Possible values are "schedule", "gpu" and "all".
        "schedule" shows an op from the time it is scheduled to the end of
        the scheduling (the unmodified values from step_stats); note that by
        the end of its scheduling its async kernels may not have started yet.
        "gpu" shows an op spanning the execution time of its GPU kernels.
        "all" shows an op from the start of its scheduling to the end of its
        last kernel.
    """
    if op_time == "schedule":
        # Fix: even in "schedule" mode the stats must be published, since
        # __init__ leaves self._step_stats as None; returning without this
        # assignment would crash every later pass over self._step_stats.
        self._step_stats = self._origin_step_stats
        return
    # Work on a deep copy so repeated calls with different op_time values
    # each start from the original, unmodified step stats.
    self._step_stats = copy.deepcopy(self._origin_step_stats)

    # Separate the GPU tracer streams ("/stream:all") from the job/task
    # device stats that hold the op scheduling entries.
    stream_all_stats = []
    job_stats = []
    for stats in self._step_stats.dev_stats:
        if "/stream:all" in stats.device:
            stream_all_stats.append(stats)
        elif "/job" in stats.device:
            job_stats.append(stats)

    # Record the start time of the first kernel and the end time of
    # the last kernel for all ops.
    op_gpu_start = {}
    op_gpu_end = {}
    for stats in stream_all_stats:
        for kernel in stats.node_stats:
            name, _ = self._parse_kernel_label(kernel.timeline_label,
                                               kernel.node_name)
            start = kernel.all_start_micros
            end = kernel.all_start_micros + kernel.all_end_rel_micros
            if name in op_gpu_start:
                op_gpu_start[name] = min(op_gpu_start[name], start)
                op_gpu_end[name] = max(op_gpu_end[name], end)
            else:
                op_gpu_start[name] = start
                op_gpu_end[name] = end

    # Update the start and end time of each op according to op_time.
    for stats in job_stats:
        for op in stats.node_stats:
            if op.node_name in op_gpu_start:
                # The op ends at the later of its last kernel and its own
                # scheduling end; "gpu" additionally moves the start to the
                # first kernel, while "all" keeps the scheduling start.
                end = max(op_gpu_end[op.node_name],
                          op.all_start_micros + op.all_end_rel_micros)
                if op_time == "gpu":
                    op.all_start_micros = op_gpu_start[op.node_name]
                op.all_end_rel_micros = end - op.all_start_micros
|
||||||
|
|
||||||
|
def analyze_step_stats(self, show_dataflow=True, show_memory=True, op_time="schedule"):
|
||||||
|
self._preprocess_op_time(op_time)
|
||||||
self._allocate_pids()
|
self._allocate_pids()
|
||||||
self._assign_lanes()
|
self._assign_lanes()
|
||||||
self._analyze_tensors(show_memory)
|
self._analyze_tensors(show_memory)
|
||||||
@ -618,7 +668,7 @@ class Timeline(object):
|
|||||||
chrome_trace=self._chrome_trace,
|
chrome_trace=self._chrome_trace,
|
||||||
allocator_maximums=self._allocator_maximums)
|
allocator_maximums=self._allocator_maximums)
|
||||||
|
|
||||||
def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False):
|
def generate_chrome_trace_format(self, show_dataflow=True, show_memory=False, op_time="schedule"):
    """Produces a trace in Chrome Trace Format.

    Args:
      show_dataflow: (Optional.) If True, add flow events to the trace
        connecting producers and consumers of tensors.
      show_memory: (Optional.) If True, add object snapshot events to the
        trace showing the sizes and lifetimes of tensors.
      op_time: (Optional.) How the execution time of an op is shown in the
        timeline. Possible values are "schedule", "gpu" and "all".
        "schedule" will show an op from the time it is scheduled to the end
        of the scheduling. Notice by the end of its scheduling its async
        kernels may not have started yet; it is shown using the default
        values from step_stats. "gpu" will show an op with the execution
        time of its kernels. "all" will show an op from the start of its
        scheduling to the end of its last kernel.

    Returns:
      A JSON formatted string in Chrome Trace format.
    """
    analysis = self.analyze_step_stats(show_dataflow=show_dataflow,
                                       show_memory=show_memory,
                                       op_time=op_time)
    return analysis.chrome_trace.format_to_string(pretty=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user