tfdbg CLI: let list_tensors (lt) output display dump file size
Also allow sorting of the dumped tensors by dump file size. Change: 143511153
This commit is contained in:
parent
6703501903
commit
271b7f34f4
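
For context, this is the tfdbg CLI feature the change enables (illustrative session, not verbatim output; flags per the diff below):

    tfdbg> lt -s dump_size       # list dumped tensors, smallest dump file first
    tfdbg> lt -s dump_size -r    # reverse: largest dump file first
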
@@ -441,6 +441,7 @@ cuda_py_test(
     ],
     additional_deps = [
         ":analyzer_cli",
+        ":command_parser",
         ":debug_data",
         ":debug_utils",
        ":debugger_cli_common",
@@ -51,6 +51,7 @@ CTRL_LABEL = "(Ctrl) "
 ELLIPSIS = "..."
 
 SORT_TENSORS_BY_TIMESTAMP = "timestamp"
+SORT_TENSORS_BY_DUMP_SIZE = "dump_size"
 SORT_TENSORS_BY_OP_TYPE = "op_type"
 SORT_TENSORS_BY_TENSOR_NAME = "tensor_name"
 
@@ -173,11 +174,9 @@ class DebugAnalyzer(object):
         dest="sort_by",
         type=str,
         default=SORT_TENSORS_BY_TIMESTAMP,
-        help=("the field to sort the data by: " +
-              "(%s | %s | %s)" %
-              (SORT_TENSORS_BY_TIMESTAMP,
-               SORT_TENSORS_BY_OP_TYPE,
-               SORT_TENSORS_BY_TENSOR_NAME)))
+        help=("the field to sort the data by: (%s | %s | %s | %s)" %
+              (SORT_TENSORS_BY_TIMESTAMP, SORT_TENSORS_BY_DUMP_SIZE,
+               SORT_TENSORS_BY_OP_TYPE, SORT_TENSORS_BY_TENSOR_NAME)))
     ap.add_argument(
         "-r",
         "--reverse",
@@ -431,7 +430,7 @@ class DebugAnalyzer(object):
 
     # TODO(cais): Implement filter by lambda on tensor value.
 
-    max_timestamp_width, max_op_type_width = (
+    max_timestamp_width, max_dump_size_width, max_op_type_width = (
         self._measure_tensor_list_column_widths(data_to_show))
 
     # Sort the data.
@@ -440,7 +439,7 @@ class DebugAnalyzer(object):
 
     output.extend(
         self._tensor_list_column_heads(parsed, max_timestamp_width,
-                                       max_op_type_width))
+                                       max_dump_size_width, max_op_type_width))
 
     dump_count = 0
     for dump in data_to_show:
@@ -453,13 +452,17 @@ class DebugAnalyzer(object):
         continue
 
       rel_time = (dump.timestamp - self._debug_dump.t0) / 1000.0
+      dump_size_str = cli_shared.bytes_to_readable_str(dump.dump_size_bytes)
       dumped_tensor_name = "%s:%d" % (dump.node_name, dump.output_slot)
       op_type = self._debug_dump.node_op_type(dump.node_name)
 
       line = "[%.3f]" % rel_time
       line += " " * (max_timestamp_width - len(line))
+      line += dump_size_str
+      line += " " * (max_timestamp_width + max_dump_size_width - len(line))
       line += op_type
-      line += " " * (max_timestamp_width + max_op_type_width - len(line))
+      line += " " * (max_timestamp_width + max_dump_size_width +
+                     max_op_type_width - len(line))
       line += " %s" % dumped_tensor_name
 
       output.append(
@@ -492,6 +495,7 @@ class DebugAnalyzer(object):
 
     Returns:
       (int) maximum width of the timestamp column. 0 if data is empty.
+      (int) maximum width of the dump size column. 0 if data is empty.
       (int) maximum width of the op type column. 0 if data is empty.
     """
 
@@ -500,13 +504,19 @@ class DebugAnalyzer(object):
     max_rel_time_ms = (data[-1].timestamp - self._debug_dump.t0) / 1000.0
     max_timestamp_width = len("[%.3f] " % max_rel_time_ms)
 
+    max_dump_size_width = 0
+    for dump in data:
+      dump_size_str = cli_shared.bytes_to_readable_str(dump.dump_size_bytes)
+      if len(dump_size_str) + 1 > max_dump_size_width:
+        max_dump_size_width = len(dump_size_str) + 1
+
     max_op_type_width = 0
     for dump in data:
       op_type = self._debug_dump.node_op_type(dump.node_name)
       if len(op_type) > max_op_type_width:
         max_op_type_width = len(op_type)
 
-    return max_timestamp_width, max_op_type_width
+    return max_timestamp_width, max_dump_size_width, max_op_type_width
 
   def _sort_dump_data_by(self, data, sort_by, reverse):
     """Sort a list of DebugTensorDatum in specified order.
@@ -528,6 +538,8 @@ class DebugAnalyzer(object):
           data,
           reverse=reverse,
           key=lambda x: x.timestamp)
+    elif sort_by == SORT_TENSORS_BY_DUMP_SIZE:
+      return sorted(data, reverse=reverse, key=lambda x: x.dump_size_bytes)
     elif sort_by == SORT_TENSORS_BY_OP_TYPE:
       return sorted(
           data,
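
A minimal standalone sketch of the sorting behavior added above (illustration only; `Datum` is a hypothetical stand-in for `DebugTensorDatum`):

    import collections

    # Stand-in carrying only the field the new branch sorts on.
    Datum = collections.namedtuple("Datum", ["tensor_name", "dump_size_bytes"])

    data = [Datum("u:0", 4096), Datum("v:0", 128), Datum("add:0", 1024)]
    # Equivalent of `lt -s dump_size`: ascending dump file size.
    print(sorted(data, key=lambda x: x.dump_size_bytes))
    # With -r, reverse=True flips the order, as in _sort_dump_data_by.
    print(sorted(data, reverse=True, key=lambda x: x.dump_size_bytes))
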
@@ -542,12 +554,13 @@ class DebugAnalyzer(object):
       raise ValueError("Unsupported key to sort tensors by: %s" % sort_by)
 
   def _tensor_list_column_heads(self, parsed, max_timestamp_width,
-                                max_op_type_width):
+                                max_dump_size_width, max_op_type_width):
     """Generate a line containing the column heads of the tensor list.
 
     Args:
       parsed: Parsed arguments (by argparse) of the list_tensors command.
       max_timestamp_width: (int) maximum width of the timestamp column.
+      max_dump_size_width: (int) maximum width of the dump size column.
       max_op_type_width: (int) maximum width of the op type column.
 
     Returns:
@@ -564,30 +577,43 @@ class DebugAnalyzer(object):
 
     attr_segs = {0: []}
     row = "t (ms)"
-    command = "%s -s timestamp" % base_command
-    if parsed.sort_by == "timestamp" and not parsed.reverse:
+    command = "%s -s %s" % (base_command, SORT_TENSORS_BY_TIMESTAMP)
+    if parsed.sort_by == SORT_TENSORS_BY_TIMESTAMP and not parsed.reverse:
       command += " -r"
     attr_segs[0].append(
         (0, len(row), [debugger_cli_common.MenuItem(None, command), "bold"]))
     row += " " * (max_timestamp_width - len(row))
 
     prev_len = len(row)
-    row += "Op type"
-    command = "%s -s op_type" % base_command
-    if parsed.sort_by == "op_type" and not parsed.reverse:
+    row += "Size"
+    command = "%s -s %s" % (base_command, SORT_TENSORS_BY_DUMP_SIZE)
+    if parsed.sort_by == SORT_TENSORS_BY_DUMP_SIZE and not parsed.reverse:
       command += " -r"
     attr_segs[0].append((prev_len, len(row),
                          [debugger_cli_common.MenuItem(None, command), "bold"]))
-    row += " " * (max_op_type_width + max_timestamp_width - len(row))
+    row += " " * (max_dump_size_width + max_timestamp_width - len(row))
+
+    prev_len = len(row)
+    row += "Op type"
+    command = "%s -s %s" % (base_command, SORT_TENSORS_BY_OP_TYPE)
+    if parsed.sort_by == SORT_TENSORS_BY_OP_TYPE and not parsed.reverse:
+      command += " -r"
+    attr_segs[0].append((prev_len, len(row),
+                         [debugger_cli_common.MenuItem(None, command), "bold"]))
+    row += " " * (
+        max_op_type_width + max_dump_size_width + max_timestamp_width - len(row)
+    )
 
     prev_len = len(row)
     row += " Tensor name"
-    command = "%s -s tensor_name" % base_command
-    if parsed.sort_by == "tensor_name" and not parsed.reverse:
+    command = "%s -s %s" % (base_command, SORT_TENSORS_BY_TENSOR_NAME)
+    if parsed.sort_by == SORT_TENSORS_BY_TENSOR_NAME and not parsed.reverse:
       command += " -r"
     attr_segs[0].append((prev_len + 1, len(row),
                          [debugger_cli_common.MenuItem("", command), "bold"]))
-    row += " " * (max_op_type_width + max_timestamp_width - len(row))
+    row += " " * (
+        max_op_type_width + max_dump_size_width + max_timestamp_width - len(row)
+    )
 
     return debugger_cli_common.RichTextLines([row], font_attr_segs=attr_segs)
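
For illustration, the header row built above now carries four sortable column heads; with hypothetical column widths it would look roughly like:

    t (ms)    Size    Op type     Tensor name

Each head is a MenuItem that re-issues `lt -s <field>`, appending `-r` when the list is already sorted (ascending) by that field.
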
@@ -28,6 +28,7 @@ from tensorflow.python.client import session
 from tensorflow.python.debug import debug_data
 from tensorflow.python.debug import debug_utils
 from tensorflow.python.debug.cli import analyzer_cli
+from tensorflow.python.debug.cli import command_parser
 from tensorflow.python.debug.cli import debugger_cli_common
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
@@ -135,37 +136,51 @@ def assert_listed_tensors(tst,
 
   # Verify the command shortcuts in the top row.
   attr_segs = out.font_attr_segs[line_counter]
-  tst.assertEqual(0, attr_segs[0][0])
-  tst.assertEqual(len("t (ms)"), attr_segs[0][1])
-  command = attr_segs[0][2][0].content
+  attr_seg = attr_segs[0]
+  tst.assertEqual(0, attr_seg[0])
+  tst.assertEqual(len("t (ms)"), attr_seg[1])
+  command = attr_seg[2][0].content
   tst.assertIn("-s timestamp", command)
   assert_column_header_command_shortcut(
       tst, command, reverse, node_name_regex, op_type_regex,
       tensor_filter_name)
-  tst.assertEqual("bold", attr_segs[0][2][1])
+  tst.assertEqual("bold", attr_seg[2][1])
+
+  idx0 = line.index("Size")
+  attr_seg = attr_segs[1]
+  tst.assertEqual(idx0, attr_seg[0])
+  tst.assertEqual(idx0 + len("Size"), attr_seg[1])
+  command = attr_seg[2][0].content
+  tst.assertIn("-s dump_size", command)
+  assert_column_header_command_shortcut(tst, command, reverse, node_name_regex,
+                                        op_type_regex, tensor_filter_name)
+  tst.assertEqual("bold", attr_seg[2][1])
 
   idx0 = line.index("Op type")
-  tst.assertEqual(idx0, attr_segs[1][0])
-  tst.assertEqual(idx0 + len("Op type"), attr_segs[1][1])
-  command = attr_segs[1][2][0].content
+  attr_seg = attr_segs[2]
+  tst.assertEqual(idx0, attr_seg[0])
+  tst.assertEqual(idx0 + len("Op type"), attr_seg[1])
+  command = attr_seg[2][0].content
   tst.assertIn("-s op_type", command)
   assert_column_header_command_shortcut(
       tst, command, reverse, node_name_regex, op_type_regex,
       tensor_filter_name)
-  tst.assertEqual("bold", attr_segs[1][2][1])
+  tst.assertEqual("bold", attr_seg[2][1])
 
   idx0 = line.index("Tensor name")
-  tst.assertEqual(idx0, attr_segs[2][0])
-  tst.assertEqual(idx0 + len("Tensor name"), attr_segs[2][1])
-  command = attr_segs[2][2][0].content
+  attr_seg = attr_segs[3]
+  tst.assertEqual(idx0, attr_seg[0])
+  tst.assertEqual(idx0 + len("Tensor name"), attr_seg[1])
+  command = attr_seg[2][0].content
   tst.assertIn("-s tensor_name", command)
   assert_column_header_command_shortcut(
       tst, command, reverse, node_name_regex, op_type_regex,
       tensor_filter_name)
-  tst.assertEqual("bold", attr_segs[2][2][1])
+  tst.assertEqual("bold", attr_seg[2][1])
 
   # Verify the listed tensors and their timestamps.
   tensor_timestamps = []
+  dump_sizes_bytes = []
   op_types = []
   tensor_names = []
   for line in line_iter:
@@ -176,8 +191,9 @@ def assert_listed_tensors(tst,
     tst.assertGreaterEqual(rel_time, 0.0)
 
     tensor_timestamps.append(rel_time)
-    op_types.append(items[1])
-    tensor_names.append(items[2])
+    dump_sizes_bytes.append(command_parser.parse_readable_size_str(items[1]))
+    op_types.append(items[2])
+    tensor_names.append(items[3])
 
   # Verify that the tensors should be listed in ascending order of their
   # timestamps.
@@ -186,6 +202,11 @@ def assert_listed_tensors(tst,
     if reverse:
       sorted_timestamps.reverse()
     tst.assertEqual(sorted_timestamps, tensor_timestamps)
+  elif sort_by == "dump_size":
+    sorted_dump_sizes_bytes = sorted(dump_sizes_bytes)
+    if reverse:
+      sorted_dump_sizes_bytes.reverse()
+    tst.assertEqual(sorted_dump_sizes_bytes, dump_sizes_bytes)
   elif sort_by == "op_type":
     sorted_op_types = sorted(op_types)
     if reverse:
@@ -353,7 +374,6 @@ def assert_node_attribute_lines(tst,
   while True:
     for i in range(5):
       line = next(line_iter)
-      print(line)
       if i == 0:
         tst.assertEqual(depth_counter, int(line.split(":")[0]))
       elif i == 1:
@@ -564,6 +584,33 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=True)
     check_main_menu(self, out, list_tensors_enabled=False)
 
+  def testListTensorsInDumpSizeOrderWorks(self):
+    out = self._registry.dispatch_command("lt", ["-s", "dump_size"])
+    assert_listed_tensors(
+        self,
+        out, [
+            "simple_mul_add/u:0", "simple_mul_add/v:0",
+            "simple_mul_add/u/read:0", "simple_mul_add/v/read:0",
+            "simple_mul_add/matmul:0", "simple_mul_add/add:0"
+        ],
+        ["VariableV2", "VariableV2", "Identity", "Identity", "MatMul", "Add"],
+        sort_by="dump_size")
+    check_main_menu(self, out, list_tensors_enabled=False)
+
+  def testListTensorsInReverseDumpSizeOrderWorks(self):
+    out = self._registry.dispatch_command("lt", ["-s", "dump_size", "-r"])
+    assert_listed_tensors(
+        self,
+        out, [
+            "simple_mul_add/u:0", "simple_mul_add/v:0",
+            "simple_mul_add/u/read:0", "simple_mul_add/v/read:0",
+            "simple_mul_add/matmul:0", "simple_mul_add/add:0"
+        ],
+        ["VariableV2", "VariableV2", "Identity", "Identity", "MatMul", "Add"],
+        sort_by="dump_size",
+        reverse=True)
+    check_main_menu(self, out, list_tensors_enabled=False)
+
   def testListTensorsWithInvalidSortByFieldGivesError(self):
     out = self._registry.dispatch_command("lt", ["-s", "foobar"])
     self.assertIn("ValueError: Unsupported key to sort tensors by: foobar",
@@ -32,6 +32,36 @@ from tensorflow.python.ops import variables
 DEFAULT_NDARRAY_DISPLAY_THRESHOLD = 2000
 
 
+def bytes_to_readable_str(num_bytes, include_b=False):
+  """Generate a human-readable string representing number of bytes.
+
+  The units B, kB, MB and GB are used.
+
+  Args:
+    num_bytes: (`int` or None) Number of bytes.
+    include_b: (`bool`) Include the letter B at the end of the unit.
+
+  Returns:
+    (`str`) A string representing the number of bytes in a human-readable way,
+      including a unit at the end.
+  """
+
+  if num_bytes is None:
+    return str(num_bytes)
+  if num_bytes < 1024:
+    result = "%d" % num_bytes
+  elif num_bytes < 1048576:
+    result = "%.2fk" % (num_bytes / 1024.0)
+  elif num_bytes < 1073741824:
+    result = "%.2fM" % (num_bytes / 1048576.0)
+  else:
+    result = "%.2fG" % (num_bytes / 1073741824.0)
+
+  if include_b:
+    result += "B"
+  return result
+
+
 def parse_ranges_highlight(ranges_string):
   """Process ranges highlight string.
 
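
A quick check of the conversion, consistent with the unit tests further down (interactive session shown for illustration):

    >>> from tensorflow.python.debug.cli import cli_shared
    >>> cli_shared.bytes_to_readable_str(1023)
    '1023'
    >>> cli_shared.bytes_to_readable_str(1024)
    '1.00k'
    >>> cli_shared.bytes_to_readable_str(1024**2, include_b=True)
    '1.00MB'
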
@@ -30,6 +30,46 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 
+class BytesToReadableStrTest(test_util.TensorFlowTestCase):
+
+  def testNoneSizeWorks(self):
+    self.assertEqual(str(None), cli_shared.bytes_to_readable_str(None))
+
+  def testSizesBelowOneKiloByteWorks(self):
+    self.assertEqual("0", cli_shared.bytes_to_readable_str(0))
+    self.assertEqual("500", cli_shared.bytes_to_readable_str(500))
+    self.assertEqual("1023", cli_shared.bytes_to_readable_str(1023))
+
+  def testSizesBetweenOneKiloByteandOneMegaByteWorks(self):
+    self.assertEqual("1.00k", cli_shared.bytes_to_readable_str(1024))
+    self.assertEqual("2.40k", cli_shared.bytes_to_readable_str(int(1024 * 2.4)))
+    self.assertEqual("1023.00k", cli_shared.bytes_to_readable_str(1024 * 1023))
+
+  def testSizesBetweenOneMegaByteandOneGigaByteWorks(self):
+    self.assertEqual("1.00M", cli_shared.bytes_to_readable_str(1024**2))
+    self.assertEqual("2.40M",
+                     cli_shared.bytes_to_readable_str(int(1024**2 * 2.4)))
+    self.assertEqual("1023.00M",
+                     cli_shared.bytes_to_readable_str(1024**2 * 1023))
+
+  def testSizeAboveOneGigaByteWorks(self):
+    self.assertEqual("1.00G", cli_shared.bytes_to_readable_str(1024**3))
+    self.assertEqual("2000.00G",
+                     cli_shared.bytes_to_readable_str(1024**3 * 2000))
+
+  def testReadableStrIncludesBAtTheEndOnRequest(self):
+    self.assertEqual("0B", cli_shared.bytes_to_readable_str(0, include_b=True))
+    self.assertEqual(
+        "1.00kB", cli_shared.bytes_to_readable_str(
+            1024, include_b=True))
+    self.assertEqual(
+        "1.00MB", cli_shared.bytes_to_readable_str(
+            1024**2, include_b=True))
+    self.assertEqual(
+        "1.00GB", cli_shared.bytes_to_readable_str(
+            1024**3, include_b=True))
+
+
 class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -214,6 +214,40 @@ def parse_ranges(range_string):
   return ranges
 
 
+def parse_readable_size_str(size_str):
+  """Convert a human-readable str representation to number of bytes.
+
+  Only the units "kB", "MB", "GB" are supported. The "B" character at the end
+  of the input `str` may be omitted.
+
+  Args:
+    size_str: (`str`) A human-readable str representing a number of bytes
+      (e.g., "0", "1023", "1.1kB", "24 MB", "23GB", "100 G").
+
+  Returns:
+    (`int`) The parsed number of bytes.
+
+  Raises:
+    ValueError: on failure to parse the input `size_str`.
+  """
+
+  size_str = size_str.strip()
+  if size_str.endswith("B"):
+    size_str = size_str[:-1]
+
+  if size_str.isdigit():
+    return int(size_str)
+  elif size_str.endswith("k"):
+    return int(float(size_str[:-1]) * 1024)
+  elif size_str.endswith("M"):
+    return int(float(size_str[:-1]) * 1048576)
+  elif size_str.endswith("G"):
+    return int(float(size_str[:-1]) * 1073741824)
+  else:
+    raise ValueError("Failed to parse human-readable byte size str: \"%s\"" %
+                     size_str)
+
+
 def evaluate_tensor_slice(tensor, tensor_slicing):
   """Call eval on the slicing of a tensor, with validation.
 
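
As an informal sanity check (my illustration, not part of the change), parsing a readable string back should recover the byte count up to the two-decimal rounding:

    from tensorflow.python.debug.cli import cli_shared, command_parser

    n = 123456789
    s = cli_shared.bytes_to_readable_str(n, include_b=True)  # "117.74MB"
    m = command_parser.parse_readable_size_str(s)            # back to bytes
    assert abs(m - n) / float(n) < 1e-3  # equal up to rounding
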
@@ -251,5 +251,51 @@ class ParseRangesTest(test_util.TensorFlowTestCase):
       command_parser.parse_ranges("[1, 1j]")
 
 
+class ParseReadableSizeStrTest(test_util.TensorFlowTestCase):
+
+  def testParseNoUnitWorks(self):
+    self.assertEqual(0, command_parser.parse_readable_size_str("0"))
+    self.assertEqual(1024, command_parser.parse_readable_size_str("1024 "))
+    self.assertEqual(2000, command_parser.parse_readable_size_str(" 2000 "))
+
+  def testParseKiloBytesWorks(self):
+    self.assertEqual(0, command_parser.parse_readable_size_str("0kB"))
+    self.assertEqual(1024**2, command_parser.parse_readable_size_str("1024 kB"))
+    self.assertEqual(1024**2 * 2,
+                     command_parser.parse_readable_size_str("2048k"))
+    self.assertEqual(1024**2 * 2,
+                     command_parser.parse_readable_size_str("2048kB"))
+    self.assertEqual(1024 / 4, command_parser.parse_readable_size_str("0.25k"))
+
+  def testParseMegaBytesWorks(self):
+    self.assertEqual(0, command_parser.parse_readable_size_str("0MB"))
+    self.assertEqual(1024**3, command_parser.parse_readable_size_str("1024 MB"))
+    self.assertEqual(1024**3 * 2,
+                     command_parser.parse_readable_size_str("2048M"))
+    self.assertEqual(1024**3 * 2,
+                     command_parser.parse_readable_size_str("2048MB"))
+    self.assertEqual(1024**2 / 4,
+                     command_parser.parse_readable_size_str("0.25M"))
+
+  def testParseGigaBytesWorks(self):
+    self.assertEqual(0, command_parser.parse_readable_size_str("0GB"))
+    self.assertEqual(1024**4, command_parser.parse_readable_size_str("1024 GB"))
+    self.assertEqual(1024**4 * 2,
+                     command_parser.parse_readable_size_str("2048G"))
+    self.assertEqual(1024**4 * 2,
+                     command_parser.parse_readable_size_str("2048GB"))
+    self.assertEqual(1024**3 / 4,
+                     command_parser.parse_readable_size_str("0.25G"))
+
+  def testParseUnsupportedUnitRaisesException(self):
+    with self.assertRaisesRegexp(
+        ValueError, "Failed to parse human-readable byte size str: \"0foo\""):
+      command_parser.parse_readable_size_str("0foo")
+
+    with self.assertRaisesRegexp(
+        ValueError, "Failed to parse human-readable byte size str: \"2E\""):
+      command_parser.parse_readable_size_str("2EB")
+
+
 if __name__ == "__main__":
   googletest.main()
@@ -298,6 +298,8 @@ class DebugTensorDatum(object):
       self._node_name = namespace + "/" + node_base_name
 
     self._file_path = os.path.join(dump_root, debug_dump_rel_path)
+    self._dump_size_bytes = (gfile.Stat(self._file_path).length if
+                             gfile.Exists(self._file_path) else None)
 
   def __str__(self):
     return "{DebugTensorDatum: %s:%d @ %s @ %d}" % (self.node_name,
@@ -385,6 +387,19 @@ class DebugTensorDatum(object):
 
     return self._file_path
 
+  @property
+  def dump_size_bytes(self):
+    """Size of the dump file.
+
+    Unit: byte.
+
+    Returns:
+      If the dump file exists, size of the dump file, in bytes.
+      If the dump file does not exist, None.
+    """
+
+    return self._dump_size_bytes
+
 
 class DebugDumpDir(object):
   """Data set from a debug-dump directory on filesystem.
@@ -498,16 +513,22 @@ class DebugDumpDir(object):
 
     self._watch_key_to_datum = {}
     self._watch_key_to_rel_time = {}
+    self._watch_key_to_dump_size_bytes = {}
     for datum in self._dump_tensor_data:
       if datum.watch_key not in self._watch_key_to_datum:
         self._watch_key_to_datum[datum.watch_key] = [datum]
         self._watch_key_to_rel_time[datum.watch_key] = [
             datum.timestamp - self._t0
         ]
+        self._watch_key_to_dump_size_bytes[datum.watch_key] = [
+            datum.dump_size_bytes
+        ]
       else:
         self._watch_key_to_datum[datum.watch_key].append(datum)
         self._watch_key_to_rel_time[datum.watch_key].append(datum.timestamp -
                                                             self._t0)
+        self._watch_key_to_dump_size_bytes[datum.watch_key].append(
+            datum.dump_size_bytes)
 
   def set_python_graph(self, python_graph):
     """Provide Python `Graph` object to the wrapper.
@@ -1193,10 +1214,10 @@ class DebugDumpDir(object):
       debug_op: (`str`) name of the debug op.
 
     Returns:
-      (list of int) list of relative timestamps.
+      (`list` of `int`) list of relative timestamps.
 
     Raises:
-      ValueError: If the tensor does not exist in the debub dump data.
+      ValueError: If the tensor watch key does not exist in the debug dump data.
     """
 
     watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op)
@@ -1206,6 +1227,30 @@ class DebugDumpDir(object):
 
     return self._watch_key_to_rel_time[watch_key]
 
+  def get_dump_sizes_bytes(self, node_name, output_slot, debug_op):
+    """Get the sizes of the dump files for a debug-dumped tensor.
+
+    Unit of the file size: byte.
+
+    Args:
+      node_name: (`str`) name of the node that the tensor is produced by.
+      output_slot: (`int`) output slot index of tensor.
+      debug_op: (`str`) name of the debug op.
+
+    Returns:
+      (`list` of `int`): list of dump file sizes in bytes.
+
+    Raises:
+      ValueError: If the tensor watch key does not exist in the debug dump data.
+    """
+
+    watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op)
+    if watch_key not in self._watch_key_to_datum:
+      raise ValueError("Watch key \"%s\" does not exist in the debug dump" %
+                       watch_key)
+
+    return self._watch_key_to_dump_size_bytes[watch_key]
+
   def node_traceback(self, element_name):
     """Try to retrieve the Python traceback of node's construction.
 
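
Illustrative usage, mirroring get_rel_timestamps (here `dump` stands for a DebugDumpDir built from a debugged Session.run, as exercised in the tests below):

    sizes = dump.get_dump_sizes_bytes("while/Identity", 0, "DebugIdentity")
    assert all(size > 0 for size in sizes)  # one entry per dump of the watch key
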
@@ -179,6 +179,13 @@ class DebugTensorDatumTest(test_util.TensorFlowTestCase):
                      datum.timestamp),
                     repr(datum))
 
+  def testDumpSizeBytesIsNoneForNonexistentFilePath(self):
+    dump_root = "/tmp/tfdbg_1"
+    debug_dump_rel_path = "ns1/ns2/node_foo_1_2_DebugIdentity_1472563253536385"
+    datum = debug_data.DebugTensorDatum(dump_root, debug_dump_rel_path)
+
+    self.assertIsNone(datum.dump_size_bytes)
+
 
 class DebugDumpDirTest(test_util.TensorFlowTestCase):
 
@@ -150,6 +150,13 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
         results.dump.get_rel_timestamps("%s/read" % results.v_name, 0,
                                         "DebugIdentity")[0], 0)
 
+    self.assertGreater(
+        results.dump.get_dump_sizes_bytes("%s/read" % results.u_name, 0,
+                                          "DebugIdentity")[0], 0)
+    self.assertGreater(
+        results.dump.get_dump_sizes_bytes("%s/read" % results.v_name, 0,
+                                          "DebugIdentity")[0], 0)
+
   def testGetOpTypeWorks(self):
     results = self._generate_dump_from_simple_addition_graph()
 
@@ -218,6 +225,13 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
         dump.get_rel_timestamps("%s/read" % str2_name, 0, "DebugIdentity")[0],
         0)
 
+    self.assertGreater(
+        dump.get_dump_sizes_bytes("%s/read" % str1_name, 0,
+                                  "DebugIdentity")[0], 0)
+    self.assertGreater(
+        dump.get_dump_sizes_bytes("%s/read" % str2_name, 0,
+                                  "DebugIdentity")[0], 0)
+
   def testDumpUninitializedVariable(self):
     op_namespace = "testDumpUninitializedVariable"
     with session.Session() as sess:
@@ -362,11 +376,17 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       # Verify ascending timestamps from the while loops.
       while_id_rel_timestamps = dump.get_rel_timestamps("while/Identity", 0,
                                                         "DebugIdentity")
+      while_id_dump_sizes_bytes = dump.get_dump_sizes_bytes("while/Identity", 0,
+                                                            "DebugIdentity")
       self.assertEqual(10, len(while_id_rel_timestamps))
       prev_rel_time = 0
-      for rel_time in while_id_rel_timestamps:
+      prev_dump_size_bytes = while_id_dump_sizes_bytes[0]
+      for rel_time, dump_size_bytes in zip(while_id_rel_timestamps,
+                                           while_id_dump_sizes_bytes):
         self.assertGreaterEqual(rel_time, prev_rel_time)
+        self.assertEqual(dump_size_bytes, prev_dump_size_bytes)
         prev_rel_time = rel_time
+        prev_dump_size_bytes = dump_size_bytes
 
       # Test querying debug watch keys from node name.
       watch_keys = dump.debug_watch_keys("while/Identity")
Reference in New Issue
Block a user