[tfdbg2] Let DebugIdentityV2 op carry circular_buffer_size attribute

Motivation: - This CL addresses a bug in which the `circular_buffer_size` kwarg of the `tf.debugging.experimental.enable_dump_debug_info()` API works only on a local machine and doesn't behave as expected when a TF graph is executed on a remote TF server (e.g., a TPU worker). Technical aspect of the change: - Add an attribute to the DebugIdentityV2Op used by `enable_dump_debug_info()`: namely `circular_buffer_size`. This new attribute defaults to its previous effective default value (1000), and hence is backward compatible. - This new attribute helps propagate the value of the `circular_buffer_size` setting from the host on which the instrumented graph is created to the host on which the graph is executed. PiperOrigin-RevId: 314761103 Change-Id: Ifbc898a1272d9498d6f856020f0b1145190da2e7
2020-06-04 10:54:30 -07:00 · 2020-06-04 10:54:30 -07:00 · 171d688aaa
commit 171d688aaa
parent 7e85bf98da
6 changed files with 146 additions and 34 deletions
--- a/tensorflow/core/kernels/debug_ops.h
+++ b/tensorflow/core/kernels/debug_ops.h
@ -428,13 +428,21 @@ class DebugIdentityV2Op : public OpKernel {
    OP_REQUIRES_OK(context, context->GetAttr("output_slot", &output_slot_));
    OP_REQUIRES_OK(context,
                   context->GetAttr("tensor_debug_mode", &tensor_debug_mode_));
    if (context->HasAttr("circular_buffer_size")) {
      OP_REQUIRES_OK(context, context->GetAttr("circular_buffer_size",
                                               &circular_buffer_size_));
    } else {
      circular_buffer_size_ =
          tfdbg::DebugEventsWriter::kDefaultCyclicBufferSize;
    }
  }
  void Compute(OpKernelContext* context) override {
    const Tensor& tensor = context->input(0);
    for (const string& dump_root : dump_roots_) {
      tfdbg::DebugEventsWriter* debug_events_writer =
-          tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root);
+          tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root,
                                                         circular_buffer_size_);
      OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace(
                                  tfdbg_context_id_, device_name_, op_name_,
                                  output_slot_, tensor_debug_mode_, tensor));
@ -449,6 +457,7 @@ class DebugIdentityV2Op : public OpKernel {
  string op_name_;
  int32 output_slot_;
  int32 tensor_debug_mode_;
  int64 circular_buffer_size_;
 };
 typedef Eigen::ThreadPoolDevice CPUDevice;
--- a/tensorflow/core/ops/debug_ops.cc
+++ b/tensorflow/core/ops/debug_ops.cc
@ -90,6 +90,7 @@ REGISTER_OP("DebugIdentityV2")
    .Attr("output_slot: int = -1")
    .Attr("tensor_debug_mode: int = -1")
    .Attr("debug_urls: list(string) = []")
    .Attr("circular_buffer_size: int = 1000")
    .SetIsStateful()
    .SetShapeFn(shape_inference::UnchangedShape);
--- a/tensorflow/python/debug/lib/debug_v2_ops_test.py
+++ b/tensorflow/python/debug/lib/debug_v2_ops_test.py
@ -23,6 +23,7 @@ import os
 import numpy as np
 from tensorflow.core.protobuf import debug_event_pb2
 from tensorflow.python.compat import compat
 from tensorflow.python.debug.lib import debug_events_reader
 from tensorflow.python.debug.lib import debug_events_writer
 from tensorflow.python.debug.lib import dumping_callback_test_lib
@ -40,6 +41,12 @@ from tensorflow.python.platform import googletest
 class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
  """Tests for DebugIdentityV2Op: when DebugEventsWriter is initialized.
  DebugEventsWriter being initialized prior to DebugIdentityV2 ops being invoked
  for the first time is the typical case (e.g., tfdbg2 running on a local
  machine with only local devices.)
  """
  def setUp(self):
    super(DebugIdentityV2OpTest, self).setUp()
@ -57,8 +64,6 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
    @def_function.function
    def write_debug_trace(x):
      # DebugIdentityV2 is a stateful op. It ought to be included by auto
      # control dependency.
      square = math_ops.square(x)
      gen_debug_ops.debug_identity_v2(
          square,
@ -223,6 +228,64 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
        with self.assertRaises(StopIteration):
          next(graph_trace_iter)
 class DebugIdentityV2OpUninitializedWriterTest(
    dumping_callback_test_lib.DumpingCallbackTestBase):
  """Tests for DebugIdentityV2Op: when DebugEventsWriter is not initialized.
  This case can occur when DebugIdentityV2Ops are running on a remote
  TensorFlow server (e.g., a TPU worker).
  """
  @test_util.run_in_graph_and_eager_modes
  def testInvokingDebugIdentityV2OpBeforeCreatingDebugEventsWriterWorks(self):
    if not compat.forward_compatible(2020, 6, 24):
      self.skipTest("Functionality currently not supported.")
    circular_buffer_size = 3
    @def_function.function
    def write_debug_trace(x):
      # DebugIdentityV2 is a stateful op. It ought to be included by auto
      # control dependency.
      square = math_ops.square(x)
      gen_debug_ops.debug_identity_v2(
          square,
          tfdbg_context_id="deadbeaf",
          op_name="Square",
          output_slot=0,
          tensor_debug_mode=debug_event_pb2.TensorDebugMode.FULL_TENSOR,
          debug_urls=["file://%s" % self.dump_root],
          circular_buffer_size=circular_buffer_size)
      return square
    # The DebugIdentityV2 ops are invoked *before* a DebugEventsWriter at the
    # same dump root is created.
    for i in range(circular_buffer_size * 2):
      self.assertAllClose(
          write_debug_trace(np.array([i]).astype(np.float32)), [i**2.0])
    writer = debug_events_writer.DebugEventsWriter(self.dump_root,
                                                   circular_buffer_size)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()
    with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
      graph_trace_iter = reader.graph_execution_traces_iterator()
      graph_execution_traces = []
      while True:
        try:
          graph_execution_traces.append(
              next(graph_trace_iter).debug_event.graph_execution_trace)
        except StopIteration:
          break
      self.assertLen(graph_execution_traces, circular_buffer_size)
      for i in range(circular_buffer_size):
        self.assertAllClose(
            tensor_util.MakeNdarray(graph_execution_traces[i].tensor_proto),
            [(i + circular_buffer_size)**2.0])
 class DebugNumericSummaryV2Test(test_util.TensorFlowTestCase):
  @test_util.run_in_graph_and_eager_modes
  def testDebugNumericSummaryV2OpReduceInfNanThreeSlots(self):
--- a/tensorflow/python/debug/lib/dumping_callback.py
+++ b/tensorflow/python/debug/lib/dumping_callback.py
@ -30,6 +30,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.core.framework import tensor_pb2
 from tensorflow.core.protobuf import debug_event_pb2
 from tensorflow.core.protobuf import graph_debug_info_pb2
 from tensorflow.python.compat import compat as tf_compat
 from tensorflow.python.debug.lib import debug_events_writer
 from tensorflow.python.debug.lib import op_callbacks_common
 from tensorflow.python.debug.lib import source_utils
@ -366,17 +367,31 @@ class _DumpingCallback(object):
        with self._symbolic_tensor_counter_lock:
          debug_identity_name = ("DebugIdentityV2_%d" %
                                 self._symbolic_tensor_counter)
-        debug_tensor = gen_debug_ops.debug_identity_v2(
+        if tf_compat.forward_compatible(2020, 6, 24):
-            # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
+          debug_tensor = gen_debug_ops.debug_identity_v2(
-            # as a low-overhead placeholder, since no actual tensor value is
+              # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
-            # traced.
+              # as a low-overhead placeholder, since no actual tensor value is
-            constant_op.constant([], dtype=dtypes.float32),
+              # traced.
-            tfdbg_context_id=tfdbg_context_id,
+              constant_op.constant([], dtype=dtypes.float32),
-            op_name=op_name,
+              tfdbg_context_id=tfdbg_context_id,
-            output_slot=output_slot,
+              op_name=op_name,
-            tensor_debug_mode=self._tensor_debug_mode,
+              output_slot=output_slot,
-            debug_urls=debug_urls,
+              tensor_debug_mode=self._tensor_debug_mode,
-            name=debug_identity_name)
+              debug_urls=debug_urls,
              circular_buffer_size=self._circular_buffer_size,
              name=debug_identity_name)
        else:
          debug_tensor = gen_debug_ops.debug_identity_v2(
              # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
              # as a low-overhead placeholder, since no actual tensor value is
              # traced.
              constant_op.constant([], dtype=dtypes.float32),
              tfdbg_context_id=tfdbg_context_id,
              op_name=op_name,
              output_slot=output_slot,
              tensor_debug_mode=self._tensor_debug_mode,
              debug_urls=debug_urls,
              name=debug_identity_name)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
@ -400,17 +415,31 @@ class _DumpingCallback(object):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
-        debug_tensor = gen_debug_ops.debug_identity_v2(
+        if tf_compat.forward_compatible(2020, 6, 24):
-            gen_debug_ops.debug_numeric_summary_v2(
+          debug_tensor = gen_debug_ops.debug_identity_v2(
-                tensor,
+              gen_debug_ops.debug_numeric_summary_v2(
-                tensor_id=tensor_ids[output_slot],
+                  tensor,
-                tensor_debug_mode=self._tensor_debug_mode,
+                  tensor_id=tensor_ids[output_slot],
-                output_dtype=dtypes.float64),
+                  tensor_debug_mode=self._tensor_debug_mode,
-            tfdbg_context_id=tfdbg_context_id,
+                  output_dtype=dtypes.float64),
-            op_name=op_name,
+              tfdbg_context_id=tfdbg_context_id,
-            output_slot=output_slot,
+              op_name=op_name,
-            tensor_debug_mode=self._tensor_debug_mode,
+              output_slot=output_slot,
-            debug_urls=debug_urls)
+              tensor_debug_mode=self._tensor_debug_mode,
              debug_urls=debug_urls,
              circular_buffer_size=self._circular_buffer_size)
        else:
          debug_tensor = gen_debug_ops.debug_identity_v2(
              gen_debug_ops.debug_numeric_summary_v2(
                  tensor,
                  tensor_id=tensor_ids[output_slot],
                  tensor_debug_mode=self._tensor_debug_mode,
                  output_dtype=dtypes.float64),
              tfdbg_context_id=tfdbg_context_id,
              op_name=op_name,
              output_slot=output_slot,
              tensor_debug_mode=self._tensor_debug_mode,
              debug_urls=debug_urls)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
@ -424,13 +453,23 @@ class _DumpingCallback(object):
          if is_v1_graph_mode:
            instrumented_tensors.append(tensor)
          continue
-        debug_tensor = gen_debug_ops.debug_identity_v2(
+        if tf_compat.forward_compatible(2020, 6, 24):
-            tensor,
+          debug_tensor = gen_debug_ops.debug_identity_v2(
-            tfdbg_context_id=tfdbg_context_id,
+              tensor,
-            op_name=op_name,
+              tfdbg_context_id=tfdbg_context_id,
-            output_slot=output_slot,
+              op_name=op_name,
-            tensor_debug_mode=self._tensor_debug_mode,
+              output_slot=output_slot,
-            debug_urls=debug_urls)
+              tensor_debug_mode=self._tensor_debug_mode,
              debug_urls=debug_urls,
              circular_buffer_size=self._circular_buffer_size)
        else:
          debug_tensor = gen_debug_ops.debug_identity_v2(
              tensor,
              tfdbg_context_id=tfdbg_context_id,
              op_name=op_name,
              output_slot=output_slot,
              tensor_debug_mode=self._tensor_debug_mode,
              debug_urls=debug_urls)
        if is_v1_graph_mode:
          instrumented_tensors.append(self._process_v1_graph_mode_tensor(
              op_type, tensor, debug_tensor, tensor_debug_mode))
--- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
@ -982,7 +982,7 @@ tf_module {
  }
  member_method {
    name: "DebugIdentityV2"
-    argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'None\'], "
+    argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
  }
  member_method {
    name: "DebugNanCount"
--- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt
@ -982,7 +982,7 @@ tf_module {
  }
  member_method {
    name: "DebugIdentityV2"
-    argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'None\'], "
+    argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
  }
  member_method {
    name: "DebugNanCount"