[tfdbg2] Let DebugIdentityV2 op carry circular_buffer_size attribute

Motivation:
- This CL addresses a bug in which the `circular_buffer_size` kwarg
  of the `tf.debugging.experimental.enable_dump_debug_info()` API works
  only on a local machine and doesn't behave as expected when a TF graph
  is executed on a remote TF server (e.g., a TPU worker).

Technical aspect of the change:
- Add an attribute to the DebugIdentityV2Op used by `enable_dump_debug_info()`:
  namely `circular_buffer_size`. This new attribute defaults to its previous
  effective default value (1000), and hence is backward compatible.
- This new attribute helps propagate the value of the `circular_buffer_size`
  setting from the host on which the instrumented graph is created to the
  host on which the graph is executed.

PiperOrigin-RevId: 314761103
Change-Id: Ifbc898a1272d9498d6f856020f0b1145190da2e7
This commit is contained in:
Shanqing Cai 2020-06-04 10:54:30 -07:00 committed by TensorFlower Gardener
parent 7e85bf98da
commit 171d688aaa
6 changed files with 146 additions and 34 deletions

View File

@ -428,13 +428,21 @@ class DebugIdentityV2Op : public OpKernel {
OP_REQUIRES_OK(context, context->GetAttr("output_slot", &output_slot_)); OP_REQUIRES_OK(context, context->GetAttr("output_slot", &output_slot_));
OP_REQUIRES_OK(context, OP_REQUIRES_OK(context,
context->GetAttr("tensor_debug_mode", &tensor_debug_mode_)); context->GetAttr("tensor_debug_mode", &tensor_debug_mode_));
if (context->HasAttr("circular_buffer_size")) {
OP_REQUIRES_OK(context, context->GetAttr("circular_buffer_size",
&circular_buffer_size_));
} else {
circular_buffer_size_ =
tfdbg::DebugEventsWriter::kDefaultCyclicBufferSize;
}
} }
void Compute(OpKernelContext* context) override { void Compute(OpKernelContext* context) override {
const Tensor& tensor = context->input(0); const Tensor& tensor = context->input(0);
for (const string& dump_root : dump_roots_) { for (const string& dump_root : dump_roots_) {
tfdbg::DebugEventsWriter* debug_events_writer = tfdbg::DebugEventsWriter* debug_events_writer =
tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root); tfdbg::DebugEventsWriter::GetDebugEventsWriter(dump_root,
circular_buffer_size_);
OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace( OP_REQUIRES_OK(context, debug_events_writer->WriteGraphExecutionTrace(
tfdbg_context_id_, device_name_, op_name_, tfdbg_context_id_, device_name_, op_name_,
output_slot_, tensor_debug_mode_, tensor)); output_slot_, tensor_debug_mode_, tensor));
@ -449,6 +457,7 @@ class DebugIdentityV2Op : public OpKernel {
string op_name_; string op_name_;
int32 output_slot_; int32 output_slot_;
int32 tensor_debug_mode_; int32 tensor_debug_mode_;
int64 circular_buffer_size_;
}; };
typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::ThreadPoolDevice CPUDevice;

View File

@ -90,6 +90,7 @@ REGISTER_OP("DebugIdentityV2")
.Attr("output_slot: int = -1") .Attr("output_slot: int = -1")
.Attr("tensor_debug_mode: int = -1") .Attr("tensor_debug_mode: int = -1")
.Attr("debug_urls: list(string) = []") .Attr("debug_urls: list(string) = []")
.Attr("circular_buffer_size: int = 1000")
.SetIsStateful() .SetIsStateful()
.SetShapeFn(shape_inference::UnchangedShape); .SetShapeFn(shape_inference::UnchangedShape);

View File

@ -23,6 +23,7 @@ import os
import numpy as np import numpy as np
from tensorflow.core.protobuf import debug_event_pb2 from tensorflow.core.protobuf import debug_event_pb2
from tensorflow.python.compat import compat
from tensorflow.python.debug.lib import debug_events_reader from tensorflow.python.debug.lib import debug_events_reader
from tensorflow.python.debug.lib import debug_events_writer from tensorflow.python.debug.lib import debug_events_writer
from tensorflow.python.debug.lib import dumping_callback_test_lib from tensorflow.python.debug.lib import dumping_callback_test_lib
@ -40,6 +41,12 @@ from tensorflow.python.platform import googletest
class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase): class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
"""Tests for DebugIdentityV2Op: when DebugEventsWriter is initialized.
DebugEventsWriter being initialized prior to DebugIdentityV2 ops being invoked
for the first time is the typical case (e.g., tfdbg2 running on a local
machine with only local devices.)
"""
def setUp(self): def setUp(self):
super(DebugIdentityV2OpTest, self).setUp() super(DebugIdentityV2OpTest, self).setUp()
@ -57,8 +64,6 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
@def_function.function @def_function.function
def write_debug_trace(x): def write_debug_trace(x):
# DebugIdentityV2 is a stateful op. It ought to be included by auto
# control dependency.
square = math_ops.square(x) square = math_ops.square(x)
gen_debug_ops.debug_identity_v2( gen_debug_ops.debug_identity_v2(
square, square,
@ -223,6 +228,64 @@ class DebugIdentityV2OpTest(dumping_callback_test_lib.DumpingCallbackTestBase):
with self.assertRaises(StopIteration): with self.assertRaises(StopIteration):
next(graph_trace_iter) next(graph_trace_iter)
class DebugIdentityV2OpUninitializedWriterTest(
    dumping_callback_test_lib.DumpingCallbackTestBase):
  """Exercises DebugIdentityV2Op when no DebugEventsWriter exists up front.

  Such a situation may arise when the DebugIdentityV2 ops execute on a remote
  TensorFlow server (e.g., a TPU worker).
  """

  @test_util.run_in_graph_and_eager_modes
  def testInvokingDebugIdentityV2OpBeforeCreatingDebugEventsWriterWorks(self):
    """Runs the op before any writer is created, then checks the dumped traces.

    The op is invoked twice as many times as the circular buffer can hold, so
    only the traces from the second half of the invocations are expected to be
    read back.
    """
    if not compat.forward_compatible(2020, 6, 24):
      self.skipTest("Functionality currently not supported.")
    buffer_size = 3
    num_invocations = buffer_size * 2

    @def_function.function
    def write_debug_trace(x):
      # As a stateful op, DebugIdentityV2 is expected to be pulled in by auto
      # control dependencies.
      squared = math_ops.square(x)
      gen_debug_ops.debug_identity_v2(
          squared,
          tfdbg_context_id="deadbeaf",
          op_name="Square",
          output_slot=0,
          tensor_debug_mode=debug_event_pb2.TensorDebugMode.FULL_TENSOR,
          debug_urls=["file://%s" % self.dump_root],
          circular_buffer_size=buffer_size)
      return squared

    # The DebugIdentityV2 ops are invoked *before* a DebugEventsWriter for the
    # same dump root is created.
    for step in range(num_invocations):
      self.assertAllClose(
          write_debug_trace(np.array([step]).astype(np.float32)),
          [step**2.0])

    events_writer = debug_events_writer.DebugEventsWriter(
        self.dump_root, buffer_size)
    events_writer.FlushNonExecutionFiles()
    events_writer.FlushExecutionFiles()

    with debug_events_reader.DebugEventsReader(self.dump_root) as reader:
      trace_iter = reader.graph_execution_traces_iterator()
      # Drain the iterator; a private sentinel marks exhaustion.
      exhausted = object()
      traces = []
      while True:
        entry = next(trace_iter, exhausted)
        if entry is exhausted:
          break
        traces.append(entry.debug_event.graph_execution_trace)

      # Exactly one buffer's worth of traces should have been written.
      self.assertLen(traces, buffer_size)
      for slot, trace in enumerate(traces):
        self.assertAllClose(
            tensor_util.MakeNdarray(trace.tensor_proto),
            [(slot + buffer_size)**2.0])
class DebugNumericSummaryV2Test(test_util.TensorFlowTestCase):
@test_util.run_in_graph_and_eager_modes @test_util.run_in_graph_and_eager_modes
def testDebugNumericSummaryV2OpReduceInfNanThreeSlots(self): def testDebugNumericSummaryV2OpReduceInfNanThreeSlots(self):

View File

@ -30,6 +30,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.core.framework import tensor_pb2 from tensorflow.core.framework import tensor_pb2
from tensorflow.core.protobuf import debug_event_pb2 from tensorflow.core.protobuf import debug_event_pb2
from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2
from tensorflow.python.compat import compat as tf_compat
from tensorflow.python.debug.lib import debug_events_writer from tensorflow.python.debug.lib import debug_events_writer
from tensorflow.python.debug.lib import op_callbacks_common from tensorflow.python.debug.lib import op_callbacks_common
from tensorflow.python.debug.lib import source_utils from tensorflow.python.debug.lib import source_utils
@ -366,17 +367,31 @@ class _DumpingCallback(object):
with self._symbolic_tensor_counter_lock: with self._symbolic_tensor_counter_lock:
debug_identity_name = ("DebugIdentityV2_%d" % debug_identity_name = ("DebugIdentityV2_%d" %
self._symbolic_tensor_counter) self._symbolic_tensor_counter)
debug_tensor = gen_debug_ops.debug_identity_v2( if tf_compat.forward_compatible(2020, 6, 24):
# Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode debug_tensor = gen_debug_ops.debug_identity_v2(
# as a low-overhead placeholder, since no actual tensor value is # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
# traced. # as a low-overhead placeholder, since no actual tensor value is
constant_op.constant([], dtype=dtypes.float32), # traced.
tfdbg_context_id=tfdbg_context_id, constant_op.constant([], dtype=dtypes.float32),
op_name=op_name, tfdbg_context_id=tfdbg_context_id,
output_slot=output_slot, op_name=op_name,
tensor_debug_mode=self._tensor_debug_mode, output_slot=output_slot,
debug_urls=debug_urls, tensor_debug_mode=self._tensor_debug_mode,
name=debug_identity_name) debug_urls=debug_urls,
circular_buffer_size=self._circular_buffer_size,
name=debug_identity_name)
else:
debug_tensor = gen_debug_ops.debug_identity_v2(
# Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
# as a low-overhead placeholder, since no actual tensor value is
# traced.
constant_op.constant([], dtype=dtypes.float32),
tfdbg_context_id=tfdbg_context_id,
op_name=op_name,
output_slot=output_slot,
tensor_debug_mode=self._tensor_debug_mode,
debug_urls=debug_urls,
name=debug_identity_name)
if is_v1_graph_mode: if is_v1_graph_mode:
instrumented_tensors.append(self._process_v1_graph_mode_tensor( instrumented_tensors.append(self._process_v1_graph_mode_tensor(
op_type, tensor, debug_tensor, tensor_debug_mode)) op_type, tensor, debug_tensor, tensor_debug_mode))
@ -400,17 +415,31 @@ class _DumpingCallback(object):
if is_v1_graph_mode: if is_v1_graph_mode:
instrumented_tensors.append(tensor) instrumented_tensors.append(tensor)
continue continue
debug_tensor = gen_debug_ops.debug_identity_v2( if tf_compat.forward_compatible(2020, 6, 24):
gen_debug_ops.debug_numeric_summary_v2( debug_tensor = gen_debug_ops.debug_identity_v2(
tensor, gen_debug_ops.debug_numeric_summary_v2(
tensor_id=tensor_ids[output_slot], tensor,
tensor_debug_mode=self._tensor_debug_mode, tensor_id=tensor_ids[output_slot],
output_dtype=dtypes.float64), tensor_debug_mode=self._tensor_debug_mode,
tfdbg_context_id=tfdbg_context_id, output_dtype=dtypes.float64),
op_name=op_name, tfdbg_context_id=tfdbg_context_id,
output_slot=output_slot, op_name=op_name,
tensor_debug_mode=self._tensor_debug_mode, output_slot=output_slot,
debug_urls=debug_urls) tensor_debug_mode=self._tensor_debug_mode,
debug_urls=debug_urls,
circular_buffer_size=self._circular_buffer_size)
else:
debug_tensor = gen_debug_ops.debug_identity_v2(
gen_debug_ops.debug_numeric_summary_v2(
tensor,
tensor_id=tensor_ids[output_slot],
tensor_debug_mode=self._tensor_debug_mode,
output_dtype=dtypes.float64),
tfdbg_context_id=tfdbg_context_id,
op_name=op_name,
output_slot=output_slot,
tensor_debug_mode=self._tensor_debug_mode,
debug_urls=debug_urls)
if is_v1_graph_mode: if is_v1_graph_mode:
instrumented_tensors.append(self._process_v1_graph_mode_tensor( instrumented_tensors.append(self._process_v1_graph_mode_tensor(
op_type, tensor, debug_tensor, tensor_debug_mode)) op_type, tensor, debug_tensor, tensor_debug_mode))
@ -424,13 +453,23 @@ class _DumpingCallback(object):
if is_v1_graph_mode: if is_v1_graph_mode:
instrumented_tensors.append(tensor) instrumented_tensors.append(tensor)
continue continue
debug_tensor = gen_debug_ops.debug_identity_v2( if tf_compat.forward_compatible(2020, 6, 24):
tensor, debug_tensor = gen_debug_ops.debug_identity_v2(
tfdbg_context_id=tfdbg_context_id, tensor,
op_name=op_name, tfdbg_context_id=tfdbg_context_id,
output_slot=output_slot, op_name=op_name,
tensor_debug_mode=self._tensor_debug_mode, output_slot=output_slot,
debug_urls=debug_urls) tensor_debug_mode=self._tensor_debug_mode,
debug_urls=debug_urls,
circular_buffer_size=self._circular_buffer_size)
else:
debug_tensor = gen_debug_ops.debug_identity_v2(
tensor,
tfdbg_context_id=tfdbg_context_id,
op_name=op_name,
output_slot=output_slot,
tensor_debug_mode=self._tensor_debug_mode,
debug_urls=debug_urls)
if is_v1_graph_mode: if is_v1_graph_mode:
instrumented_tensors.append(self._process_v1_graph_mode_tensor( instrumented_tensors.append(self._process_v1_graph_mode_tensor(
op_type, tensor, debug_tensor, tensor_debug_mode)) op_type, tensor, debug_tensor, tensor_debug_mode))

View File

@ -982,7 +982,7 @@ tf_module {
} }
member_method { member_method {
name: "DebugIdentityV2" name: "DebugIdentityV2"
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'None\'], " argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
} }
member_method { member_method {
name: "DebugNanCount" name: "DebugNanCount"

View File

@ -982,7 +982,7 @@ tf_module {
} }
member_method { member_method {
name: "DebugIdentityV2" name: "DebugIdentityV2"
argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'None\'], " argspec: "args=[\'input\', \'tfdbg_context_id\', \'op_name\', \'output_slot\', \'tensor_debug_mode\', \'debug_urls\', \'circular_buffer_size\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'\', \'-1\', \'-1\', \'[]\', \'1000\', \'None\'], "
} }
member_method { member_method {
name: "DebugNanCount" name: "DebugNanCount"