[tfdbg2] Fix a bug in which InfNanMonitor does not handle SHAPE mode correctly

Description of bug:
- Introduced by CL/299093247
- The _check_debug_tensor_value() method is intended to check for infs and nans
  if and only if the debug_tensor_value carries data about inf and nan count
  (e.g., FULL_HEALTH and CONCISE_HEALTH). It should ignore tensor_debug_modes
  that do not carry such information in debug_tensor_value (e.g., SHAPE).
  Currently, however, it raises an error for such modes.

Fix:
- Remove the error-throwing if-else branch
- Add a unit test to cover the correct behavior
- Clarify the contract of the _check_debug_tensor_value() method in its docstring.
PiperOrigin-RevId: 300369252
Change-Id: Ib9497ff2154716f524c243ed066cf87a02231786
Authored by Shanqing Cai on 2020-03-11 11:29:21 -07:00; committed by TensorFlower Gardener
parent 8d504919ca
commit ad78070af5
2 changed files with 47 additions and 5 deletions
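
For context, the fixed contract can be summarized with a short standalone sketch. This is illustrative Python only, not the TensorFlow source; the string mode names and the CONCISE_HEALTH summary layout used here are simplifying assumptions:

# Illustrative sketch only; not the TensorFlow implementation. It mirrors the
# fixed behavior: summaries from modes that carry inf/nan counts are
# inspected, while modes such as SHAPE are silently skipped instead of
# raising an error.

# Modes assumed (for this sketch) to carry inf/nan information.
MODES_WITH_INF_NAN_INFO = frozenset(
    ["CURT_HEALTH", "CONCISE_HEALTH", "FULL_HEALTH"])


def check_debug_tensor_value(tensor_debug_mode, debug_tensor_value):
  """Returns (num_neg_inf, num_pos_inf, num_nan), or None if not applicable."""
  if tensor_debug_mode not in MODES_WITH_INF_NAN_INFO:
    return None  # No-op for modes (e.g., SHAPE) with no inf/nan counts.
  if not debug_tensor_value:
    return None
  if tensor_debug_mode == "CONCISE_HEALTH":
    # Assumed layout: [tensor_id, element_count, neg_inf, pos_inf, nan].
    _, _, num_neg_inf, num_pos_inf, num_nan = debug_tensor_value
    if num_neg_inf or num_pos_inf or num_nan:
      return (num_neg_inf, num_pos_inf, num_nan)
  # The other health modes would be handled analogously.
  return None


# A SHAPE-mode summary (like the rows in the new unit test) is simply ignored:
assert check_debug_tensor_value("SHAPE", [-1, 1, 2, 6, 3, 2, 0, 0, 0, 0]) is None
# A CONCISE_HEALTH-style summary with one NaN reports the counts:
assert check_debug_tensor_value("CONCISE_HEALTH", [-1, 8, 0, 0, 1]) == (0, 0, 1)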


@@ -200,7 +200,24 @@ class InfNanMonitor(BaseMonitor):
output_slot,
execution_index=None,
graph_execution_trace_index=None):
"""Check for bad numerical values based on debug summary of tensor value."""
"""Check for bad numerical values based on debug summary of tensor value.
If tensor_debug_mode is one in which debug_tensor_value does not carry
information about the presence or count of inf / nan values (e.g., SHAPE),
this method is a no-op.
When infs and/or nans are found, `InfNanAlert` objects are created and
appended to `self._alerts`.
Args:
tensor_debug_mode: TensorDebugMode proto enum.
debug_tensor_value: Debug tensor value as a list of numbers.
wall_time: Wall timestamp for the tensor event.
op_type: Type of the op that generated the tensor (e.g., "Conv2D").
output_slot: Output slot index of the tensor for the op.
execution_index: Top-level execution index.
graph_execution_trace_index: Intra-graph execution index.
"""
# FULL_TENSOR mode is handled by a separate code path.
assert tensor_debug_mode != debug_event_pb2.TensorDebugMode.FULL_TENSOR
if not debug_tensor_value:
@@ -241,10 +258,6 @@ class InfNanMonitor(BaseMonitor):
num_nan=num_nan,
execution_index=execution_index,
graph_execution_trace_index=graph_execution_trace_index))
else:
raise ValueError(
"Unsupported tensor debug mode: %s" %
debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
def on_execution(self,
execution_index,

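The second file adds a parameterized unit test covering SHAPE mode. As a rough, illustrative aid (not part of the commit), the SHAPE-mode summary rows used in that test can be read as follows, assuming the layout given in the test's comment:

# Illustrative decoder (not TensorFlow code) for a SHAPE-mode summary row,
# assuming the layout noted in the test comment: [tensor_id, dtype, rank,
# element_count, shape padded/truncated to 6 slots]. Note that no inf/nan
# counts are present, which is why InfNanMonitor must skip this mode.
def decode_shape_summary(row):
  tensor_id, dtype_enum, rank, element_count = row[:4]
  shape = tuple(row[4:4 + rank])  # Trailing slots are zero padding.
  return {
      "tensor_id": tensor_id,
      "dtype_enum": dtype_enum,
      "rank": rank,
      "element_count": element_count,
      "shape": shape,
  }


# First row from the test: a rank-2 tensor with 6 elements and shape (3, 2).
print(decode_shape_summary([-1, 1, 2, 6, 3, 2, 0, 0, 0, 0]))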

@@ -304,6 +304,35 @@ class InfNanMonitorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
self.assertEqual(alert.execution_index, 60)
self.assertIsNone(alert.graph_execution_trace_index)
@parameterized.named_parameters(
("Shape",
debug_event_pb2.TensorDebugMode.SHAPE,
# [tensor_id, dtype, rank, element_count, ...shape_truncate_6]
[[-1, 1, 2, 6, 3, 2, 0, 0, 0, 0],
[-1, 10, 1, 7, 7, 0, 0, 0, 0, 0]]),
)
def testInfNanMonitorOnExecutionUnderModeWithNoInfNanInfo(
self,
tensor_debug_mode,
debug_tensor_values):
mock_reader = test.mock.MagicMock()
monitor = debug_events_monitors.InfNanMonitor(mock_reader)
execution_digest = debug_events_reader.ExecutionDigest(
1234, 1, "BarOp", output_tensor_device_ids=[0, 1])
execution = debug_events_reader.Execution(
execution_digest,
"worker01",
["a1", "b2", "e3"],
tensor_debug_mode,
graph_id=None,
input_tensor_ids=[12, 34],
output_tensor_ids=[56, 78],
debug_tensor_values=debug_tensor_values)
monitor.on_execution(60, execution)
self.assertEmpty(monitor.alerts())
@parameterized.named_parameters(
("FloatsScalarWithInfAndNan", np.inf, np.float32, 1, 0, 1, 0),
("Floats2DWithInfAndNan", [[0, np.nan, np.nan, -np.inf]