diff --git a/tensorflow/python/debug/lib/debug_events_monitors.py b/tensorflow/python/debug/lib/debug_events_monitors.py
index 7d97130ea98..fd7c28f1328 100644
--- a/tensorflow/python/debug/lib/debug_events_monitors.py
+++ b/tensorflow/python/debug/lib/debug_events_monitors.py
@@ -200,7 +200,24 @@ class InfNanMonitor(BaseMonitor):
                                 output_slot,
                                 execution_index=None,
                                 graph_execution_trace_index=None):
-    """Check for bad numerical values based on debug summary of tensor value."""
+    """Check for bad numerical values based on debug summary of tensor value.
+
+    If tensor_debug_mode is one in which debug_tensor_value does not carry
+    information about the presence or count of inf / nan values (e.g., SHAPE),
+    this method is a no-op.
+
+    When infs and/or nans are found, `InfNanAlert` objects are created and
+    appended to `self._alerts`.
+
+    Args:
+      tensor_debug_mode: TensorDebugMode proto enum.
+      debug_tensor_value: Debug tensor value as a list of numbers.
+      wall_time: Wall timestamp for the tensor event.
+      op_type: Type of the op that generated the tensor (e.g., "Conv2D").
+      output_slot: Output slot index of the tensor for the op.
+      execution_index: Top-level execution index.
+      graph_execution_trace_index: Intra-graph execution index.
+    """
     # FULL_TENSOR mode is handled by a separate code path.
     assert tensor_debug_mode != debug_event_pb2.TensorDebugMode.FULL_TENSOR
     if not debug_tensor_value:
@@ -241,10 +258,6 @@ class InfNanMonitor(BaseMonitor):
             num_nan=num_nan,
             execution_index=execution_index,
             graph_execution_trace_index=graph_execution_trace_index))
-    else:
-      raise ValueError(
-          "Unsupported tensor debug mode: %s" %
-          debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
 
   def on_execution(self,
                    execution_index,
diff --git a/tensorflow/python/debug/lib/debug_events_monitors_test.py b/tensorflow/python/debug/lib/debug_events_monitors_test.py
index 231a22d0e37..05eaa510648 100644
--- a/tensorflow/python/debug/lib/debug_events_monitors_test.py
+++ b/tensorflow/python/debug/lib/debug_events_monitors_test.py
@@ -304,6 +304,35 @@ class InfNanMonitorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertEqual(alert.execution_index, 60)
     self.assertIsNone(alert.graph_execution_trace_index)
 
+  @parameterized.named_parameters(
+      ("Shape",
+       debug_event_pb2.TensorDebugMode.SHAPE,
+       # [tensor_id, dtype, rank, element_count, ...shape_truncate_6]
+       [[-1, 1, 2, 6, 3, 2, 0, 0, 0, 0],
+        [-1, 10, 1, 7, 7, 0, 0, 0, 0, 0]]),
+  )
+  def testInfNanMonitorOnExecutionUnderModeWithNoInfNanInfo(
+      self,
+      tensor_debug_mode,
+      debug_tensor_values):
+    mock_reader = test.mock.MagicMock()
+    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
+    execution_digest = debug_events_reader.ExecutionDigest(
+        1234, 1, "BarOp", output_tensor_device_ids=[0, 1])
+
+    execution = debug_events_reader.Execution(
+        execution_digest,
+        "worker01",
+        ["a1", "b2", "e3"],
+        tensor_debug_mode,
+        graph_id=None,
+        input_tensor_ids=[12, 34],
+        output_tensor_ids=[56, 78],
+        debug_tensor_values=debug_tensor_values)
+    monitor.on_execution(60, execution)
+
+    self.assertEmpty(monitor.alerts())
+
   @parameterized.named_parameters(
       ("FloatsScalarWithInfAndNan", np.inf, np.float32, 1, 0, 1, 0),
       ("Floats2DWithInfAndNan", [[0, np.nan, np.nan, -np.inf]
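
A minimal usage sketch, not part of the patch above, showing how the relaxed behavior surfaces to a consumer of `InfNanMonitor`. It assumes a tfdbg2 dump directory written with a tensor debug mode that carries no inf/nan information (e.g., SHAPE), and that `DebugDataReader.update()` dispatches execution events to registered monitors; the dump path is hypothetical.

    from tensorflow.python.debug.lib import debug_events_monitors
    from tensorflow.python.debug.lib import debug_events_reader

    # Hypothetical dump root, e.g. produced beforehand by
    # tf.debugging.experimental.enable_dump_debug_info(
    #     "/tmp/tfdbg2_dump", tensor_debug_mode="SHAPE").
    reader = debug_events_reader.DebugDataReader("/tmp/tfdbg2_dump")
    # The monitor is constructed against the reader, mirroring the test above.
    monitor = debug_events_monitors.InfNanMonitor(reader)
    # Reading the debug events drives the monitor's on_execution() /
    # on_graph_execution_trace() callbacks (assumed wiring).
    reader.update()
    # With this change, SHAPE-mode events produce no alerts instead of
    # raising ValueError("Unsupported tensor debug mode: ...").
    print(monitor.alerts())  # expected: []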