[tfdbg2] Fix a bug in which InfNanMonitor does not handle SHAPE mode correctly
Description of bug: - Introduced by CL/299093247 - The _check_debug_tensor_value() method is intended to check for infs and nans if and only if the debug_tensor_value carries data about inf and nan counts (e.g., FULL_HEALTH and CONCISE_HEALTH). It should ignore tensor_debug_modes that don't carry such information in debug_tensor_value (e.g., SHAPE). But currently it throws an error for such modes. Fix: - Remove the error-throwing if-else branch - Add a unit test to cover the correct behavior - Clarify the contract of the _check_debug_tensor_value() method in its docstring. PiperOrigin-RevId: 300369252 Change-Id: Ib9497ff2154716f524c243ed066cf87a02231786
This commit is contained in:
parent
8d504919ca
commit
ad78070af5
@ -200,7 +200,24 @@ class InfNanMonitor(BaseMonitor):
|
||||
output_slot,
|
||||
execution_index=None,
|
||||
graph_execution_trace_index=None):
|
||||
"""Check for bad numerical values based on debug summary of tensor value."""
|
||||
"""Check for bad numerical values based on debug summary of tensor value.
|
||||
|
||||
If tensor_debug_mode is one in which debug_tensor_value does not carry
|
||||
information about the presence or count of inf / nan values (e.g., SHAPE),
|
||||
this method is a no-op.
|
||||
|
||||
When infs and/or nans are found, `InfNanAlert` objects are created and
|
||||
appended to `self._alerts`.
|
||||
|
||||
Args:
|
||||
tensor_debug_mode: TensorDebugMode proto enum.
|
||||
debug_tensor_value: Debug tensor value as a list of numbers.
|
||||
wall_time: Wall timestamp for the tensor event.
|
||||
op_type: Type of the op that generated the tensor (e.g., "Conv2D").
|
||||
output_slot: Output slot index of the tensor for the op.
|
||||
execution_index: Top-level execution index.
|
||||
graph_execution_trace_index: Intra-graph execution index.
|
||||
"""
|
||||
# FULL_TENSOR mode is handled by a separate code path.
|
||||
assert tensor_debug_mode != debug_event_pb2.TensorDebugMode.FULL_TENSOR
|
||||
if not debug_tensor_value:
|
||||
@ -241,10 +258,6 @@ class InfNanMonitor(BaseMonitor):
|
||||
num_nan=num_nan,
|
||||
execution_index=execution_index,
|
||||
graph_execution_trace_index=graph_execution_trace_index))
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported tensor debug mode: %s" %
|
||||
debug_event_pb2.TensorDebugMode.Name(tensor_debug_mode))
|
||||
|
||||
def on_execution(self,
|
||||
execution_index,
|
||||
|
@ -304,6 +304,35 @@ class InfNanMonitorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
|
||||
self.assertEqual(alert.execution_index, 60)
|
||||
self.assertIsNone(alert.graph_execution_trace_index)
|
||||
|
||||
@parameterized.named_parameters(
|
||||
("Shape",
|
||||
debug_event_pb2.TensorDebugMode.SHAPE,
|
||||
# [tensor_id, dtype, rank, element_count, ...shape_truncate_6]
|
||||
[[-1, 1, 2, 6, 3, 2, 0, 0, 0, 0],
|
||||
[-1, 10, 1, 7, 7, 0, 0, 0, 0, 0]]),
|
||||
)
|
||||
def testInfNanMonitorOnExecutionUnderModeWithNoInfNanInfo(
|
||||
self,
|
||||
tensor_debug_mode,
|
||||
debug_tensor_values):
|
||||
mock_reader = test.mock.MagicMock()
|
||||
monitor = debug_events_monitors.InfNanMonitor(mock_reader)
|
||||
execution_digest = debug_events_reader.ExecutionDigest(
|
||||
1234, 1, "BarOp", output_tensor_device_ids=[0, 1])
|
||||
|
||||
execution = debug_events_reader.Execution(
|
||||
execution_digest,
|
||||
"worker01",
|
||||
["a1", "b2", "e3"],
|
||||
tensor_debug_mode,
|
||||
graph_id=None,
|
||||
input_tensor_ids=[12, 34],
|
||||
output_tensor_ids=[56, 78],
|
||||
debug_tensor_values=debug_tensor_values)
|
||||
monitor.on_execution(60, execution)
|
||||
|
||||
self.assertEmpty(monitor.alerts())
|
||||
|
||||
@parameterized.named_parameters(
|
||||
("FloatsScalarWithInfAndNan", np.inf, np.float32, 1, 0, 1, 0),
|
||||
("Floats2DWithInfAndNan", [[0, np.nan, np.nan, -np.inf]
|
||||
|
Loading…
Reference in New Issue
Block a user