diff --git a/tensorflow/python/debug/__init__.py b/tensorflow/python/debug/__init__.py index 812c31ed165..a2e4f1635a7 100644 --- a/tensorflow/python/debug/__init__.py +++ b/tensorflow/python/debug/__init__.py @@ -12,7 +12,53 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Public Python API of TensorFlow Debugger (tfdbg).""" +"""Public Python API of TensorFlow Debugger (tfdbg). + +## Functions for adding debug watches + +These functions help you modify `RunOptions` to specify which `Tensor`s are to +be watched when the TensorFlow graph is executed at runtime. + +@@add_debug_tensor_watch +@@watch_graph +@@watch_graph_with_blacklists + + +## Classes for debug-dump data and directories + +These classes allow you to load and inspect tensor values dumped from +TensorFlow graphs during runtime. + +@@DebugTensorDatum +@@DebugDumpDir + + +## Functions for loading debug-dump data + +@@load_tensor_from_event_file + + +## Tensor-value predicates + +Built-in tensor-filter predicates to support conditional breakpoint between +runs. See `DebugDumpDir.find()` for more details. + +@@has_inf_or_nan + + +## Session wrapper class and `SessionRunHook` implementations + +These classes allow you to + +* wrap around TensorFlow `Session` objects to debug plain TensorFlow models + (see `LocalCLIDebugWrapperSession`), or +* generate `SessionRunHook` objects to debug `tf.contrib.learn` models (see + `LocalCLIDebugHook`). 
+ +@@LocalCLIDebugHook +@@LocalCLIDebugWrapperSession + +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/python/debug/debug_data.py b/tensorflow/python/debug/debug_data.py index 312395049a2..293fcf6b1b2 100644 --- a/tensorflow/python/debug/debug_data.py +++ b/tensorflow/python/debug/debug_data.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Data structures and helpers for TensorFlow Debugger (tfdbg).""" +"""Classes and functions to handle debug-dump data of TensorFlow Debugger.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -32,15 +33,15 @@ from tensorflow.python.platform import gfile def load_tensor_from_event_file(event_file_path): """Load a tensor from an event file. - Assumes that the event file contains a Event protobuf and the Event protobuf - contains a tensor. + Assumes that the event file contains a `Event` protobuf and the `Event` + protobuf contains a `Tensor` value. Args: - event_file_path: Path to the event file. + event_file_path: (`str`) path to the event file. Returns: - The tensor value loaded from the event file. For uninitialized tensors, - return None. + The tensor value loaded from the event file, as a `numpy.ndarray`. For + uninitialized tensors, returns None. """ event = event_pb2.Event() @@ -105,10 +106,10 @@ def get_output_slot(element_name): assumed. Args: - element_name: (str) name of the graph element in question. + element_name: (`str`) name of the graph element in question. Returns: - (int) output slot number. + (`int`) output slot number. """ return int(element_name.split(":")[-1]) if ":" in element_name else 0 @@ -220,17 +221,17 @@ def has_inf_or_nan(datum, tensor): """A predicate for whether a tensor consists of any bad numerical values. 
This predicate is common enough to merit definition in this module. - Bad numerical values include nans and infs. - The signature of this function follows the requiremnet of DebugDumpDir's - find() method. + Bad numerical values include `nan`s and `inf`s. + The signature of this function follows the requirement of the method + `DebugDumpDir.find()`. Args: - datum: (DebugTensorDatum) Datum metadata. - tensor: (numpy.ndarray or None) Value of the tensor. None represents + datum: (`DebugTensorDatum`) Datum metadata. + tensor: (`numpy.ndarray` or None) Value of the tensor. None represents an uninitialized tensor. Returns: - (bool) True if and only if tensor consists of any nan or inf values. + (`bool`) True if and only if tensor consists of any nan or inf values. """ _ = datum # Datum metadata is unused in this predicate. @@ -247,32 +248,33 @@ def has_inf_or_nan(datum, tensor): class DebugTensorDatum(object): - """A single tensor dumped by tfdbg. + """A single tensor dumped by TensorFlow Debugger (tfdbg). - Contains "metadata" for the dumped tensor, including node name, output slot, - debug op and timestamp. + Contains metadata about the dumped tensor, including `timestamp`, + `node_name`, `output_slot`, `debug_op`, and path to the dump file + (`file_path`). - This type does not contain the space-expensive tensor (numpy array) itself. - It just points to the file path from which the tensor can be loaded if - needed. + This type does not hold the generally space-expensive tensor value (numpy + array). Instead, it points to the file from which the tensor value can be + loaded (with the `get_tensor` method) if needed. """ def __init__(self, dump_root, debug_dump_rel_path): - """DebugTensorDatum constructor. + """`DebugTensorDatum` constructor. Args: - dump_root: Debug dump root directory. - debug_dump_rel_path: Path to a debug dump file, relative to the debug - dump root directory. 
For example, suppose the debug dump root - directory is "/tmp/tfdbg_1" and the dump file is at - "/tmp/tfdbg_1/ns_1/node_a_0_DebugIdentity_123456789", then + dump_root: (`str`) Debug dump root directory. + debug_dump_rel_path: (`str`) Path to a debug dump file, relative to the + `dump_root`. For example, suppose the debug dump root + directory is `/tmp/tfdbg_1` and the dump file is at + `/tmp/tfdbg_1/ns_1/node_a_0_DebugIdentity_123456789`, then the value of the debug_dump_rel_path should be - "ns_1/node_a_0_DebugIdenity_1234456789". + `ns_1/node_a_0_DebugIdentity_123456789`. Raises: ValueError: If the base file name of the dump file does not conform to the dump file naming pattern: - <op_name>_<output_slot>_<debug_op_name>_<timestamp_microsec> + `node_name`_`output_slot`_`debug_op`_`timestamp` """ base = os.path.basename(debug_dump_rel_path) @@ -307,31 +309,62 @@ class DebugTensorDatum(object): return self.__str__() def get_tensor(self): - """Get tensor from the dump (Event) file. + """Get tensor from the dump (`Event`) file. Returns: - The tensor loaded from the dump (Event) file. + The tensor loaded from the dump (`Event`) file. """ + return load_tensor_from_event_file(self.file_path) @property def timestamp(self): + """Timestamp of when this tensor value was dumped. + + Returns: + (`int`) The timestamp in microseconds. + """ + return self._timestamp @property def debug_op(self): + """Name of the debug op. + + Returns: + (`str`) debug op name (e.g., `DebugIdentity`). + """ + return self._debug_op @property def node_name(self): + """Name of the node from which the tensor value was dumped. + + Returns: + (`str`) name of the node watched by the debug op. + """ + return self._node_name @property def output_slot(self): + """Output slot index from which the tensor value was dumped. + + Returns: + (`int`) output slot index watched by the debug op. + """ + return self._output_slot @property def tensor_name(self): + """Name of the tensor watched by the debug op. 
+ + Returns: + (`str`) `Tensor` name, in the form of `node_name`:`output_slot` + """ + return _get_tensor_name(self.node_name, self.output_slot) @property @@ -339,32 +372,34 @@ class DebugTensorDatum(object): """Watch key identities a debug watch on a tensor. Returns: - A watch key, in the form of <tensor_name>:<debug_op>. + (`str`) A watch key, in the form of `tensor_name`:`debug_op`. """ + return _get_tensor_watch_key(self.node_name, self.output_slot, self.debug_op) @property def file_path(self): + """Path to the file which stores the value of the dumped tensor.""" + return self._file_path class DebugDumpDir(object): - """Data set from a debug dump directory on filesystem. + """Data set from a debug-dump directory on filesystem. - An instance of DebugDumpDir contains all DebugTensorDatum in a tfdbg dump - root directory. This is an immutable object, of which all constitute tensor - dump files and partition_graphs are loaded during the __init__ call. + An instance of `DebugDumpDir` contains all `DebugTensorDatum` instances + in a tfdbg dump root directory. """ def __init__(self, dump_root, partition_graphs=None, validate=True): - """DebugDumpDir constructor. + """`DebugDumpDir` constructor. Args: - dump_root: Path to the dump root directory. + dump_root: (`str`) path to the dump root directory. partition_graphs: A repeated field of GraphDefs representing the partition graphs executed by the TensorFlow runtime. - validate: Whether the dump files are to be validated against the + validate: (`bool`) whether the dump files are to be validated against the partition graphs. Raises: @@ -381,10 +416,10 @@ class DebugDumpDir(object): self._python_graph = None def _load_dumps(self, dump_root): - """Load DebugTensorDatum instances from the dump root. + """Load `DebugTensorDatum` instances from the dump root. - Populates a list of DebugTensorDatum and sort the list by ascending - timestamp. 
+ Populates a list of `DebugTensorDatum` instances and sorts the list by + ascending timestamp. This sorting order reflects the order in which the TensorFlow executor processed the nodes of the graph. It is (one of many possible) topological @@ -404,7 +439,7 @@ class DebugDumpDir(object): graphs may not be available, e.g., when the run errors out. Args: - dump_root: (str) Dump root directory. + dump_root: (`str`) Dump root directory. """ self._dump_root = dump_root @@ -439,11 +474,11 @@ class DebugDumpDir(object): """Obtain a DebugTensorDatum from the directory and file name. Args: - dir_name: (str) Name of the directory in which the dump file resides. - file_name: (str) Base name of the dump file. + dir_name: (`str`) Name of the directory in which the dump file resides. + file_name: (`str`) Base name of the dump file. Returns: - (DebugTensorDatum) The DebugTensorDatum loaded from the dump file. + (`DebugTensorDatum`) The `DebugTensorDatum` loaded from the dump file. """ # Calculate the relative path of the dump file with respect to the root. @@ -455,7 +490,7 @@ class DebugDumpDir(object): def _create_tensor_watch_maps(self): """Create maps from tensor watch keys to datum and to timestamps. - Create a map from watch key (tensor name + debug op) to DebugTensorDatum + Create a map from watch key (tensor name + debug op) to `DebugTensorDatum` item. Also make a map from watch key to relative timestamp. "relative" means (absolute timestamp - t0). """ @@ -478,7 +513,7 @@ class DebugDumpDir(object): Unlike the partition graphs, which are protobuf `GraphDef` objects, `Graph` is a Python object and carries additional information such as the traceback - of nodes in the graph. + of the construction of the nodes in the graph. Args: python_graph: (ops.Graph) The Python Graph object. @@ -499,8 +534,9 @@ class DebugDumpDir(object): """Absolute timestamp of the first dumped tensor. Returns: - Absolute timestamp of the first dumped tensor. 
+ (`int`) absolute timestamp of the first dumped tensor, in microseconds. """ + return self._t0 @property @@ -508,8 +544,9 @@ class DebugDumpDir(object): """Total number of dumped tensors in the dump root directory. Returns: - Total number of dumped tensors in the dump root directory. + (`int`) total number of dumped tensors in the dump root directory. """ + return len(self._dump_tensor_data) def _load_partition_graphs(self, partition_graphs, validate): @@ -524,7 +561,7 @@ class DebugDumpDir(object): partition_graphs: Partition graphs executed by the TensorFlow runtime, represented as repeated fields of GraphDef. If no partition_graph is available, use None. - validate: (bool) Whether the dump files are to be validated against the + validate: (`bool`) Whether the dump files are to be validated against the partition graphs. """ @@ -619,7 +656,7 @@ class DebugDumpDir(object): """Prune nodes out of input and recipient maps. Args: - nodes_to_prune: (list of str) Names of the nodes to be pruned. + nodes_to_prune: (`list` of `str`) Names of the nodes to be pruned. """ for node in nodes_to_prune: @@ -759,6 +796,7 @@ class DebugDumpDir(object): Raises: LookupError: If no partition graphs have been loaded. """ + if self._partition_graphs is None: raise LookupError("No partition graphs have been loaded.") @@ -773,13 +811,14 @@ class DebugDumpDir(object): Raises: LookupError: If no partition graphs have been loaded. """ + if self._partition_graphs is None: raise LookupError("No partition graphs have been loaded.") return [node_name for node_name in self._node_inputs] def node_attributes(self, node_name): - """Get attributes of a node. + """Get the attributes of a node. Args: node_name: Name of the node in question. @@ -791,6 +830,7 @@ class DebugDumpDir(object): LookupError: If no partition graphs have been loaded. ValueError: If no node named node_name exists. 
""" + if self._partition_graphs is None: raise LookupError("No partition graphs have been loaded.") @@ -804,11 +844,11 @@ class DebugDumpDir(object): Args: node_name: Name of the node. - is_control: Whether control inputs, rather than non-control inputs, are - to be returned. + is_control: (`bool`) Whether control inputs, rather than non-control + inputs, are to be returned. Returns: - All non-control inputs to the node, as a list of node names. + (`list` of `str`) inputs to the node, as a list of node names. Raises: LookupError: If node inputs and control inputs have not been loaded @@ -837,7 +877,8 @@ class DebugDumpDir(object): include_control: Include control inputs (True by default). Returns: - All transitive inputs to the node, as a list of node names. + (`list` of `str`) all transitive inputs to the node, as a list of node + names. Raises: LookupError: If node inputs and control inputs have not been loaded @@ -900,12 +941,12 @@ class DebugDumpDir(object): """Get recipient of the given node's output according to partition graphs. Args: - node_name: Name of the node. - is_control: Whether control outputs, rather than non-control outputs, - are to be returned. + node_name: (`str`) name of the node. + is_control: (`bool`) whether control outputs, rather than non-control + outputs, are to be returned. Returns: - All non-control inputs to the node, as a list of node names. + (`list` of `str`) all inputs to the node, as a list of node names. Raises: LookupError: If node inputs and control inputs have not been loaded @@ -930,7 +971,7 @@ class DebugDumpDir(object): """Get the list of devices. Returns: - Number of devices. + (`list` of `str`) names of the devices. Raises: LookupError: If node inputs and control inputs have not been loaded @@ -946,7 +987,7 @@ class DebugDumpDir(object): """Test if a node exists in the partition graphs. Args: - node_name: Name of the node to be checked, as a str. + node_name: (`str`) name of the node to be checked. 
Returns: A boolean indicating whether the node exists. @@ -965,16 +1006,17 @@ class DebugDumpDir(object): """Get the device of a node. Args: - node_name: Name of the node. + node_name: (`str`) name of the node. Returns: - Name of the device on which the node is placed, as a str. + (`str`) name of the device on which the node is placed. Raises: LookupError: If node inputs and control inputs have not been loaded from partition graphs yet. ValueError: If the node does not exist in partition graphs. """ + if self._partition_graphs is None: raise LookupError( "Node devices are not loaded from partition graphs yet.") @@ -989,16 +1031,17 @@ class DebugDumpDir(object): """Get the op type of given node. Args: - node_name: Name of the node. + node_name: (`str`) name of the node. Returns: - Type of the node's op, as a str. + (`str`) op type of the node. Raises: LookupError: If node op types have not been loaded from partition graphs yet. ValueError: If the node does not exist in partition graphs. """ + if self._partition_graphs is None: raise LookupError( "Node op types are not loaded from partition graphs yet.") @@ -1013,14 +1056,14 @@ class DebugDumpDir(object): """Get all tensor watch keys of given node according to partition graphs. Args: - node_name: Name of the node. + node_name: (`str`) name of the node. Returns: - All debug tensor watch keys, as a list of strings. Returns an empty list - if the node name does not correspond to any debug watch keys. + (`list` of `str`) all debug tensor watch keys. Returns an empty list if + the node name does not correspond to any debug watch keys. Raises: - LookupError: If debug watch information has not been loaded from + `LookupError`: If debug watch information has not been loaded from partition graphs yet. """ @@ -1037,13 +1080,13 @@ class DebugDumpDir(object): return watch_keys def watch_key_to_data(self, debug_watch_key): - """Get all DebugTensorDatum instances corresponding to a debug watch key. 
+ """Get all `DebugTensorDatum` instances corresponding to a debug watch key. Args: - debug_watch_key: A debug watch key, as a str. + debug_watch_key: (`str`) debug watch key. Returns: - A list of DebugTensorDatuminstances that correspond to the debug watch + A list of `DebugTensorDatum` instances that correspond to the debug watch key. If the watch key does not exist, returns an empty list. Raises: @@ -1057,18 +1100,24 @@ class DebugDumpDir(object): Args: predicate: A callable that takes two input arguments: - predicate(debug_tensor_datum, tensor), - where "debug_tensor_datum" is an instance of DebugTensorDatum, which - carries "metadata", such as the name of the node, the tensor's slot - index on the node, timestamp, debug op name, etc; and "tensor" is - the dumped tensor value as a numpy array. - first_n: Return only the first n dumped tensor data (in time order) for - which the predicate is True. To return all such data, let first_n be - <= 0. + + ```python + def predicate(debug_tensor_datum, tensor): + # returns a bool + ``` + + where `debug_tensor_datum` is an instance of `DebugTensorDatum`, which + carries the metadata, such as the `Tensor`'s node name, output slot, + timestamp, debug op name, etc.; and `tensor` is the dumped tensor value + as a `numpy.ndarray`. + first_n: (`int`) return only the first n `DebugTensorDatum` instances (in + time order) for which the predicate returns True. To return all the + `DebugTensorDatum` instances, let first_n be <= 0. Returns: - A list of all DebugTensorDatum objects in this DebugDumpDir object for - which predicate returns True, sorted in ascending order of the timestamp. + A list of all `DebugTensorDatum` objects in this `DebugDumpDir` object + for which predicate returns True, sorted in ascending order of the + timestamp. """ matched_data = [] @@ -1085,16 +1134,16 @@ class DebugDumpDir(object): """Get the file paths from a debug-dumped tensor. Args: - node_name: Name of the node that the tensor is produced by. 
- output_slot: Output slot index of tensor. - debug_op: Name of the debug op. + node_name: (`str`) name of the node that the tensor is produced by. + output_slot: (`int`) output slot index of tensor. + debug_op: (`str`) name of the debug op. Returns: List of file path(s) loaded. This is a list because each debugged tensor may be dumped multiple times. Raises: - ValueError: If the tensor does not exist in the debub dump data. + ValueError: If the tensor does not exist in the debug-dump data. """ watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op) @@ -1108,18 +1157,18 @@ class DebugDumpDir(object): """Get the tensor value from for a debug-dumped tensor. The tensor may be dumped multiple times in the dump root directory, so a - list of tensors (numpy arrays) is returned. + list of tensors (`numpy.ndarray`) is returned. Args: - node_name: Name of the node that the tensor is produced by. - output_slot: Output slot index of tensor. - debug_op: Name of the debug op. + node_name: (`str`) name of the node that the tensor is produced by. + output_slot: (`int`) output slot index of tensor. + debug_op: (`str`) name of the debug op. Returns: - List of tensor(s) loaded from the tensor dump file(s). + List of tensors (`numpy.ndarray`) loaded from the debug-dump file(s). Raises: - ValueError: If the tensor does not exist in the debub dump data. + ValueError: If the tensor does not exist in the debug-dump data. """ watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op) @@ -1132,18 +1181,18 @@ class DebugDumpDir(object): def get_rel_timestamps(self, node_name, output_slot, debug_op): """Get the relative timestamp from for a debug-dumped tensor. - Relative timestamp means (absolute timestamp - t0), t0 being the absolute - timestamp of the first dumped tensor in the dump root. The tensor may be - dumped multiple times in the dump root directory, so a list of relative - timestamp (numpy arrays) is returned. 
+ Relative timestamp means (absolute timestamp - `t0`), where `t0` is the + absolute timestamp of the first dumped tensor in the dump root. The tensor + may be dumped multiple times in the dump root directory, so a list of + relative timestamps (`numpy.ndarray`) is returned. Args: - node_name: Name of the node that the tensor is produced by. - output_slot: Output slot index of tensor. - debug_op: Name of the debug op. + node_name: (`str`) name of the node that the tensor is produced by. + output_slot: (`int`) output slot index of tensor. + debug_op: (`str`) name of the debug op. Returns: - List of relative timestamps. + (list of int) list of relative timestamps. Raises: ValueError: If the tensor does not exist in the debub dump data. @@ -1160,7 +1209,7 @@ class DebugDumpDir(object): """Try to retrieve the Python traceback of node's construction. Args: - element_name: (str) Name of a graph element (node or tensor). + element_name: (`str`) Name of a graph element (node or tensor). Returns: (list) The traceback list object as returned by the `extract_trace` diff --git a/tensorflow/python/debug/debug_utils.py b/tensorflow/python/debug/debug_utils.py index ea672f9c515..3d6d5ad4476 100644 --- a/tensorflow/python/debug/debug_utils.py +++ b/tensorflow/python/debug/debug_utils.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== """TensorFlow Debugger (tfdbg) Utilities.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -27,18 +28,20 @@ def add_debug_tensor_watch(run_options, output_slot=0, debug_ops="DebugIdentity", debug_urls=None): - """Add debug tensor watch option to RunOptions. + """Add watch on a `Tensor` to `RunOptions`. + + N.B.: Under certain circumstances, the `Tensor` may not be actually watched + (e.g., if the node of the `Tensor` is constant-folded during runtime). 
Args: - run_options: An instance of tensorflow.core.protobuf.config_pb2.RunOptions - node_name: Name of the node to watch. - output_slot: Output slot index of the tensor from the watched node. - debug_ops: Name(s) of the debug op(s). Default: "DebugIdentity". - Can be a list of strings or a single string. The latter case is - equivalent to a list of string with only one element. - debug_urls: URLs to send debug signals to: a non-empty list of strings or - a string, or None. The case of a string is equivalent to a list of - string with only one element. + run_options: An instance of `config_pb2.RunOptions` to be modified. + node_name: (`str`) name of the node to watch. + output_slot: (`int`) output slot index of the tensor from the watched node. + debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s). Can be a + `list` of `str` or a single `str`. The latter case is equivalent to a + `list` of `str` with only one element. + debug_urls: (`str` or `list` of `str`) URL(s) to send debug values to, + e.g., `file:///tmp/tfdbg_dump_1`, `grpc://localhost:12345`. """ watch_opts = run_options.debug_options.debug_tensor_watch_opts @@ -65,27 +68,31 @@ def watch_graph(run_options, debug_urls=None, node_name_regex_whitelist=None, op_type_regex_whitelist=None): - """Add debug tensor watch options to RunOptions based on a TensorFlow graph. + """Add debug watches to `RunOptions` for a TensorFlow graph. - To watch all tensors on the graph, set both node_name_regex_whitelist - and op_type_regex_whitelist to None. + To watch all `Tensor`s on the graph, let both `node_name_regex_whitelist` + and `op_type_regex_whitelist` be the default (`None`). + + N.B.: Under certain circumstances, not all specified `Tensor`s will be + actually watched (e.g., nodes that are constant-folded during runtime will + not be watched). 
Args: - run_options: An instance of tensorflow.core.protobuf.config_pb2.RunOptions - graph: An instance of tensorflow.python.framework.ops.Graph - debug_ops: Name of the debug op to use. Default: "DebugIdentity". - Can be a list of strings of a single string. The latter case is - equivalent to a list of a single string. - debug_urls: Debug urls. Can be a list of strings, a single string, or - None. The case of a single string is equivalen to a list consisting - of a single string. - node_name_regex_whitelist: Regular-expression whitelist for node_name. - This should be a string, e.g., "(weight_[0-9]+|bias_.*)" + run_options: An instance of `config_pb2.RunOptions` to be modified. + graph: An instance of `ops.Graph`. + debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s) to use. + debug_urls: URLs to send debug values to. Can be a list of strings, + a single string, or None. The case of a single string is equivalent to + a list consisting of a single string, e.g., `file:///tmp/tfdbg_dump_1`, + `grpc://localhost:12345`. + node_name_regex_whitelist: Regular-expression whitelist for node_name, + e.g., `"(weight_[0-9]+|bias_.*)"`. op_type_regex_whitelist: Regular-expression whitelist for the op type of - nodes. If both node_name_regex_whitelist and op_type_regex_whitelist - are none, the two filtering operations will occur in an "AND" - relation. In other words, a node will be included if and only if it - hits both whitelists. This should be a string, e.g., "(Variable|Add)". + nodes, e.g., `"(Variable|Add)"`. + If both `node_name_regex_whitelist` and `op_type_regex_whitelist` + are set, the two filtering operations will occur in a logical `AND` + relation. In other words, a node will be included if and only if it + hits both whitelists. 
""" if isinstance(debug_ops, str): @@ -130,29 +137,30 @@ def watch_graph_with_blacklists(run_options, debug_urls=None, node_name_regex_blacklist=None, op_type_regex_blacklist=None): - """Add debug tensor watch options, blacklisting nodes and op types. + """Add debug tensor watches, blacklisting nodes and op types. - This is similar to watch_graph(), but the node names and op types can be + This is similar to `watch_graph()`, but the node names and op types are blacklisted, instead of whitelisted. + N.B.: Under certain circumstances, not all specified `Tensor`s will be + actually watched (e.g., nodes that are constant-folded during runtime will + not be watched). + Args: - run_options: An instance of tensorflow.core.protobuf.config_pb2.RunOptions - graph: An instance of tensorflow.python.framework.ops.Graph - debug_ops: Name of the debug op to use. Default: "DebugIdentity". - Can be a list of strings of a single string. The latter case is - equivalent to a list of a single string. - debug_urls: Debug urls. Can be a list of strings, a single string, or - None. The case of a single string is equivalen to a list consisting - of a single string. + run_options: An instance of `config_pb2.RunOptions` to be modified. + graph: An instance of `ops.Graph`. + debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s) to use. + debug_urls: URL(s) to send debug values to, e.g., + `file:///tmp/tfdbg_dump_1`, `grpc://localhost:12345`. node_name_regex_blacklist: Regular-expression blacklist for node_name. - This should be a string, e.g., `"(weight_[0-9]+|bias_.*)"`. op_type_regex_blacklist: Regular-expression blacklist for the op type of - nodes. If both node_name_regex_blacklist and op_type_regex_blacklist - are none, the two filtering operations will occur in an "OR" - relation. 
In other words, a node will be excluded if it hits either of - the two blacklists; a node will be included if and only if it hits - none of the blacklists. This should be a string, e.g., - "(Variable|Add)". + nodes, e.g., `"(Variable|Add)"`. + If both node_name_regex_blacklist and op_type_regex_blacklist + are set, the two filtering operations will occur in a logical `OR` + relation. In other words, a node will be excluded if it hits either of + the two blacklists; a node will be included if and only if it hits + neither of the blacklists. """ if isinstance(debug_ops, str): diff --git a/tensorflow/python/debug/wrappers/framework.py b/tensorflow/python/debug/wrappers/framework.py index b60646a46af..f72858863e5 100644 --- a/tensorflow/python/debug/wrappers/framework.py +++ b/tensorflow/python/debug/wrappers/framework.py @@ -176,7 +176,7 @@ class OnSessionInitResponse(object): """Constructor. Args: - action: (OnSessionInitAction) Debugger action to take on session init. + action: (`OnSessionInitAction`) Debugger action to take on session init. """ _check_type(action, str) self.action = action @@ -191,7 +191,7 @@ class OnRunStartRequest(object): def __init__(self, fetches, feed_dict, run_options, run_metadata, run_call_count): - """Constructor of OnRunStartRequest. + """Constructor of `OnRunStartRequest`. Args: fetches: Fetch targets of the run() call. @@ -233,10 +233,10 @@ class OnRunStartResponse(object): """ def __init__(self, action, debug_urls): - """Constructor of OnRunStartResponse. + """Constructor of `OnRunStartResponse`. Args: - action: (OnRunStartAction) the action actually taken by the wrapped + action: (`OnRunStartAction`) the action actually taken by the wrapped session for the run() call. debug_urls: (list of str) debug_urls used in watching the tensors during the run() call. @@ -260,10 +260,10 @@ class OnRunEndRequest(object): run_metadata=None, client_graph_def=None, tf_error=None): - """Constructor for OnRunEndRequest. 
+ """Constructor for `OnRunEndRequest`. Args: - performed_action: (OnRunStartAction) Actually-performed action by the + performed_action: (`OnRunStartAction`) Actually-performed action by the debug-wrapper session. run_metadata: run_metadata output from the run() call (if any). client_graph_def: (GraphDef) GraphDef from the client side, i.e., from @@ -303,13 +303,13 @@ class BaseDebugWrapperSession(session.SessionInterface): # is available. def __init__(self, sess): - """Constructor of BaseDebugWrapperSession. + """Constructor of `BaseDebugWrapperSession`. Args: sess: An (unwrapped) TensorFlow session instance. Raises: - ValueError: On invalid OnSessionInitAction value. + ValueError: On invalid `OnSessionInitAction` value. """ _check_type(sess, session.BaseSession) @@ -352,16 +352,16 @@ class BaseDebugWrapperSession(session.SessionInterface): """Wrapper around Session.run() that inserts tensor watch options. Args: - fetches: Same as the fetches arg to regular Session.run() - feed_dict: Same as the feed_dict arg to regular Session.run() - options: Same as the options arg to regular Session.run() - run_metadata: Same as the run_metadata to regular Session.run() + fetches: Same as the `fetches` arg to regular `Session.run()`. + feed_dict: Same as the `feed_dict` arg to regular `Session.run()`. + options: Same as the `options` arg to regular `Session.run()`. + run_metadata: Same as the `run_metadata` arg to regular `Session.run()`. Returns: - Simply forwards the output of the wrapped Session.run() call. + Simply forwards the output of the wrapped `Session.run()` call. Raises: - ValueError: On invalid OnRunStartAction value. + ValueError: On invalid `OnRunStartAction` value. """ self._run_call_count += 1 @@ -458,11 +458,11 @@ class BaseDebugWrapperSession(session.SessionInterface): The invocation happens right before the constructor ends. 
Args: - request: (OnSessionInitRequest) callback request carrying information + request: (`OnSessionInitRequest`) callback request carrying information such as the session being wrapped. Returns: - An instance of OnSessionInitResponse. + An instance of `OnSessionInitResponse`. """ @abc.abstractmethod @@ -474,12 +474,13 @@ class BaseDebugWrapperSession(session.SessionInterface): after an increment of run call counter. Args: - request: (OnRunStartRequest) callback request object carrying information - about the run call such as the fetches, feed dict, run options, run - metadata, and how many run() calls to this wrapper session has occurred. + request: (`OnRunStartRequest`) callback request object carrying + information about the run call such as the fetches, feed dict, run + options, run metadata, and how many `run()` calls to this wrapper + session have occurred. Returns: - An instance of OnRunStartResponse, carrying information to + An instance of `OnRunStartResponse`, carrying information to 1) direct the wrapper session to perform a specified action (e.g., run with or without debug tensor watching, invoking the stepper.) 2) debug URLs used to watch the tensors. @@ -493,12 +494,12 @@ class BaseDebugWrapperSession(session.SessionInterface): The invocation happens right before the wrapper exits its run() call. Args: - request: (OnRunEndRequest) callback request object carrying information + request: (`OnRunEndRequest`) callback request object carrying information such as the actual action performed by the session wrapper for the run() call. Returns: - An instance of OnRunStartResponse. + An instance of `OnRunEndResponse`.
""" def __enter__(self): diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper.py b/tensorflow/python/debug/wrappers/local_cli_wrapper.py index ef57fdbe7ff..1b966e110fe 100644 --- a/tensorflow/python/debug/wrappers/local_cli_wrapper.py +++ b/tensorflow/python/debug/wrappers/local_cli_wrapper.py @@ -37,18 +37,23 @@ _DUMP_ROOT_PREFIX = "tfdbg_" class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): - """Concrete subclass of BaseDebugWrapperSession implementing a local CLI.""" + """Concrete subclass of BaseDebugWrapperSession implementing a local CLI. + + This class has all the methods that a `session.Session` object has, in order + to support debugging with minimal code changes. Invoking its `run()` method + will launch the command-line interface (CLI) of tfdbg. + """ def __init__(self, sess, dump_root=None, log_usage=True): """Constructor of LocalCLIDebugWrapperSession. Args: - sess: (BaseSession subtypes) The TensorFlow Session object being wrapped. - dump_root: (str) Optional path to the dump root directory. Must be either - a directory that does not exist or an empty directory. If the directory + sess: The TensorFlow `Session` object being wrapped. + dump_root: (`str`) optional path to the dump root directory. Must be a + directory that does not exist or an empty directory. If the directory does not exist, it will be created by the debugger core during debug - run() calls and removed afterwards. - log_usage: (bool) Whether the usage of this class is to be logged. + `run()` calls and removed afterwards. + log_usage: (`bool`) whether the usage of this class is to be logged. Raises: ValueError: If dump_root is an existing and non-empty directory or if @@ -137,14 +142,10 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): def add_tensor_filter(self, filter_name, tensor_filter): """Add a tensor filter. - The signature of this command is identical to that of - debug_data.DebugDumpDir.add_tensor_filter(). 
This method is a thin wrapper - around that method. - Args: - filter_name: (str) Name of the filter. - tensor_filter: (callable) The filter callable. See the doc string of - debug_data.DebugDumpDir.add_tensor_filter() for more details. + filter_name: (`str`) name of the filter. + tensor_filter: (`callable`) the filter callable. See the doc string of + `DebugDumpDir.find()` for more details about its signature. """ self._tensor_filters[filter_name] = tensor_filter @@ -153,7 +154,7 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): """Overrides on-session-init callback. Args: - request: An instance of OnSessionInitRequest. + request: An instance of `OnSessionInitRequest`. Returns: An instance of OnSessionInitResponse. @@ -166,13 +167,13 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): """Overrides on-run-start callback. Invoke the CLI to let user choose what action to take: - run / run --no_debug / step. + `run` / `invoke_stepper`. Args: - request: An instance of OnSessionInitRequest. + request: An instance of `OnRunStartRequest`. Returns: - An instance of OnSessionInitResponse. + An instance of `OnRunStartResponse`. Raises: RuntimeError: If user chooses to prematurely exit the debugger. @@ -483,10 +484,11 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession): """Overrides method in base class to implement interactive node stepper. Args: - node_stepper: (stepper.NodeStepper) The underlying NodeStepper API object. - restore_variable_values_on_exit: (bool) Whether any variables whose values - have been altered during this node-stepper invocation should be restored - to their old values when this invocation ends. + node_stepper: (`stepper.NodeStepper`) The underlying NodeStepper API + object. + restore_variable_values_on_exit: (`bool`) Whether any variables whose + values have been altered during this node-stepper invocation should be + restored to their old values when this invocation ends.
Returns: The same return values as the `Session.run()` call on the same fetches as diff --git a/tensorflow/python/framework/gen_docs_combined.py b/tensorflow/python/framework/gen_docs_combined.py index 4e3f242b949..377ccb7c9b6 100644 --- a/tensorflow/python/framework/gen_docs_combined.py +++ b/tensorflow/python/framework/gen_docs_combined.py @@ -25,6 +25,7 @@ import sys import tensorflow as tf from tensorflow.contrib import ffmpeg +from tensorflow.python import debug as tf_debug from tensorflow.python.client import client_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import docs @@ -79,6 +80,7 @@ def module_names(): "tf.contrib.solvers", "tf.contrib.training", "tf.contrib.util", + "tf_debug", ] @@ -89,6 +91,8 @@ def find_module(base_module, name): # to size concerns. elif name == "tf.contrib.ffmpeg": return ffmpeg + elif name == "tf_debug": + return tf_debug elif name.startswith("tf."): subname = name[3:] subnames = subname.split(".") @@ -240,6 +244,7 @@ def all_libraries(module_to_name, members, documented): library("contrib.util", "Utilities (contrib)", tf.contrib.util), library("contrib.copy_graph", "Copying Graph Elements (contrib)", tf.contrib.copy_graph), + library("tf_debug", "TensorFlow Debugger", tf_debug), ]) _hidden_symbols = ["Event", "LogMessage", "Summary", "SessionLog", "xrange",