Merge pull request #44433 from Molkree:typos_in_python_dir

PiperOrigin-RevId: 339937575 Change-Id: I1d80b244adcb27a429bc09c63434847107d23bee
2020-10-30 14:21:00 -07:00 · 2020-10-30 14:21:00 -07:00 · cb46f059a5
commit cb46f059a5
parent 84384703c0 2381ee56d9
55 changed files with 165 additions and 168 deletions
--- a/tensorflow/python/autograph/converters/directives.py
+++ b/tensorflow/python/autograph/converters/directives.py
@ -24,7 +24,7 @@ is, they do not change at runtime. So if you do something like this:
  tf.autograph.set_loop_options = <new function>

 Then the directive will may no longer be recognized. Furthermore, if the
-converted function is cached, such an action action may be irreversible.
+converted function is cached, such an action may be irreversible.
 """

 from __future__ import absolute_import
@ -71,7 +71,7 @@ def _map_args(call_node, function):
  # Keyword arguments not specified in kwds will be mapped to their defaults,
  # which are Python values. Since we don't currently have a way to transform
  # those into AST references, we simply remove them. By convention, directives
-  # use UNSPECIFIED as default value for for optional arguments. No other
+  # use UNSPECIFIED as default value for optional arguments. No other
  # defaults should be present.
  unexpected_defaults = []
  for k in call_args:
--- a/tensorflow/python/autograph/converters/return_statements.py
+++ b/tensorflow/python/autograph/converters/return_statements.py
@ -41,7 +41,7 @@ class _RewriteBlock(object):


 class ConditionalReturnRewriter(converter.Base):
-  """Rewrites a a pattern where it's unobvious that all paths return a value.
+  """Rewrites a pattern where it's unobvious that all paths return a value.

  This rewrite allows avoiding intermediate None return values.

@ -355,7 +355,7 @@ class ReturnStatementsTransformer(converter.Base):
        if block.return_used:

          if self.allow_missing_return:
-            # The function whould have a single `with` node that wraps the
+            # The function would have a single `with` node that wraps the
            # entire body. If the function had a docstring, the body has two
            # nodes, with the `with` as the second node.
            wrapper_node = node.body[-1]
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@ -143,7 +143,7 @@ class ConversionOptions(object):
      classes that the converted function may use.
    user_requested: bool, whether the conversion was explicitly requested by
      the user, as opposed to being performed as a result of other logic. This
-      value always auto-resets resets to False in child conversions.
+      value always auto-resets to False in child conversions.
    optional_features: Union[Feature, Set[Feature]], controls the use of
      optional features in the conversion process. See Feature for available
      options.
--- a/tensorflow/python/autograph/g3doc/reference/control_flow.md
+++ b/tensorflow/python/autograph/g3doc/reference/control_flow.md
@ -264,9 +264,10 @@ for i in tf.stack(l):
 ```

 <!-- TODO(mdan): List this under limitations -->
-Caution: A loop in which the type of the condition condition changes across
-iterations, in a way that would influence the way the loop is executed, is not
-allowed in AutoGraph.
+
+Caution: A loop in which the type of the condition changes across iterations, in
+a way that would influence the way the loop is executed, is not allowed in
+AutoGraph.

 For example, the loop below will generate an error. After the first iteration,
 `i` becomes a tf.Tensor, because
--- a/tensorflow/python/autograph/g3doc/reference/functions.md
+++ b/tensorflow/python/autograph/g3doc/reference/functions.md
@ -45,7 +45,7 @@ are handled correctly.
 The following types of functions are not converted:

 *   functions already converted
-*   functions defined in in a allowlisted module (see autograph/core/config.py)
+*   functions defined in an allowlisted module (see autograph/core/config.py)
 *   non-Python functions (such as native bindings)
 *   `print`, `pdb.set_trace`, `ipdb.set_trace`
 *   most built-in functions (exceptions are listed in
--- a/tensorflow/python/autograph/g3doc/reference/limitations.md
+++ b/tensorflow/python/autograph/g3doc/reference/limitations.md
@ -376,8 +376,7 @@ l()  # Prints 0!
 ```

 Note that none of these restrictions only apply to TensorFlow loops; Python
-loops correctly correctly handle closures in all cases.
-
+loops correctly handle closures in all cases.

 ### Python collections in TensorFlow control flow

--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@ -57,7 +57,7 @@ def _is_known_loaded_type(f, module_name, entity_name):
    return True
  # Note: inspect is required here, to avoid unpacking tf.function decorators.
  if inspect.ismethod(f):
-    # The the unbound method if of this type. Example:
+    # The unbound method is of this type. Example:
    #
    # class ClassType:
    #   @function
--- a/tensorflow/python/autograph/pyct/cfg.py
+++ b/tensorflow/python/autograph/pyct/cfg.py
@ -268,7 +268,7 @@ class GraphBuilder(object):
  nodes and their subsequent statements.

  Important concepts:
-   * nodes - nodes refer refer to CFG nodes; AST nodes are qualified explicitly
+   * nodes - nodes refer to CFG nodes; AST nodes are qualified explicitly
   * leaf set - since the graph is constructed gradually, a leaf set maintains
     the CFG nodes that will precede the node that the builder expects to
     receive next; when an ordinary node is added, it is connected to the
--- a/tensorflow/python/autograph/pyct/parser.py
+++ b/tensorflow/python/autograph/pyct/parser.py
@ -339,7 +339,7 @@ def parse(src, preamble_len=0, single_node=True):
    nodes = nodes[preamble_len:]
  if single_node:
    if len(nodes) != 1:
-      raise ValueError('expected exactly one node node, found {}'.format(nodes))
+      raise ValueError('expected exactly one node, found {}'.format(nodes))
    return nodes[0]
  return nodes

@ -370,7 +370,7 @@ def unparse(node, indentation=None, include_encoding_marker=True):
    node: The code to compile, as an AST object.
    indentation: Unused, deprecated. The returning code will always be indented
      at 4 spaces.
-    include_encoding_marker: Bool, thether to include a comment on the first
+    include_encoding_marker: Bool, whether to include a comment on the first
      line to explicitly specify UTF-8 encoding.

  Returns:
--- a/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_py3_test.py
+++ b/tensorflow/python/autograph/pyct/static_analysis/reaching_definitions_py3_test.py
@ -78,7 +78,7 @@ class ReachingDefinitionsAnalyzerTest(

    self.assertSameDef(local_body[1].test, local_body[2].value.elts[0])

-    # Note: the function name is is visible inside the function body. But it's
+    # Note: the function name is visible inside the function body. But it's
    # a closure variable, not a local.
    #
    # Example:
--- a/tensorflow/python/autograph/pyct/transpiler.py
+++ b/tensorflow/python/autograph/pyct/transpiler.py
@ -80,9 +80,9 @@ def _wrap_into_factory(nodes, entity_name, inner_factory_name,
        return inner_factory

  The lexical scoping is created using dummy symbol declarations which create
-  local fariables in the body of the outer factory, so that the Python parser
+  local variables in the body of the outer factory, so that the Python parser
  correctly marks them as free non-global variables upon load (that is, it
-  creates cell slots for each symbol. Thes symbols are initialized with None,
+  creates cell slots for each symbol). These symbols are initialized with None,
  but their values are not expected to be used; instead, the caller is expected
  to replace them with the cells of the source entity. For more details, see:
  https://docs.python.org/3/reference/executionmodel.html#binding-of-names
@ -277,7 +277,7 @@ class GenericTranspiler(object):
      user_context: An opaque object (may be None) that is forwarded to
        transform_ast, through the ctx.user_context argument.
    Returns:
-      Tre result of calling transform_function.
+      The result of calling transform_function.

    Raises:
      NotImplementedError: if the type of obj is not handled.
@ -288,7 +288,7 @@ class GenericTranspiler(object):
    raise NotImplementedError('Non-function: {}'.format(type(obj)))

  def _erase_arg_defaults(self, node):
-    """Erase argde fault expressions, which would otherwise be unbound."""
+    """Erase arg default expressions, which would otherwise be unbound."""
    args = node.args
    for i in range(len(args.defaults)):
      args.defaults[i] = parser.parse_expression('None')
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@ -741,7 +741,7 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable,

    The elements generated by `generator` must be compatible with either the
    given `output_signature` argument or with the given `output_types` and
-    (optionally) `output_shapes` arguments, whichiver was specified.
+    (optionally) `output_shapes` arguments, whichever was specified.

    The recommended way to call `from_generator` is to use the
    `output_signature` argument. In this case the output will be assumed to
@ -765,8 +765,8 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable,
    There is also a deprecated way to call `from_generator` by either with
    `output_types` argument alone or together with `output_shapes` argument.
    In this case the output of the function will be assumed to consist of
-    `tf.Tensor` objects with with the types defined by `output_types` and with
-    the shapes which are either unknown or defined by `output_shapes`.
+    `tf.Tensor` objects with the types defined by `output_types` and with the
+    shapes which are either unknown or defined by `output_shapes`.

    Note: The current implementation of `Dataset.from_generator()` uses
    `tf.numpy_function` and inherits the same constraints. In particular, it
@ -1655,7 +1655,7 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable,
    """
    if padded_shapes is None:
      padded_shapes = get_legacy_output_shapes(self)
-      # A `tf.TensorShape` only is only falsey if its *rank* is unknown:
+      # A `tf.TensorShape` is only false if its *rank* is unknown:
      # bool(tf.TensorShape(None)) is False
      if not all(nest.flatten(padded_shapes)):
        raise ValueError("You must set the `padded_shapes` argument to "
@ -2922,7 +2922,7 @@ def get_legacy_output_types(dataset_or_iterator):
    dataset_or_iterator: A `tf.data.Dataset` or `tf.data.Iterator`.

  Returns:
-    A nested structure of `tf.DType` objects objects matching the structure of
+    A nested structure of `tf.DType` objects matching the structure of
    dataset / iterator elements and specifying the shape of the individual
    components.
  """
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@ -35,7 +35,7 @@ the same way with eager and graph execution.

  The tutorials cover how to use `tf.distribute.Strategy` to do distributed
  training with native Keras APIs, custom training loops,
-  and Esitmator APIs. They also cover how to save/load model when using
+  and Estimator APIs. They also cover how to save/load model when using
  `tf.distribute.Strategy`.

 *Glossary*
@ -80,7 +80,7 @@ the same way with eager and graph execution.
  parameters/variables, used by some strategies (right now just
  `tf.distribute.experimental.ParameterServerStrategy`). All replicas that want
  to operate on a variable retrieve it at the beginning of a step and send an
-  update to be applied at the end of the step. These can in priniciple support
+  update to be applied at the end of the step. These can in principle support
  either sync or async training, but right now we only have support for async
  training with parameter servers. Compare to
  `tf.distribute.experimental.CentralStorageStrategy`, which puts all variables
@ -442,7 +442,7 @@ class InputReplicationMode(enum.Enum):
    Replicas will dequeue from the local Dataset on their worker.
    `tf.distribute.Strategy` doesn't manage any state sharing between such
    separate input pipelines.
-  * `PER_REPLICA`: The input function will be called on each replica seperately.
+  * `PER_REPLICA`: The input function will be called on each replica separately.
    `tf.distribute.Strategy` doesn't manage any state sharing between such
    separate input pipelines.
  """
@ -686,7 +686,7 @@ class StrategyBase(object):
  See [the guide](https://www.tensorflow.org/guide/distributed_training)
  for overview and examples. See `tf.distribute.StrategyExtended` and
  [`tf.distribute`](https://www.tensorflow.org/api_docs/python/tf/distribute)
-  for a glossory of concepts mentioned on this page such as "per-replica",
+  for a glossary of concepts mentioned on this page such as "per-replica",
  _replica_, and _reduce_.

  In short:
@ -1253,7 +1253,7 @@ class StrategyBase(object):

    with self.scope():
      # tf.distribute supports Eager functions, so AutoGraph should not be
-      # applied when when the caller is also in Eager mode.
+      # applied when the caller is also in Eager mode.
      fn = autograph.tf_convert(
          fn, autograph_ctx.control_status_ctx(), convert_by_default=False)
      return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
@ -2566,7 +2566,7 @@ class StrategyExtendedV2(object):
    Multi-worker training refers to the setup where the training is
    distributed across multiple workers, as opposed to the case where
    only a local process performs the training. This function is
-    used by higher-level apis such as Keras' `model.fit()` to infer
+    used by higher-level APIs such as Keras' `model.fit()` to infer
    for example whether or not a distribute coordinator should be run,
    and thus TensorFlow servers should be started for communication
    with other servers in the cluster, or whether or not saving/restoring
@ -2878,7 +2878,7 @@ class ReplicaContextBase(object):
      raise ValueError(
          "replica_id_in_sync_group can only be an integer, a Tensor or None.")
    self._replica_id_in_sync_group = replica_id_in_sync_group
-    # We need this check becaused TPUContext extends from ReplicaContext and
+    # We need this check because TPUContext extends from ReplicaContext and
    # does not pass a strategy object since it is used by TPUEstimator.
    if strategy:
      self._local_replica_id = strategy.extended._get_local_replica_id(
--- a/tensorflow/python/distribute/input_lib.py
+++ b/tensorflow/python/distribute/input_lib.py
@ -901,7 +901,7 @@ class DistributedIterator(DistributedIteratorBase,
    # (whose batch dimension may also be None). This is because with partial
    # batching handling we could always produce empty batches.
    #
-    # TODO(b/163362689): avoid this once we have more elegent way to handle
+    # TODO(b/163362689): avoid this once we have a more elegant way to handle
    # retracing and collectives.
    if (self._enable_get_next_as_optional and
        self._strategy.extended._in_multi_worker_mode()):  # pylint: disable=protected-access
@ -1132,7 +1132,7 @@ class DistributedDataset(_IterableInput):
    # (whose batch dimension may also be None). This is because with partial
    # batching handling we could always produce empty batches.
    #
-    # TODO(b/163362689): avoid this once we have more elegent way to handle
+    # TODO(b/163362689): avoid this once we have a more elegant way to handle
    # retracing and collectives.
    if (self._enable_get_next_as_optional and
        self._strategy.extended._in_multi_worker_mode()):  # pylint: disable=protected-access
@ -1312,7 +1312,7 @@ class DistributedDatasetsFromFunction(_IterableInput):
    # (whose batch dimension may also be None). This is because with partial
    # batching handling we could always produce empty batches.
    #
-    # TODO(b/163362689): avoid this once we have more elegent way to handle
+    # TODO(b/163362689): avoid this once we have a more elegant way to handle
    # retracing and collectives.
    if (self._enable_get_next_as_optional and
        self._strategy.extended._in_multi_worker_mode()):  # pylint: disable=protected-access
@ -1369,7 +1369,7 @@ class DistributedDatasetsFromFunctionV1(DistributedDatasetsFromFunction):
                       "or when eager execution is enabled.")


-# TODO(anjalisridhar): This class will be soon be removed in favor of newer
+# TODO(anjalisridhar): This class will soon be removed in favor of newer
 # APIs.
 class InputFunctionIterator(DistributedIteratorV1):
  """Iterator created from input function."""
@ -1596,7 +1596,7 @@ class _SingleWorkerDatasetIteratorBase(object):
    """Get next element from the underlying iterator.

    Runs the iterator get_next() within a device scope. Since this doesn't use
-    get_next_as_optional(), is is considerably faster than get_next_as_list()
+    get_next_as_optional(), it is considerably faster than get_next_as_list()
    (but can only be used when the shapes are static).

    Args:
@ -2142,7 +2142,7 @@ def _enable_get_next_as_optional(strategy, dataset):
    return False

  if context.executing_eagerly():
-    # If the dataset is inifinite, we don't need to enable last partial batch
+    # If the dataset is infinite, we don't need to enable last partial batch
    # support. Currently the logic only applies to the case that distributed
    # dataset is created in eager mode, as we need to evaluate the dataset
    # cardinality.
@ -2181,7 +2181,7 @@ def _create_per_replica(value_list, strategy, get_next_as_optional):
  # (whose batch dimension may also be None). This is because with partial
  # batching handling we could always produce empty batches.
  #
-  # TODO(b/163362689): avoid this once we have more elegent way to handle
+  # TODO(b/163362689): avoid this once we have a more elegant way to handle
  # retracing and collectives.
  if (get_next_as_optional and strategy.extended._in_multi_worker_mode()):  # pylint: disable=protected-access
    # Use expand_composites=False since we don't want to expand PerReplica,
--- a/tensorflow/python/distribute/input_lib_test.py
+++ b/tensorflow/python/distribute/input_lib_test.py
@ -897,7 +897,7 @@ class DistributedIteratorTest(DistributedIteratorTestBase,
        feature = data["feature"]
        label = data["label"]

-        # Asser the shapes are still staic from all replicas.
+        # Assert the shapes are still static from all replicas.
        for replica_id in range(len(distribution.extended.worker_devices)):
          self.assertEqual([per_replica_batch_size, 10],
                           feature[replica_id].shape)
--- a/tensorflow/python/distribute/parameter_server_strategy_test.py
+++ b/tensorflow/python/distribute/parameter_server_strategy_test.py
@ -191,7 +191,7 @@ class ParameterServerStrategyTestBase(
          g = e + 1.0
        self.assertEqual(g.device, worker_device + '/device:CPU:1')

-        # Ths ops.colocate_with will be ignored when defining a variable but not
+        # This ops.colocate_with will be ignored when defining a variable but not
        # for a normal tensor.
        with ops.colocate_with(x):
          u = variable_scope.get_variable('u', initializer=30.0)
@ -345,7 +345,7 @@ class ParameterServerStrategyTestBase(
          g = e + 1.0
        self.assertEqual(g.device, device_util.canonicalize('/device:CPU:1'))

-        # Ths ops.colocate_with will be ignored when defining a variable but not
+        # This ops.colocate_with will be ignored when defining a variable but not
        # for a normal tensor.
        with ops.colocate_with(x):
          u = variable_scope.get_variable('u', initializer=30.0)
--- a/tensorflow/python/distribute/test_util.py
+++ b/tensorflow/python/distribute/test_util.py
@ -82,8 +82,8 @@ def set_logical_devices_to_at_least(device, num):
          context.LogicalDeviceConfiguration(memory_limit=2048))
    else:
      logical_devices.append(context.LogicalDeviceConfiguration())
-  # Create logical devices from the the last device since sometimes the first
-  # GPU is the primary graphic card and may has less memory available.
+  # Create logical devices from the last device since sometimes the first GPU
+  # is the primary graphic card and may have less memory available.
  config.set_logical_device_configuration(physical_devices[-1], logical_devices)


--- a/tensorflow/python/distribute/tpu_strategy.py
+++ b/tensorflow/python/distribute/tpu_strategy.py
@ -338,8 +338,8 @@ class TPUStrategyV2(distribute_lib.Strategy):
    """Adds annotation that `tensor` will be split across logical devices.

    This adds an annotation to tensor `tensor` specifying that operations on
-    `tensor` will be be split among multiple logical devices. Tensor `tensor`
-    will be split across dimensions specified by `partition_dimensions`.
+    `tensor` will be split among multiple logical devices. Tensor `tensor` will
+    be split across dimensions specified by `partition_dimensions`.
    The dimensions of `tensor` must be divisible by corresponding value in
    `partition_dimensions`.

@ -799,7 +799,7 @@ class TPUExtended(distribute_lib.StrategyExtendedV1):
        raise ValueError(
            "Found tensor {} with spec {}. TPUStrategy does not support "
            "distributed datasets with device prefetch when using sparse or "
-            "ragged tensors. If you indend to use sparse or ragged tensors, "
+            "ragged tensors. If you intend to use sparse or ragged tensors, "
            "please pass a tf.distribute.InputOptions object with "
            "experimental_prefetch_to_device set to False to your dataset "
            "distribution function.".format(path, type(spec)))
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@ -633,7 +633,7 @@ class Context(object):
    """Sync both local executors and the ones on remote workers.

    In async execution mode, local function calls can return before the
-    coresponding remote op/function execution requests are completed. Calling
+    corresponding remote op/function execution requests are completed. Calling
    this method creates a synchronization barrier for remote executors. It only
    returns when all remote pending nodes are finished, potentially with errors
    if any remote executors are in error state.
@ -2281,7 +2281,7 @@ def async_scope():
  execution, potentially raising exceptions if async execution results in
  an error state.

-  Users may write the following code to asynchronuously invoke `train_step_fn`
+  Users may write the following code to asynchronously invoke `train_step_fn`
  and log the `loss` metric for every `num_steps` steps in a training loop.
  `train_step_fn` internally consumes data using `iterator.get_next()`, and may
  throw OutOfRangeError when running out of data. In the case:
--- a/tensorflow/python/eager/forwardprop_test.py
+++ b/tensorflow/python/eager/forwardprop_test.py
@ -473,7 +473,7 @@ class ForwardpropTest(test.TestCase, parameterized.TestCase):
  def testFusedBatchNormGradsInference(self):

    if test.is_built_with_rocm():
-      # This test was addeded recently and has been failing on the ROCm
+      # This test was added recently and has been failing on the ROCm
      # platform, since it was added.
      # TODO(rocm): do root cause analysis of test failure and fix it.
      self.skipTest("Test fails on ROCm platform, needs further analysis")
@ -760,7 +760,7 @@ class ForwardpropTest(test.TestCase, parameterized.TestCase):
      [("ForwardPropFirst", True),
       ("TapeFirst", False)])
  def testForwardOverBackwardMemoryEfficiency(self, forward_prop_first):
-    # Watching depends depends on nesting, not creation order
+    # Watching depends on nesting, not creation order
    c = constant_op.constant(1.)
    if forward_prop_first:
      forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
@ -793,7 +793,7 @@ class ForwardpropTest(test.TestCase, parameterized.TestCase):
       ("TapeFirst", False)])
  def testBackwardOverForward(self, forward_prop_first):
    c = constant_op.constant(1.)
-    # Watching depends depends on nesting, not creation order
+    # Watching depends on nesting, not creation order
    if forward_prop_first:
      forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
      gradient_tape = backprop.GradientTape()
--- a/tensorflow/python/framework/convert_to_constants.py
+++ b/tensorflow/python/framework/convert_to_constants.py
@ -294,7 +294,7 @@ class _Node(_Convertible):
      The object referred to by 'input_name'.
    """

-    # The logic below oversimplifes the semantics, but is good enough for the
+    # The logic below oversimplifies the semantics, but is good enough for the
    # purposes of converting to constants. The introduction of new types of
    # operations may change this, forcing the code to be more generic.
    #
@ -786,7 +786,7 @@ class _FunctionConverterData(_ConverterData):
      func: ConcreteFunction.
      lower_control_flow: Boolean indicating whether or not to lower control
        flow ops such as If and While.
-      aggressive_inlining: Boolean indicating whether or not to to aggressive
+      aggressive_inlining: Boolean indicating whether or not to do aggressive
        function inlining (might be unsafe if function has stateful ops, not
        properly connected to control outputs).
      variable_names_allowlist: The set of variable names to convert (by
@ -918,7 +918,7 @@ def _run_inline_graph_optimization(func, lower_control_flow,
    func: ConcreteFunction.
    lower_control_flow: Boolean indicating whether or not to lower control flow
      ops such as If and While. (default True)
-    aggressive_inlining: Boolean indicating whether or not to to aggressive
+    aggressive_inlining: Boolean indicating whether or not to do aggressive
      function inlining (might be unsafe if function has stateful ops not
      properly connected to control outputs).

@ -1057,7 +1057,7 @@ def convert_variables_to_constants_v2(func,
    func: ConcreteFunction.
    lower_control_flow: Boolean indicating whether or not to lower control flow
      ops such as If and While. (default True)
-    aggressive_inlining: Boolean indicating whether or not to to aggressive
+    aggressive_inlining: Boolean indicating whether or not to do aggressive
      function inlining (might be unsafe if function has stateful ops, not
      properly connected to control outputs). (default False)

@ -1090,7 +1090,7 @@ def convert_variables_to_constants_v2_as_graph(func,
    func: ConcreteFunction.
    lower_control_flow: Boolean indicating whether or not to lower control flow
      ops such as If and While. (default True)
-    aggressive_inlining: Boolean indicating whether or not to to aggressive
+    aggressive_inlining: Boolean indicating whether or not to do aggressive
      function inlining (might be unsafe if function has stateful ops, not
      properly connected to control outputs).

--- a/tensorflow/python/framework/op_callbacks.py
+++ b/tensorflow/python/framework/op_callbacks.py
@ -170,7 +170,7 @@ def invoke_op_callbacks(op_type,
      eager execution and are non-eager `Tensor`s in the case of graph
      construction.
    op_name: Name of the op. Applicable if and only if this method is invoked
-      due to the graph construction of an op or the eager execution of of a
+      due to the graph construction of an op or the eager execution of a
      `FuncGraph`.
    graph: The graph involved (if any).
      - In the case if the eager execution of an op or FuncGraph, this is
--- a/tensorflow/python/keras/applications/densenet.py
+++ b/tensorflow/python/keras/applications/densenet.py
@ -33,22 +33,22 @@ from tensorflow.python.lib.io import file_io
 from tensorflow.python.util.tf_export import keras_export


-BASE_WEIGTHS_PATH = ('https://storage.googleapis.com/tensorflow/'
+BASE_WEIGHTS_PATH = ('https://storage.googleapis.com/tensorflow/'
                     'keras-applications/densenet/')
 DENSENET121_WEIGHT_PATH = (
-    BASE_WEIGTHS_PATH + 'densenet121_weights_tf_dim_ordering_tf_kernels.h5')
+    BASE_WEIGHTS_PATH + 'densenet121_weights_tf_dim_ordering_tf_kernels.h5')
 DENSENET121_WEIGHT_PATH_NO_TOP = (
-    BASE_WEIGTHS_PATH +
+    BASE_WEIGHTS_PATH +
    'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5')
 DENSENET169_WEIGHT_PATH = (
-    BASE_WEIGTHS_PATH + 'densenet169_weights_tf_dim_ordering_tf_kernels.h5')
+    BASE_WEIGHTS_PATH + 'densenet169_weights_tf_dim_ordering_tf_kernels.h5')
 DENSENET169_WEIGHT_PATH_NO_TOP = (
-    BASE_WEIGTHS_PATH +
+    BASE_WEIGHTS_PATH +
    'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5')
 DENSENET201_WEIGHT_PATH = (
-    BASE_WEIGTHS_PATH + 'densenet201_weights_tf_dim_ordering_tf_kernels.h5')
+    BASE_WEIGHTS_PATH + 'densenet201_weights_tf_dim_ordering_tf_kernels.h5')
 DENSENET201_WEIGHT_PATH_NO_TOP = (
-    BASE_WEIGTHS_PATH +
+    BASE_WEIGHTS_PATH +
    'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5')

 layers = VersionAwareLayers()
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@ -116,7 +116,7 @@ PER_GRAPH_OBJECT_NAME_UIDS = weakref.WeakKeyDictionary()


 # A global set tracking what object names have been seen so far.
-# Optionally used as an avoid-list when generaing names
+# Optionally used as an avoid-list when generating names
 OBSERVED_NAMES = set()


@ -450,7 +450,7 @@ def deprecated_internal_set_learning_phase(value):
  This method is an internal-only version of `set_learning_phase` that
  does not raise a deprecation error. It is required because
  saved_model needs to keep working with user code that uses the deprecated
-  learning phase methods until those apis are fully removed from the public api.
+  learning phase methods until those APIs are fully removed from the public API.

  Specifically SavedModel saving needs to make sure the learning phase is 0
  during tracing even if users overwrote it to a different value.
@ -516,7 +516,7 @@ def deprecated_internal_learning_phase_scope(value):
  with code that sets/gets the learning phase, but saved model
  saving itself shouldn't raise a deprecation warning.

-  We can get rid of this method and its usages when the public api is
+  We can get rid of this method and its usages when the public API is
  removed.

  Arguments:
@ -894,7 +894,7 @@ def _is_current_explicit_device(device_type):


 def _get_available_gpus():
-  """Get a list of available gpu devices (formatted as strings).
+  """Get a list of available GPU devices (formatted as strings).

  Returns:
      A list of available GPU devices.
@ -6582,9 +6582,9 @@ class ContextValueCache(weakref.WeakKeyDictionary):

  This class is similar to defaultdict, where values may be produced by the
  default factory specified during initialization. This class also has a default
-  value for the key (when key is `None`) -- the key is set to the the current
-  graph or eager context. The default factories for key and value are only used
-  in `__getitem__` and `setdefault`. The `.get()` behavior remains the same.
+  value for the key (when key is `None`) -- the key is set to the current graph
+  or eager context. The default factories for key and value are only used in
+  `__getitem__` and `setdefault`. The `.get()` behavior remains the same.

  This object will return the value of the current graph or closest parent graph
  if the current graph is a function. This is to reflect the fact that if a
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@ -792,9 +792,8 @@ class Layer(module.Module, version_utils.LayerVersionSelector):
    """
    def check_type_return_shape(s):
      if not isinstance(s, tensor_spec.TensorSpec):
-        raise TypeError(
-            'Only TensorSpec signature types are supported, '
-            'but saw signature signature entry: {}.'.format(s))
+        raise TypeError('Only TensorSpec signature types are supported, '
+                        'but saw signature entry: {}.'.format(s))
      return s.shape
    input_shape = nest.map_structure(check_type_return_shape, input_signature)
    output_shape = self.compute_output_shape(input_shape)
@ -872,7 +871,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector):
          keras_tensor.keras_tensor_from_tensor, outputs)

    if hasattr(self, '_set_inputs') and not self.inputs:
-      # TODO(kaftan): figure out if we ned to do this at all
+      # TODO(kaftan): figure out if we need to do this at all
      # Subclassed network: explicitly set metadata normally set by
      # a call to self._set_inputs().
      self._set_inputs(inputs, outputs)
--- a/tensorflow/python/keras/engine/base_layer_v1.py
+++ b/tensorflow/python/keras/engine/base_layer_v1.py
@ -600,9 +600,8 @@ class Layer(base_layer.Layer):
    """
    def check_type_return_shape(s):
      if not isinstance(s, tensor_spec.TensorSpec):
-        raise TypeError(
-            'Only TensorSpec signature types are supported, '
-            'but saw signature signature entry: {}.'.format(s))
+        raise TypeError('Only TensorSpec signature types are supported, '
+                        'but saw signature entry: {}.'.format(s))
      return s.shape
    input_shape = nest.map_structure(check_type_return_shape, input_signature)
    output_shape = self.compute_output_shape(input_shape)
@ -993,7 +992,7 @@ class Layer(base_layer.Layer):
    x = tf.keras.layers.Dense(10)(inputs)
    outputs = tf.keras.layers.Dense(1)(x)
    model = tf.keras.Model(inputs, outputs)
-    # Actvity regularization.
+    # Activity regularization.
    model.add_loss(tf.abs(tf.reduce_mean(x)))
    ```

--- a/tensorflow/python/keras/engine/functional.py
+++ b/tensorflow/python/keras/engine/functional.py
@ -608,8 +608,8 @@ class Functional(training_lib.Model):
  def _conform_to_reference_input(self, tensor, ref_input):
    """Set shape and dtype based on `keras.Input`s."""
    if isinstance(tensor, ops.Tensor):
-      # Allow (None,) and (None, 1) Tensors to be passed interchangably. Use the
-      # shape specified by the `keras.Input`.
+      # Allow (None,) and (None, 1) Tensors to be passed interchangeably. Use
+      # the shape specified by the `keras.Input`.
      t_shape = tensor.shape
      t_rank = t_shape.rank
      ref_shape = ref_input.shape
@ -1117,7 +1117,7 @@ def reconstruct_from_config(config, custom_objects=None, created_layers=None):
    custom_objects: Optional dictionary mapping names (strings) to custom
      classes or functions to be considered during deserialization.
    created_layers: Optional dictionary mapping names to Layer objects. Any
-      layer not in this dictionary will be be created and added to the dict.
+      layer not in this dictionary will be created and added to the dict.
      This function will add new nodes to all layers (excluding InputLayers),
      instead of re-using pre-existing nodes in the layers.

--- a/tensorflow/python/keras/engine/keras_tensor.py
+++ b/tensorflow/python/keras/engine/keras_tensor.py
@ -96,8 +96,8 @@ class KerasTensor(object):
  placeholders.

  In rare cases (such as when directly manipulating shapes using Keras layers),
-  the layer may be able to partially infer the value of of the output in
-  addition to just inferring the signature.
+  the layer may be able to partially infer the value of the output in addition
+  to just inferring the signature.
  When this happens, the returned KerasTensor will also contain the inferred
  value information. Follow-on layers can use this information.
  during their own output signature inference.
@ -117,7 +117,7 @@ class KerasTensor(object):
  Calling a `tf.function` does not support dispatching, so you cannot pass
  `KerasTensor`s as inputs to a `tf.function`.

-  Higher-order apis that take methods which produce tensors (e.g. `tf.while`,
+  Higher-order APIs that take methods which produce tensors (e.g. `tf.while`,
  `tf.map_fn`, `tf.cond`) also do not currently support dispatching. So, you
  cannot directly pass KerasTensors as inputs to these APIs either. If you
  want to use these APIs inside of a Functional model, you must put them inside
--- a/tensorflow/python/keras/layers/recurrent_v2.py
+++ b/tensorflow/python/keras/layers/recurrent_v2.py
@ -444,8 +444,8 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU):
    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

-    # TODO(b/156447398) Investigate why the cuDNN kernel kernel fails with
-    # ragged inputs.
+    # TODO(b/156447398) Investigate why the cuDNN kernel fails with ragged
+    # inputs.
    if is_ragged_input or not self._could_use_gpu_kernel:
      kwargs = {'training': training}
      self._maybe_reset_cell_dropout_mask(self.cell)
@ -812,7 +812,7 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias,
        false_fn=standard_gru_fn)

  if _use_new_code():
-    # Chooses the implementation dynamicly based on the running device.
+    # Chooses the implementation dynamically based on the running device.
    (last_output, outputs, new_h,
     runtime) = control_flow_ops.execute_fn_for_device(
         {
@ -1162,8 +1162,8 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM):
    input_shape = K.int_shape(inputs)
    timesteps = input_shape[0] if self.time_major else input_shape[1]

-    # TODO(b/156447398) Investigate why the cuDNN kernel kernel fails with
-    # ragged inputs.
+    # TODO(b/156447398) Investigate why the cuDNN kernel fails with ragged
+    # inputs.
    if is_ragged_input or not self._could_use_gpu_kernel:
      # Fall back to use the normal LSTM.
      kwargs = {'training': training}
@ -1626,7 +1626,7 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel,
        false_fn=stardard_lstm_fn)

  if _use_new_code():
-    # Chooses the implementation dynamicly based on the running device.
+    # Chooses the implementation dynamically based on the running device.
    (last_output, outputs, new_h, new_c,
     runtime) = control_flow_ops.execute_fn_for_device(
         {
@ -1693,7 +1693,7 @@ def has_fully_masked_sequence(mask):
  # data. We walk around this issue by rerouting the computation to standard
  # kernel, until the issue on cudnn side has been fixed.
  # For a fully masked sequence, it will contain all Falses. To make it easy to
-  # check, we inverse the boolean, check if any of the seqence has all True.
+  # check, we inverse the boolean, check if any of the sequence has all True.
  return math_ops.reduce_any(
      math_ops.reduce_all(
          math_ops.logical_not(mask),
--- a/tensorflow/python/keras/mixed_precision/policy.py
+++ b/tensorflow/python/keras/mixed_precision/policy.py
@ -89,7 +89,7 @@ class Policy(object):
  >>> layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2)
  >>> layer.compute_dtype  # Equivalent to layer.dtype_policy.compute_dtype
  'float32'
-  >>> # `layer` casts it's inputs to its compute dtype and does computations in
+  >>> # `layer` casts its inputs to its compute dtype and does computations in
  >>> # that dtype.
  >>> y = layer(x)
  >>> y.dtype
@ -245,9 +245,8 @@ class Policy(object):
      dtype = dtypes.as_dtype(name).name
    except TypeError:
      error = ("Cannot convert value %s to a mixed precision Policy. "
-               "Valid policies include include 'mixed_float16', "
-               "'mixed_bfloat16', and the name of any dtype such as "
-               "'float32'." % (name,))
+               "Valid policies include 'mixed_float16', 'mixed_bfloat16', "
+               "and the name of any dtype such as 'float32'." % (name,))
      # six.raise_from suppresses the original TypeError from being raised
      six.raise_from(ValueError(error), None)
    return dtype, dtype
@ -264,7 +263,7 @@ class Policy(object):
    Variable regularizers are run in the variable dtype, not the compute dtype.

    Returns:
-      The variable dtype of this policy, as a string
+      The variable dtype of this policy, as a string.
    """
    return self._variable_dtype

@ -328,7 +327,7 @@ class PolicyV1(Policy):
  The difference between this class and the non-experimental class is that this
  class has a `loss_scale` field and the non-experimental class does not. The
  loss scale is only used by `tf.keras.Model.compile`, which automatically wraps
-  the optimizer with a `LossScaleOptimizer` if the optimzier is not already a
+  the optimizer with a `LossScaleOptimizer` if the optimizer is not already a
  `LossScaleOptimizer`. For the non-experimental Policy class, `Model.compile`
  instead wraps the optimizer with a `LossScaleOptimizer` if `Policy.name` is
  "mixed_float16".
@ -337,7 +336,7 @@ class PolicyV1(Policy):
  `tf.keras.utils.deserialize_keras_object`, the policy will be deserialized as
  the non-experimental `tf.keras.mixed_precision.Policy`, and the loss scale
  will silently be dropped. This is so that SavedModels that are generated
-  with an expeirmental policy can be restored after the experimental policy is
+  with an experimental policy can be restored after the experimental policy is
  removed.
  """

@ -568,7 +567,7 @@ def _policy_equivalent_to_dtype(policy):
  Returns:
    True, if the policy is equivalent to a single dtype.
  """
-  # We use type() instead of isinstance because a sublcass of Policy is never
+  # We use type() instead of isinstance because a subclass of Policy is never
  # equivalent to a dtype.
  return (type(policy) == Policy and  # pylint: disable=unidiomatic-typecheck
          list(policy.get_config().keys()) == ['name'] and
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@ -594,7 +594,7 @@ def clone_and_build_model(
    optimizer_config=None):
  """Clone a `Model` and build/compile it with the same settings used before.

-  This function can be be run in the same graph or in a separate graph from the
+  This function can be run in the same graph or in a separate graph from the
  model. When using a separate graph, `in_place_reset` must be `False`.

  Note that, currently, the clone produced from this function may not work with
@ -659,7 +659,7 @@ def clone_and_build_model(
                  model._build_input_shape, dtype=model.inputs[0].dtype))
    else:
      try:
-        # Prefer clonining the model if serial/deserial logic is implemented for
+        # Prefer cloning the model if serial/deserial logic is implemented for
        # subclassed model.
        clone = model.__class__.from_config(model.get_config())
      except NotImplementedError:
--- a/tensorflow/python/keras/tests/memory_checker_test.py
+++ b/tensorflow/python/keras/tests/memory_checker_test.py
@ -28,7 +28,7 @@ from tensorflow.python.platform import test
 class MemoryCheckerTest(test.TestCase):

  def testKerasBasic(self):
-    # TODO(kkb): Fix the the slowness on Forge.
+    # TODO(kkb): Fix the slowness on Forge.
    self.skipTest('This test is too slow on Forge so disabled for now.')

    x = array_ops.zeros([1, 1])
@ -47,7 +47,7 @@ class MemoryCheckerTest(test.TestCase):
    memory_checker.assert_no_leak_if_all_possibly_except_one()

  def testKerasAdvanced(self):
-    # TODO(kkb): Fix the the slowness on Forge.
+    # TODO(kkb): Fix the slowness on Forge.
    self.skipTest('This test is too slow on Forge so disabled for now.')

    # A real world example taken from the following.
--- a/tensorflow/python/keras/tests/model_architectures.py
+++ b/tensorflow/python/keras/tests/model_architectures.py
@ -66,7 +66,7 @@ def lstm():


 def multi_input_multi_output():
-  """Multi-input Multi-ouput model."""
+  """Multi-input Multi-output model."""
  body_input = keras.Input(shape=(None,), name='body')
  tags_input = keras.Input(shape=(2,), name='tags')

@ -290,7 +290,7 @@ ALL_MODELS = [


 def get_models(exclude_models=None):
-  """Get all models excluding the specificed ones."""
+  """Get all models excluding the specified ones."""
  models = [model for model in ALL_MODELS
            if model[0] not in exclude_models]
  return models
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@ -286,7 +286,7 @@ class CTCLossTest(test.TestCase):
    with self.session(use_gpu=False):
      loss = _ctc_loss_v2(
          inputs=inputs_t, labels=labels, sequence_length=seq_lens)
-      # Taking ths second gradient should fail, since it is not
+      # Taking this second gradient should fail, since it is not
      # yet supported.
      with self.assertRaisesRegex(LookupError, "explicitly disabled"):
        _ = gradients_impl._hessian_vector_product(loss, [inputs_t], v)
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@ -702,7 +702,7 @@ def shape_n(input, out_type=dtypes.int32, name=None):
 def size_v2(input, out_type=dtypes.int32, name=None):
  # pylint: disable=redefined-builtin
  """Returns the size of a tensor.
-  
+
  See also `tf.shape`.

  Returns a 0-D `Tensor` representing the number of elements in `input`
@ -1758,9 +1758,9 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
            shape(tensor)[axis + ndims_mask:]
        ], 0))
    # TODO(yongtang): tf.reshape in C++ kernel might have set the shape
-    # correctly, so the following may not be needed? It still might ben
-    # possible that there are some edge case where tensor_util.constant_value
-    # resolves more case than ShapeInference of tf.reshape in C++ kernel.
+    # correctly, so the following may not be needed? It still might be possible
+    # that there are some edge cases where tensor_util.constant_value resolves
+    # more cases than ShapeInference of tf.reshape in C++ kernel.
    if axis_value is not None:
      first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
      tensor.set_shape(
@ -2108,7 +2108,7 @@ def transpose_v2(a, perm=None, conjugate=False, name="transpose"):
  As above, simply calling `tf.transpose` will default to `perm=[2,1,0]`.

  To take the transpose of the matrices in dimension-0 (such as when you are
-  transposing matrices where 0 is the batch dimesnion), you would set
+  transposing matrices where 0 is the batch dimension), you would set
  `perm=[0,2,1]`.

  >>> tf.transpose(x, perm=[0, 2, 1])
@ -3650,7 +3650,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
  array([[inf, 1. ],
         [0.5, 1. ]], dtype=float32)>

-  The operaton returns a dense Tensor of shape `[2, 2]` with
+  The operation returns a dense Tensor of shape `[2, 2]` with
  edit distances normalized by `truth` lengths.

  **Note**: It is possible to calculate edit distance between two
@ -3685,7 +3685,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
  normalize = True

  # The output would be a dense Tensor of shape `(2,)`, with edit distances
-  noramlized by 'truth' lengths.
+  normalized by 'truth' lengths.
  # output => array([0., 0.5], dtype=float32)
  ```

@ -5314,8 +5314,8 @@ def tensor_scatter_nd_update(tensor, indices, updates, name=None):
  tf.Tensor([ 0 9  0 10  11  0  0 12], shape=(8,), dtype=int32)

  The length (first axis) of `updates` must equal the length of the `indices`:
-  `num_updates`. This is the the number of updates being inserted. Each
-  scalar update is inserted into `tensor` at the indexed location.
+  `num_updates`. This is the number of updates being inserted. Each scalar
+  update is inserted into `tensor` at the indexed location.

  For a higher rank input `tensor` scalar updates can be inserted by using an
  `index_depth` that matches `tf.rank(tensor)`:
@ -5339,7 +5339,7 @@ def tensor_scatter_nd_update(tensor, indices, updates, name=None):
  `outer_shape` and the `inner_shape`.

  `indices` indexes into the outer level of the input tensor (`outer_shape`).
-  and replaces the sub-array at that location with the coresponding item from
+  and replaces the sub-array at that location with the corresponding item from
  the `updates` list. The shape of each update is `inner_shape`.

  When updating a list of slices the shape constraints are:
@ -5372,7 +5372,7 @@ def tensor_scatter_nd_update(tensor, indices, updates, name=None):
  >>> updates = tf.constant([[1, 2, 3],
  ...                        [4, 5, 6]])

-  Alltogether this gives:
+  Altogether this gives:

  >>> tf.tensor_scatter_nd_update(tensor, indices, updates).numpy()
  array([[0, 0, 0],
@ -5397,7 +5397,7 @@ def tensor_scatter_nd_update(tensor, indices, updates, name=None):
    * Provide updates each with a shape matching the `inner_shape`:
      `[time, width, height, channels]`.

-  To relace the first two clips with ones:
+  To replace the first two clips with ones:

  >>> indices = [[0],[1]]
  >>> new_clips = tf.ones([2, time, width, height, channels])
@ -5420,7 +5420,7 @@ def tensor_scatter_nd_update(tensor, indices, updates, name=None):

  ### Folded indices

-  In simple cases it's convienient to think of `indices` and `updates` as
+  In simple cases it's convenient to think of `indices` and `updates` as
  lists, but this is not a strict requirement. Instead of a flat `num_updates`,
  the `indices` and `updates` can be folded into a `batch_shape`. This
  `batch_shape` is all axes of the `indices`, except for the innermost
--- a/tensorflow/python/ops/cond_v2.py
+++ b/tensorflow/python/ops/cond_v2.py
@ -125,7 +125,7 @@ def _IfGrad(op, *grads):  # pylint: disable=invalid-name
  false_grad_graph = _create_grad_func(
      false_graph, grads, util.unique_grad_fn_name(false_graph.name))

-  # Replaces output None grads with zeros if atleast one branch has non-None
+  # Replaces output None grads with zeros if at least one branch has non-None
  # grad at that index.
  _create_zeros_for_none_grads([true_graph, false_graph],
                               [true_grad_graph, false_grad_graph])
@ -206,7 +206,7 @@ def _build_cond(pred,
  computation.

  true_graph and false_graph need not have the same input types, but they must
-  have the same outpute types.
+  have the same output types.

  Args:
    pred: boolean Tensor
@ -552,7 +552,7 @@ def _make_inputs_match(branch_graphs, branch_inputs):


 def _create_zeros_for_none_grads(forward_graphs, grad_graphs):
-  """Creates zeros for None out grads if atleast one branch has non-None grad.
+  """Creates zeros for None out grads if at least one branch has non-None grad.

  Args:
    forward_graphs: List of forward FuncGraphs.
@ -932,7 +932,7 @@ class _CondGradFuncGraph(util.CondBranchFuncGraph):
    # If it is not a resource, we wrap it in an optional in the forward graph
    # and capture the optional normally. We then unwrap the captured optional
    # value in the gradient graph to get the raw intermediate value.
-    # If it is a resource, we trace the resource upto the input in the forward
+    # If it is a resource, we trace the resource up to the input in the forward
    # graph and capture that.

    if tensor.dtype == dtypes.resource:
@ -1034,7 +1034,7 @@ def _CaseGrad(op, *grads):  # pylint: disable=invalid-name
    branch_grad_graphs.append(
        _create_grad_func(branch_graph, grads,
                          util.unique_grad_fn_name(branch_graph.name)))
-  # Replaces output None grads with zeros if atleast one branch has non-None
+  # Replaces output None grads with zeros if at least one branch has non-None
  # grad at that index.
  _create_zeros_for_none_grads(branch_graphs, branch_grad_graphs)

@ -1120,7 +1120,7 @@ def _build_case(branch_index,
  computation.

  `branch_graphs` need not have the same input types, but they must
-  have the same outpute types.
+  have the same output types.

  Args:
    branch_index: integer Tensor
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@ -2881,7 +2881,7 @@ def group(*inputs, **kwargs):

  When operating in a v1-style graph context, ops are not executed in the same
  order as specified in the code; TensorFlow will attempt to execute ops in
-  parallel or in an order convienient to the result it is computing.  `tf.group`
+  parallel or in an order convenient to the result it is computing.  `tf.group`
  allows you to request that one or more results finish before execution
  continues.

--- a/tensorflow/python/ops/distributions/bijector_impl.py
+++ b/tensorflow/python/ops/distributions/bijector_impl.py
@ -1071,7 +1071,7 @@ class Bijector(object):
      return math_ops.range(-reduce_ndims, 0)

  def _check_valid_event_ndims(self, min_event_ndims, event_ndims):
-    """Check whether event_ndims is atleast min_event_ndims."""
+    """Check whether event_ndims is at least min_event_ndims."""
    event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims")
    event_ndims_ = tensor_util.constant_value(event_ndims)
    assertions = []
--- a/tensorflow/python/ops/gradients_util.py
+++ b/tensorflow/python/ops/gradients_util.py
@ -416,7 +416,7 @@ def _NonEagerInputs(op, xs_set):
  """Returns the inputs of op, crossing closure boundaries where necessary.

  Does not return any captured EagerTensors, i.e., the number of tensors
-  returned may be less than than the actual number of inputs.
+  returned may be less than the actual number of inputs.

  Args:
    op: Operation
@ -911,7 +911,7 @@ class AggregationMethod(object):
  be supported in future releases:

  * `EXPERIMENTAL_TREE`: Gradient terms are summed in pairs using
-    using the "AddN" op. This method of summing gradients may reduce
+    the "AddN" op. This method of summing gradients may reduce
    performance, but it can improve memory utilization because the
    gradients can be released earlier.

--- a/tensorflow/python/ops/losses/util.py
+++ b/tensorflow/python/ops/losses/util.py
@ -146,7 +146,7 @@ def scale_losses_by_sample_weight(losses, sample_weight):

@tf_contextlib.contextmanager
 def check_per_example_loss_rank(per_example_loss):
-  """Context manager that checks that the rank of per_example_loss is atleast 1.
+  """Context manager that checks that the rank of per_example_loss is at least 1.

  Args:
    per_example_loss: Per example loss tensor.
@ -183,7 +183,7 @@ def add_loss(loss, loss_collection=ops.GraphKeys.LOSSES):
    loss_collection: Optional collection to add the loss to.
  """
  # Since we have no way of figuring out when a training iteration starts or
-  # ends, holding on to a loss when executing eagerly is indistingishable from
+  # ends, holding on to a loss when executing eagerly is indistinguishable from
  # leaking memory. We instead leave the collection empty.
  if loss_collection and not context.executing_eagerly():
    ops.add_to_collection(loss_collection, loss)
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@ -487,7 +487,7 @@ def multiply(x, y, name=None):
  >>> tf.math.multiply(7,6)
  <tf.Tensor: shape=(), dtype=int32, numpy=42>

-  If `x.shape` is not thes same as `y.shape`, they will be broadcast to a
+  If `x.shape` is not the same as `y.shape`, they will be broadcast to a
  compatible shape. (More about broadcasting
  [here](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).)

@ -513,7 +513,7 @@ def multiply(x, y, name=None):

  Raises:

-   * InvalidArgumentError: When `x` and `y` have incomptatible shapes or types.
+   * InvalidArgumentError: When `x` and `y` have incompatible shapes or types.
  """

  return gen_math_ops.mul(x, y, name)
@ -1868,9 +1868,9 @@ def range(start, limit=None, delta=1, dtype=None, name="range"):  # pylint: disa
                           key=dtype_hierarchy.index)
    else:
      inferred_dtype = dtype
-    # Always try perform a cast even start/limit/delta are already tensors.
-    # This will revole the case where start/limit/delta's original's dtype
-    # is different from provided dtype.
+    # Always try to perform a cast even when start/limit/delta are already
+    # tensors. This will resolve the case where start/limit/delta's original
+    # dtype is different from the provided dtype.
    start = cast(start, inferred_dtype)
    limit = cast(limit, inferred_dtype)
    delta = cast(delta, inferred_dtype)
@ -4890,7 +4890,7 @@ def sqrt(x, name=None):  # pylint: disable=redefined-builtin
    array([[0.0+1.j],
           [4.0+0.j]])>

-  Note: In order to support complex complex, please provide an input tensor
+  Note: In order to support complex type, please provide an input tensor
  of `complex64` or `complex128`.

  Args:
@ -5044,7 +5044,7 @@ def floor(x, name=None):
  """Returns element-wise largest integer not greater than x.

  Both input range is `(-inf, inf)` and the
-  ouput range consists of all integer values.
+  output range consists of all integer values.

  For example:

--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@ -579,7 +579,7 @@ def _Conv2DGrad(op, grad):

  # We call the gen_nn_ops backprop functions instead of nn_ops backprop
  # functions for performance reasons in Eager mode. gen_nn_ops functions take a
-  # `explicit_paddings` parameter, but nn_ops functions do not. So if were were
+  # `explicit_paddings` parameter, but nn_ops functions do not. So if we were
  # to use the nn_ops functions, we would have to convert `padding` and
  # `explicit_paddings` into a single `padding` parameter, increasing overhead
  # in Eager mode.
--- a/tensorflow/python/ops/numpy_ops/g3doc/TensorFlow_NumPy_Text_Generation.ipynb
+++ b/tensorflow/python/ops/numpy_ops/g3doc/TensorFlow_NumPy_Text_Generation.ipynb
@ -70,7 +70,7 @@
   "source": [
    "This tutorial demonstrates how to generate text using a character-based RNN. We will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n",
    "\n",
-    "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware acclerator > GPU*. If running locally make sure TensorFlow version >= 2.4.\n",
+    "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*. If running locally make sure TensorFlow version >= 2.4.\n",
    "\n",
    "This tutorial includes runnable code implemented using [tf.experimental.numpy](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy). The following is sample output when the model in this tutorial trained for 30 epochs, and started with the string \"Q\":\n",
    "\n",
@ -360,7 +360,7 @@
    "id": "_33OHL3b84i0"
   },
   "source": [
-    "Each index of these vectors are processed as one time step. For the input at time step 0, the model receives the index for \"F\" and trys to predict the index for \"i\" as the next character. At the next timestep, it does the same thing but the `RNN` considers the previous step context in addition to the current input character."
+    "Each index of these vectors is processed as one time step. For the input at time step 0, the model receives the index for \"F\" and tries to predict the index for \"i\" as the next character. At the next timestep, it does the same thing but the `RNN` considers the previous step context in addition to the current input character."
   ]
  },
  {
--- a/tensorflow/python/ops/op_selector_test.py
+++ b/tensorflow/python/ops/op_selector_test.py
@ -99,7 +99,7 @@ class SelectTest(test.TestCase):
      a0 = constant_op.constant(1)
      b0 = constant_op.constant(2)
      c0 = math_ops.add(a0, b0)  # pylint: disable=unused-variable
-    # Should extract the tensors from tre graph.
+    # Should extract the tensors from the graph.
    self.assertEqual(len(op_selector.make_list_of_t(g0)), 3)
    # Should extract the tensors from the tuple
    self.assertEqual(len(op_selector.make_list_of_t((a0, b0))), 2)
--- a/tensorflow/python/ops/parallel_for/gradients_test.py
+++ b/tensorflow/python/ops/parallel_for/gradients_test.py
@ -529,7 +529,7 @@ class GradientsTest(test.TestCase):
    os.environ["TF_ENABLE_WINOGRAD_NONFUSED"] = "0"
    data_format = ("channels_first"
                   if test.is_gpu_available() else "channels_last")
-    # Note that we we are setting training=False here so that dropout produces
+    # Note that we are setting training=False here so that dropout produces
    # the same result with pfor and with while_loop.
    pfor_outputs, while_outputs = create_mnist_per_eg_grad(
        4, data_format, training=False)
@ -543,7 +543,7 @@ class GradientsTest(test.TestCase):
    os.environ["TF_ENABLE_WINOGRAD_NONFUSED"] = "0"
    data_format = ("channels_first"
                   if test.is_gpu_available() else "channels_last")
-    # Note that we we are setting training=False here so that dropout produces
+    # Note that we are setting training=False here so that dropout produces
    # the same result with pfor and with while_loop.
    pfor_outputs, while_outputs = create_mnist_per_eg_jacobian(
        2, data_format, training=False)
--- a/tensorflow/python/ops/parallel_for/pfor.py
+++ b/tensorflow/python/ops/parallel_for/pfor.py
@ -424,7 +424,7 @@ class WhileOp(object):
    return inp, stacked

  def _maybe_stacked(self, cache, inp):
-    """Heuristic to figue out if the converting inp leads to a stacked value.
+    """Heuristic to figure out if the converting inp leads to a stacked value.


    Args:
@ -1128,7 +1128,7 @@ class PForConfig(object):
    concrete_function = def_function.function(fn).get_concrete_function(
        *tensor_specs)

-    # Creates PlaceholderWithDefault and IdentityN nodes corresponding the the
+    # Creates PlaceholderWithDefault and IdentityN nodes corresponding to the
    # reduction.
    pl_outputs = []
    with ops.control_dependencies(args):
--- a/tensorflow/python/ops/ragged/ragged_getitem.py
+++ b/tensorflow/python/ops/ragged/ragged_getitem.py
@ -347,7 +347,7 @@ def _slice_length(value_length, slice_key):

  Args:
    value_length: Scalar int `Tensor`: the length of the value being sliced.
-    slice_key: A `slice` object used to slice elements from the the value.
+    slice_key: A `slice` object used to slice elements from the value.

  Returns:
    The number of elements in the sliced value.
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@ -880,10 +880,10 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
     c. Pop a value from the captured placeholder and use it as the captured
        value for the forward pass tensor.

-  Tensors not in the forward graph are captured directly and become loop
-  invariants in the gradient graph, by adding the captured placeholder to the
-  list of outputs. This path is used, for instance, when custom_gradient
-  functions refer to tensors outside the loop body.
+  This only allows capturing tensors in the forward graph. A ValueError is
+  raised if an attempt is made to capture a tensor not in the forward graph.
+  To manually capture a tensor that is not in the forward graph, call `capture`
+  with `allowlisted=True`.

  Note: The `captures` dict does not contain the forward tensor since it is not
  directly captured. It contains the accumulator corresponding to this forward
@ -940,8 +940,8 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
      attrs=None,
      op_def=None,
      compute_device=True):
-    # For a reduction op, if op is in in the gradient body graph and its input
-    # is from the forward graph, moving op to the forward graph means we would
+    # For a reduction op, if op is in the gradient body graph and its input is
+    # from the forward graph, moving op to the forward graph means we would
    # store the tensor after the reduction as opposed to the tensor before
    # reduction, and therefore could significantly reduce memory consumption.
    # For now, we do this only for a few ops.
--- a/tensorflow/python/tools/api/generator/create_python_api.py
+++ b/tensorflow/python/tools/api/generator/create_python_api.py
@ -699,7 +699,7 @@ def main():
      metavar='O',
      type=str,
      nargs='+',
-      help='If a single file is passed in, then we we assume it contains a '
+      help='If a single file is passed in, then we assume it contains a '
      'semicolon-separated list of Python files that we expect this script to '
      'output. If multiple files are passed in, then we assume output files '
      'are listed directly as arguments.')
--- a/tensorflow/python/tpu/tpu_embedding.py
+++ b/tensorflow/python/tpu/tpu_embedding.py
@ -96,10 +96,10 @@ class TableConfig(
        `optimization_parameters` in `TPUEmbedding` constructor will be used.
        `learning_rate_fn` must be `None` if `learning_rate` is not `None.
      learning_rate_fn: string, use dynamic learning rate given by the function.
-        This function function will be passed the current global step. If
-        learning_rate and learning_rate_fn are both `None`, static learning rate
-        as specified in `optimization_parameters` is used. `learning_rate` must
-        be `None` if `learning_rate_fn` is not `None.
+        This function will be passed the current global step. If learning_rate
+        and learning_rate_fn are both `None`, static learning rate as specified
+        in `optimization_parameters` is used. `learning_rate` must be `None` if
+        `learning_rate_fn` is not `None`.
      optimization_parameters: `AdagradParameters`, `AdamParameters`,
        `Stochasticgradientdescentparameters`. Specifies table level optimizer.
        If it's `None` global optimizer in `TPUEmbedding` constructor is used.
--- a/tensorflow/python/tpu/tpu_embedding_v2.py
+++ b/tensorflow/python/tpu/tpu_embedding_v2.py
@ -987,8 +987,8 @@ class TPUEmbedding(tracking.AutoTrackable):

    # In the following loop we insert casts so that everything is either int32
    # or float32. This is because op inputs which are lists of tensors must be
-    # of the same type within the list. Moreover the CPU implementions of these
-    # ops cast to these types anyway, so we don't lose any data by casting
+    # of the same type within the list. Moreover the CPU implementations of
+    # these ops cast to these types anyway, so we don't lose any data by casting
    # early.
    for inp, weight, (path, feature) in zip(
        flat_inputs, flat_weights, flat_features):
@ -1467,8 +1467,8 @@ def cpu_embedding_lookup(inputs, weights, tables, feature_config):
  Note that TPU specific options (such as `max_sequence_length`) in the
  configuration objects will be ignored.

-  In the following example we take take a trained model (see the documentation
-  for `tf.tpu.experimental.embedding.TPUEmbedding` for the context) and create a
+  In the following example we take a trained model (see the documentation for
+  `tf.tpu.experimental.embedding.TPUEmbedding` for the context) and create a
  saved model with a serving function that will perform the embedding lookup and
  pass the results to your model:

--- a/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py
+++ b/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py
@ -516,7 +516,7 @@ class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase):
    # In general this means that after the update, if we lookup feature 0 and 1
    # the values will be 0.3*num_replicas lower per entry and for feature 2 they
    # will be 0.1*num_replicas lower.
-    # The one issue that that these lookups contain padding values.
+    # The one issue is that these lookups contain padding values.
    # For core 0, we get the first 2 elements of the 4 element batch.
    # For feature 0, the indices are [[0, 0], [1, 0], [1, 1]] with max sequence
    # length of 2, which means that [0, 1] will be 0s.
--- a/tensorflow/python/tpu/tpu_embedding_v2_test.py
+++ b/tensorflow/python/tpu/tpu_embedding_v2_test.py
@ -152,7 +152,7 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase):
    second_checkpoint = util.Checkpoint(model=self.second_mid_level)
    second_checkpoint.restore(_get_tmpdir('restore', 'save-1'))

-    # Call retrieve here as a way to check what the TPU contains contains.
+    # Call retrieve here as a way to check what the TPU contains.
    # Calling the retrieve ops directly might make for a cleaner separation of
    # test and module, though.
    self.second_mid_level._retrieve_variables()
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@ -1323,7 +1323,7 @@ class _CoordinatedSession(_WrappedSession):
  raises an exception, the exception is reported to the coordinator.

  In addition, after each call to `run()` this session ask the coordinator if
-  the session should stop.  In that case it will will join all the threads
+  the session should stop.  In that case it will join all the threads
  registered with the coordinator before returning.

  If the coordinator was requested to stop with an exception, that exception
--- a/tensorflow/python/training/tracking/graph_view.py
+++ b/tensorflow/python/training/tracking/graph_view.py
@ -185,7 +185,7 @@ class ObjectGraphView(object):
  def attached_dependencies(self):
    """Returns list of dependencies that should be saved in the checkpoint.

-    These dependencies are not tracked by root, but are in the the checkpoint.
+    These dependencies are not tracked by root, but are in the checkpoint.
    This is defined when the user creates a Checkpoint with both root and kwargs
    set.