Apply tf1-tf2 renames to tensorflow/python/ops docstrings and comments.
No code changes, only doc-strings and comments.

PiperOrigin-RevId: 244270926

parent 96072813ec
commit fd28b784b2
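
The renames below follow the TF1-to-TF2 symbol migration: docstring examples
that use graph-mode-only APIs are respelled under `tf.compat.v1`
(`tf.Session`, `tf.placeholder`, `tf.assert_*`, `tf.Print`, ...), and flat
aliases move to their namespaced homes (`tf.conj` -> `tf.math.conj`,
`tf.rsqrt` -> `tf.math.rsqrt`, `tf.confusion_matrix` ->
`tf.math.confusion_matrix`). A minimal sketch of how one of the rewritten
docstring examples is meant to run under a TF2 runtime (illustrative only;
this commit itself touches no executable code):

```python
import tensorflow as tf

# The rewritten examples still assume TF1-style graph execution,
# so a TF2 runtime has to opt back into it first.
tf.compat.v1.disable_eager_execution()

x = tf.constant([1.0, 4.0])
y = tf.math.rsqrt(x)  # previously written as `tf.rsqrt` in the comments

with tf.compat.v1.Session() as sess:  # previously `tf.Session`
  sess.run(tf.compat.v1.global_variables_initializer())
  print(sess.run(y))  # => [1.  0.5]
```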
Changed files in tensorflow/python/ops:

  array_ops.py, batch_norm_benchmark.py, check_ops.py, confusion_matrix.py,
  control_flow_ops.py, critical_section_ops.py, ctc_ops.py, custom_gradient.py,
  data_flow_ops.py, distributions/, embedding_ops.py, gradient_checker_test.py,
  histogram_ops.py, image_ops_impl.py, init_ops.py, init_ops_v2.py,
  linalg/ (linear_operator.py, linear_operator_block_diag.py,
  linear_operator_circulant.py, linear_operator_composition.py,
  linear_operator_diag.py, linear_operator_full_matrix.py,
  linear_operator_identity.py, linear_operator_kronecker.py,
  linear_operator_lower_triangular.py, linear_operator_test_util.py,
  linear_operator_util.py), linalg_ops.py, logging_ops.py, lookup_ops.py,
  math_ops.py, nn_fused_batchnorm_test.py, nn_impl.py, nn_ops.py,
  ragged/ (ragged_batch_gather_ops.py, ragged_gather_ops.py, ragged_tensor.py,
  ragged_tensor_test.py, ragged_where_op.py), random_ops.py,
  resource_variable_ops.py, rnn.py, rnn_cell_impl.py, script_ops.py,
  session_ops.py, sets_impl.py, signal/, sparse_ops.py, state_ops.py,
  stateless_random_ops.py, string_ops.py, summary_ops_v2.py, template.py,
  variable_scope.py, variables.py

tensorflow/python/ops/array_ops.py:

@@ -130,9 +130,8 @@ def expand_dims(input, axis=None, name=None, dim=None):

   Args:
     input: A `Tensor`.
-    axis: 0-D (scalar). Specifies the dimension index at which to
-      expand the shape of `input`. Must be in the range
-      `[-rank(input) - 1, rank(input)]`.
+    axis: 0-D (scalar). Specifies the dimension index at which to expand the
+      shape of `input`. Must be in the range `[-rank(input) - 1, rank(input)]`.
     name: The name of the output `Tensor` (optional).
     dim: 0-D (scalar). Equivalent to `axis`, to be deprecated.
@@ -187,9 +186,8 @@ def expand_dims_v2(input, axis, name=None):

   Args:
     input: A `Tensor`.
-    axis: 0-D (scalar). Specifies the dimension index at which to
-      expand the shape of `input`. Must be in the range
-      `[-rank(input) - 1, rank(input)]`.
+    axis: 0-D (scalar). Specifies the dimension index at which to expand the
+      shape of `input`. Must be in the range `[-rank(input) - 1, rank(input)]`.
     name: The name of the output `Tensor` (optional).

   Returns:
@@ -204,10 +202,9 @@ def expand_dims_v2(input, axis, name=None):

 # Aliases for some automatically-generated names.
 # pylint: disable=protected-access
-@deprecation.deprecated(
-    "2016-11-30",
-    "This op will be removed after the deprecation date. "
-    "Please switch to tf.setdiff1d().")
+@deprecation.deprecated("2016-11-30",
+                        "This op will be removed after the deprecation date. "
+                        "Please switch to tf.setdiff1d().")
 def listdiff(x, y, out_idx=None, name=None):
   return gen_array_ops.list_diff(x, y, out_idx, name)

@@ -218,10 +215,9 @@ listdiff.__doc__ = gen_array_ops.list_diff.__doc__ + "\n" + listdiff.__doc__


 # pylint: disable=undefined-variable
-@deprecation.deprecated(
-    "2018-11-30",
-    "This op will be removed after the deprecation date. "
-    "Please switch to tf.sets.difference().")
+@deprecation.deprecated("2018-11-30",
+                        "This op will be removed after the deprecation date. "
+                        "Please switch to tf.sets.difference().")
 @tf_export(v1=["setdiff1d"])
 def setdiff1d(x, y, index_dtype=dtypes.int32, name=None):
   return gen_array_ops.list_diff(x, y, index_dtype, name)
@@ -325,8 +321,8 @@ def shape(input, name=None, out_type=dtypes.int32):
   Args:
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
-    out_type: (Optional) The specified output type of the operation
-      (`int32` or `int64`). Defaults to `tf.int32`.
+    out_type: (Optional) The specified output type of the operation (`int32` or
+      `int64`). Defaults to `tf.int32`.

   Returns:
     A `Tensor` of type `out_type`.
@@ -342,16 +338,16 @@ def shape_internal(input, name=None, optimize=True, out_type=dtypes.int32):
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
     optimize: if true, encode the shape as a constant when possible.
-    out_type: (Optional) The specified output type of the operation
-      (`int32` or `int64`). Defaults to tf.int32.
+    out_type: (Optional) The specified output type of the operation (`int32` or
+      `int64`). Defaults to tf.int32.

   Returns:
     A `Tensor` of type `out_type`.

   """
   with ops.name_scope(name, "Shape", [input]) as name:
-    if isinstance(input, (sparse_tensor.SparseTensor,
-                          sparse_tensor.SparseTensorValue)):
+    if isinstance(
+        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
       return gen_math_ops.cast(input.dense_shape, out_type)
     else:
       if not context.executing_eagerly():
@@ -369,8 +365,8 @@ def shape_n(input, out_type=dtypes.int32, name=None):

   Args:
     input: A list of at least 1 `Tensor` object with the same type.
-    out_type: The specified output type of the operation
-      (`int32` or `int64`). Defaults to `tf.int32`(optional).
+    out_type: The specified output type of the operation (`int32` or `int64`).
+      Defaults to `tf.int32`(optional).
     name: A name for the operation (optional).

   Returns:
@@ -407,8 +403,8 @@ def size(input, name=None, out_type=dtypes.int32):
   Args:
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
-    out_type: (Optional) The specified non-quantized numeric output type
-      of the operation. Defaults to `tf.int32`.
+    out_type: (Optional) The specified non-quantized numeric output type of the
+      operation. Defaults to `tf.int32`.

   Returns:
     A `Tensor` of type `out_type`. Defaults to `tf.int32`.
@@ -428,8 +424,8 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32):
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
     optimize: if true, encode the size as a constant when possible.
-    out_type: (Optional) The specified non-quantized numeric output type
-      of the operation. Defaults to `tf.int32`.
+    out_type: (Optional) The specified non-quantized numeric output type of the
+      operation. Defaults to `tf.int32`.

   Returns:
     A `Tensor` of type `out_type`. Defaults to `tf.int32`.
@@ -441,8 +437,8 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32):
       num_elements = np.prod(input._shape_tuple(), dtype=np_out_type)  # pylint: disable=protected-access
       return ops.convert_to_tensor(num_elements, dtype=out_type)
   with ops.name_scope(name, "Size", [input]) as name:
-    if isinstance(input, (sparse_tensor.SparseTensor,
-                          sparse_tensor.SparseTensorValue)):
+    if isinstance(
+        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
       return gen_math_ops.prod(
           gen_math_ops.cast(input.dense_shape, out_type), 0, name=name)
     else:
@@ -503,8 +499,8 @@ def rank_internal(input, name=None, optimize=True):
     A `Tensor` of type `int32`.
   """
   with ops.name_scope(name, "Rank", [input]) as name:
-    if isinstance(input, (sparse_tensor.SparseTensor,
-                          sparse_tensor.SparseTensorValue)):
+    if isinstance(
+        input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
       return gen_array_ops.size(input.dense_shape, name=name)
     else:
       input_tensor = ops.convert_to_tensor(input)
@@ -519,12 +515,8 @@ _SLICE_TYPE_ERROR = (
     "tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid "
    "indices")

-_SUPPORTED_SLICE_DTYPES = (
-    dtypes.int32,
-    dtypes.int32_ref,
-    dtypes.int64,
-    dtypes.int64_ref
-)
+_SUPPORTED_SLICE_DTYPES = (dtypes.int32, dtypes.int32_ref, dtypes.int64,
+                           dtypes.int64_ref)


 def _check_index(idx):
@@ -536,8 +528,7 @@ def _check_index(idx):
   # * any object with a dtype is supported
   # * any object with a dtype has a sizeable shape attribute.
   dtype = getattr(idx, "dtype", None)
-  if (dtype is None or
-      dtypes.as_dtype(dtype) not in _SUPPORTED_SLICE_DTYPES or
+  if (dtype is None or dtypes.as_dtype(dtype) not in _SUPPORTED_SLICE_DTYPES or
       idx.shape and len(idx.shape) == 1):
     # TODO(slebedev): IndexError seems more appropriate here, but it
     # will break `_slice_helper` contract.
@@ -592,9 +583,8 @@ def _slice_helper(tensor, slice_spec, var=None):
   Args:
     tensor: An ops.Tensor object.
     slice_spec: The arguments to Tensor.__getitem__.
-    var: In the case of variable slice assignment, the Variable
-      object to slice (i.e. tensor is the read-only view of this
-      variable).
+    var: In the case of variable slice assignment, the Variable object to slice
+      (i.e. tensor is the read-only view of this variable).

   Returns:
     The appropriate slice of "tensor", based on "slice_spec".
@@ -858,6 +848,7 @@ def strided_slice(input_,
   parent_name = name

   if not (var is None and isinstance(op, ops.EagerTensor)):
+
     def assign(val, name=None):
       """Closure that holds all the arguments to create an assignment."""

@@ -913,8 +904,8 @@ def _SliceHelperVar(var, slice_spec):
   ```python
   import tensorflow as tf
   A = tf.Variable([[1,2,3], [4,5,6], [7,8,9]], dtype=tf.float32)
-  with tf.Session() as sess:
-    sess.run(tf.global_variables_initializer())
+  with tf.compat.v1.Session() as sess:
+    sess.run(tf.compat.v1.global_variables_initializer())
     print(sess.run(A[:2, :2]))  # => [[1,2], [4,5]]

     op = A[:2,:2].assign(22. * tf.ones((2, 2)))
@@ -1049,8 +1040,8 @@ def stack(values, axis=0, name="stack"):
     if value_shape is not None:
       expanded_num_dims = len(value_shape) + 1
       if axis < -expanded_num_dims or axis >= expanded_num_dims:
-        raise ValueError("axis = %d not in [%d, %d)" % (axis, -expanded_num_dims,
-                                                        expanded_num_dims))
+        raise ValueError("axis = %d not in [%d, %d)" %
+                         (axis, -expanded_num_dims, expanded_num_dims))

   return gen_array_ops.pack(values, axis=axis, name=name)

@@ -1079,8 +1070,8 @@ def _autopacking_helper(list_or_tuple, dtype, name):
     if ops.is_dense_tensor_like(elem):
       if dtype is not None and elem.dtype.base_dtype != dtype:
         raise TypeError("Cannot convert a list containing a tensor of dtype "
-                        "%s to %s (Tensor is: %r)" % (elem.dtype, dtype,
-                                                      elem))
+                        "%s to %s (Tensor is: %r)" %
+                        (elem.dtype, dtype, elem))
       converted_elems.append(elem)
       must_pack = True
     elif isinstance(elem, (list, tuple)):
@@ -1110,8 +1101,8 @@ def _get_dtype_from_nested_lists(list_or_tuple):
   """Returns the dtype of any tensor-like object in `list_or_tuple`, if found.

   Args:
-    list_or_tuple: A list or tuple representing an object that can be
-      converted to a `tf.Tensor`.
+    list_or_tuple: A list or tuple representing an object that can be converted
+      to a `tf.Tensor`.

   Returns:
     The dtype of any tensor-like object in `list_or_tuple`, or `None` if no
@@ -1128,11 +1119,13 @@ def _get_dtype_from_nested_lists(list_or_tuple):


 def _cast_nested_seqs_to_dtype(dtype):
+
   def _maybe_cast(elem):
     if ops.is_dense_tensor_like(elem):
       if dtype != elem.dtype.base_dtype:
         elem = gen_math_ops.cast(elem, dtype)
     return elem
+
   return _maybe_cast


@@ -1182,10 +1175,10 @@ def unstack(value, num=None, axis=0, name="unstack"):

   Args:
     value: A rank `R > 0` `Tensor` to be unstacked.
-    num: An `int`. The length of the dimension `axis`. Automatically inferred
-      if `None` (the default).
-    axis: An `int`. The axis to unstack along. Defaults to the first
-      dimension. Negative values wrap around, so the valid range is `[-R, R)`.
+    num: An `int`. The length of the dimension `axis`. Automatically inferred if
+      `None` (the default).
+    axis: An `int`. The axis to unstack along. Defaults to the first dimension.
+      Negative values wrap around, so the valid range is `[-R, R)`.
     name: A name for the operation (optional).

   Returns:
@@ -1280,10 +1273,10 @@ def concat(values, axis, name="concat"):
   Args:
     values: A list of `Tensor` objects or a single `Tensor`.
     axis: 0-D `int32` `Tensor`. Dimension along which to concatenate. Must be
-      in the range `[-rank(values), rank(values))`. As in Python, indexing
-      for axis is 0-based. Positive axis in the rage of
-      `[0, rank(values))` refers to `axis`-th dimension. And negative axis
-      refers to `axis + rank(values)`-th dimension.
+      in the range `[-rank(values), rank(values))`. As in Python, indexing for
+      axis is 0-based. Positive axis in the rage of `[0, rank(values))` refers
+      to `axis`-th dimension. And negative axis refers to `axis +
+      rank(values)`-th dimension.
     name: A name for the operation (optional).

   Returns:
@@ -1308,7 +1301,9 @@ def concat(values, axis, name="concat"):

 @tf_export(v1=["boolean_mask"])
 def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
-  """Apply boolean mask to tensor. Numpy equivalent is `tensor[mask]`.
+  """Apply boolean mask to tensor.
+
+  Numpy equivalent is `tensor[mask]`.

   ```python
   # 1-D example
@@ -1329,9 +1324,9 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
     tensor: N-D tensor.
     mask: K-D boolean tensor, K <= N and K must be known statically.
     name: A name for this operation (optional).
-    axis: A 0-D int Tensor representing the axis in `tensor` to mask from.
-      By default, axis is 0 which will mask from the first dimension. Otherwise
-      K + axis <= N.
+    axis: A 0-D int Tensor representing the axis in `tensor` to mask from. By
+      default, axis is 0 which will mask from the first dimension. Otherwise K +
+      axis <= N.

   Returns:
     (N-K+1)-dimensional tensor populated by entries in `tensor` corresponding
@@ -1372,15 +1367,16 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
     shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)

     leading_size = gen_math_ops.prod(shape(tensor)[axis:axis + ndims_mask], [0])
-    tensor = reshape(tensor,
-                     concat([
-                         shape(tensor)[:axis], [leading_size],
-                         shape(tensor)[axis + ndims_mask:]
-                     ], 0))
+    tensor = reshape(
+        tensor,
+        concat([
+            shape(tensor)[:axis], [leading_size],
+            shape(tensor)[axis + ndims_mask:]
+        ], 0))
     first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim])
-        .concatenate(shape_tensor[axis + ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis]).concatenate(
+            [first_dim]).concatenate(shape_tensor[axis + ndims_mask:]))

     mask = reshape(mask, [-1])
     return _apply_mask_1d(tensor, mask, axis)
@@ -1532,13 +1528,13 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):

   Args:
     value: The `Tensor` to split.
-    num_or_size_splits: Either an integer indicating the number of
-      splits along split_dim or a 1-D integer `Tensor` or Python list containing
-      the sizes of each output tensor along split_dim. If a scalar then it must
-      evenly divide `value.shape[axis]`; otherwise the sum of sizes along the
-      split dimension must match that of the `value`.
+    num_or_size_splits: Either an integer indicating the number of splits along
+      split_dim or a 1-D integer `Tensor` or Python list containing the sizes of
+      each output tensor along split_dim. If a scalar then it must evenly divide
+      `value.shape[axis]`; otherwise the sum of sizes along the split dimension
+      must match that of the `value`.
     axis: An integer or scalar `int32` `Tensor`. The dimension along which to
-      split. Must be in the range `[-rank(value), rank(value))`. Defaults to 0.
+      split. Must be in the range `[-rank(value), rank(value))`. Defaults to 0.
     num: Optional, used to specify the number of outputs when it cannot be
       inferred from the shape of `size_splits`.
     name: A name for the operation (optional).
@@ -1576,7 +1572,9 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"):

 @tf_export("transpose", v1=[])
 def transpose_v2(a, perm=None, conjugate=False, name="transpose"):
-  """Transposes `a`. Permutes the dimensions according to `perm`.
+  """Transposes `a`.
+
+  Permutes the dimensions according to `perm`.

   The returned tensor's dimension i will correspond to the input dimension
   `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is
@@ -1633,7 +1631,7 @@ def transpose_v2(a, perm=None, conjugate=False, name="transpose"):
     a: A `Tensor`.
     perm: A permutation of the dimensions of `a`.
     conjugate: Optional bool. Setting it to `True` is mathematically equivalent
-      to tf.conj(tf.transpose(input)).
+      to tf.math.conj(tf.transpose(input)).
     name: A name for the operation (optional).

   Returns:
@@ -1644,7 +1642,9 @@ def transpose_v2(a, perm=None, conjugate=False, name="transpose"):

 @tf_export(v1=["transpose"])
 def transpose(a, perm=None, name="transpose", conjugate=False):
-  """Transposes `a`. Permutes the dimensions according to `perm`.
+  """Transposes `a`.
+
+  Permutes the dimensions according to `perm`.

   The returned tensor's dimension i will correspond to the input dimension
   `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is
@@ -1702,15 +1702,15 @@ def transpose(a, perm=None, name="transpose", conjugate=False):
     perm: A permutation of the dimensions of `a`.
     name: A name for the operation (optional).
     conjugate: Optional bool. Setting it to `True` is mathematically equivalent
-      to tf.conj(tf.transpose(input)).
+      to tf.math.conj(tf.transpose(input)).

   Returns:
     A transposed `Tensor`.
   """
   with ops.name_scope(name, "transpose", [a]) as name:
     transpose_fn = (
-        gen_array_ops.conjugate_transpose
-        if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
+        gen_array_ops.conjugate_transpose if
+        (conjugate and a.dtype.is_complex) else gen_array_ops.transpose)
     if perm is None:
       a = ops.convert_to_tensor(a, name="a")
       if not a.get_shape().ndims:
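
The `conjugate` argument docs in the two transpose hunks above (and in
`matrix_transpose` just below) now reference `tf.math.conj`. A quick sketch of
the equivalence those docstrings state (not part of the diff; runs eagerly
under TF2):

```python
import tensorflow as tf

a = tf.constant([[1 + 2j, 3 - 1j],
                 [0 + 1j, 2 + 0j]], dtype=tf.complex64)

# Docstring claim: transpose(a, conjugate=True) is mathematically
# equivalent to tf.math.conj(tf.transpose(a)).
lhs = tf.transpose(a, conjugate=True)
rhs = tf.math.conj(tf.transpose(a))
assert bool(tf.reduce_all(tf.equal(lhs, rhs)))
```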
@@ -1781,7 +1781,7 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
     a: A `Tensor` with `rank >= 2`.
     name: A name for the operation (optional).
     conjugate: Optional bool. Setting it to `True` is mathematically equivalent
-      to tf.conj(tf.linalg.matrix_transpose(input)).
+      to tf.math.conj(tf.linalg.matrix_transpose(input)).

   Returns:
     A transposed batch matrix `Tensor`.
@@ -1806,8 +1806,8 @@ def matrix_transpose(a, name="matrix_transpose", conjugate=False):
       perm = list(range(ndims - 2)) + [ndims - 1] + [ndims - 2]
     else:
       a_rank = rank(a)
-      perm = concat((gen_math_ops._range(0, a_rank - 2, 1),
-                     [a_rank - 1, a_rank - 2]), 0)
+      perm = concat(
+          (gen_math_ops._range(0, a_rank - 2, 1), [a_rank - 1, a_rank - 2]), 0)

     return transpose(a, perm=perm, conjugate=conjugate)

@@ -1899,8 +1899,8 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True):
       `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`,
       `complex64`, `complex128`, `bool` or `string`.
     name: A name for the operation (optional).
-    optimize: if true, attempt to statically determine the shape of 'tensor'
-      and encode it as a constant.
+    optimize: if true, attempt to statically determine the shape of 'tensor' and
+      encode it as a constant.

   Returns:
     A `Tensor` with all elements set to zero.
@@ -1987,11 +1987,11 @@ def ones_like(tensor, dtype=None, name=None, optimize=True):
   Args:
     tensor: A `Tensor`.
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
-      `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`,
-      `complex64`, `complex128` or `bool`.
+      `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`, `complex64`,
+      `complex128` or `bool`.
     name: A name for the operation (optional).
-    optimize: if true, attempt to statically determine the shape of 'tensor'
-      and encode it as a constant.
+    optimize: if true, attempt to statically determine the shape of 'tensor' and
+      encode it as a constant.

   Returns:
     A `Tensor` with all elements set to 1.
@@ -2101,10 +2101,10 @@ def placeholder(dtype, shape=None, name=None):
   For example:

   ```python
-  x = tf.placeholder(tf.float32, shape=(1024, 1024))
+  x = tf.compat.v1.placeholder(tf.float32, shape=(1024, 1024))
   y = tf.matmul(x, x)

-  with tf.Session() as sess:
+  with tf.compat.v1.Session() as sess:
     print(sess.run(y))  # ERROR: will fail because x was not fed.

   rand_array = np.random.rand(1024, 1024)
@@ -2141,8 +2141,8 @@ def placeholder_with_default(input, shape, name=None):  # pylint: disable=redefi
   Args:
     input: A `Tensor`. The default value to produce when output is not fed.
-    shape: A `tf.TensorShape` or list of `int`s. The (possibly partial) shape
-      of the tensor.
+    shape: A `tf.TensorShape` or list of `int`s. The (possibly partial) shape of
+      the tensor.
     name: A name for the operation (optional).

   Returns:
@@ -2174,17 +2174,18 @@ def sparse_placeholder(dtype, shape=None, name=None):
   For example:

   ```python
-  x = tf.sparse.placeholder(tf.float32)
+  x = tf.compat.v1.sparse.placeholder(tf.float32)
   y = tf.sparse.reduce_sum(x)

-  with tf.Session() as sess:
+  with tf.compat.v1.Session() as sess:
     print(sess.run(y))  # ERROR: will fail because x was not fed.

     indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
     values = np.array([1.0, 2.0], dtype=np.float32)
     shape = np.array([7, 9, 2], dtype=np.int64)
     print(sess.run(y, feed_dict={
-      x: tf.SparseTensorValue(indices, values, shape)}))  # Will succeed.
+      x: tf.compat.v1.SparseTensorValue(indices, values, shape)}))  # Will
+      succeed.
     print(sess.run(y, feed_dict={
       x: (indices, values, shape)}))  # Will succeed.

@@ -2222,7 +2223,8 @@ def sparse_placeholder(dtype, shape=None, name=None):
           shape=[None],
           name=(name + "/values") if name is not None else None),
       indices=placeholder(
-          dtypes.int64, shape=[None, rank],
+          dtypes.int64,
+          shape=[None, rank],
           name=(name + "/indices") if name is not None else None),
       dense_shape=shape)

@@ -2369,8 +2371,8 @@ def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0):  # pyl
     paddings_constant = tensor_util.constant_value(
         result.op.inputs[1], partial=True)
     input_shape = result.op.inputs[0].shape
-    if (input_shape.ndims is not None and not result.shape.is_fully_defined()
-        and paddings_constant is not None):
+    if (input_shape.ndims is not None and
+        not result.shape.is_fully_defined() and paddings_constant is not None):
       new_shape = []
       for padding, dim in zip(paddings_constant, input_shape.as_list()):
         if padding is None or dim is None or any((x is None for x in padding)):
@@ -2582,11 +2584,12 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
   Raises:
     TypeError: If either `hypothesis` or `truth` are not a `SparseTensor`.
   """
-  if not isinstance(hypothesis, (sparse_tensor.SparseTensor,
-                                 sparse_tensor.SparseTensorValue)):
+  if not isinstance(
+      hypothesis,
+      (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
     raise TypeError("Hypothesis must be a SparseTensor.")
-  if not isinstance(truth, (sparse_tensor.SparseTensor,
-                            sparse_tensor.SparseTensorValue)):
+  if not isinstance(
+      truth, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
     raise TypeError("Truth must be a SparseTensor.")

   return gen_array_ops.edit_distance(
@@ -2710,8 +2713,8 @@ def required_space_to_batch_paddings(input_shape,
     result_paddings = stack(
         [[pad_start[i], pad_end[i]] for i in range(num_block_dims)],
         name="paddings")
-    result_crops = stack(
-        [[0, pad_end_extra[i]] for i in range(num_block_dims)], name="crops")
+    result_crops = stack([[0, pad_end_extra[i]] for i in range(num_block_dims)],
+                         name="crops")
     return result_paddings, result_crops


@@ -2719,8 +2722,9 @@ def required_space_to_batch_paddings(input_shape,
 @deprecation.deprecated_endpoints("space_to_batch")
 def space_to_batch(  # pylint: disable=missing-docstring
     input, paddings, block_size=None, name=None, block_shape=None):  # pylint: disable=redefined-builtin
-  block_size = deprecation.deprecated_argument_lookup(
-      "block_shape", block_shape, "block_size", block_size)
+  block_size = deprecation.deprecated_argument_lookup("block_shape",
+                                                      block_shape, "block_size",
+                                                      block_size)
   result = space_to_batch_nd(
       input,
       paddings=paddings,
@@ -2777,8 +2781,9 @@ depth_to_space_v2.__doc__ = gen_array_ops.depth_to_space.__doc__

 @tf_export(v1=["batch_to_space"])
 def batch_to_space(input, crops, block_size, name=None, block_shape=None):  # pylint: disable=redefined-builtin,missing-docstring
-  block_size = deprecation.deprecated_argument_lookup(
-      "block_shape", block_shape, "block_size", block_size)
+  block_size = deprecation.deprecated_argument_lookup("block_shape",
+                                                      block_shape, "block_size",
+                                                      block_size)
   result = batch_to_space_nd(
       input,
       crops=crops,
@@ -2803,125 +2808,59 @@ def batch_to_space_v2(input, block_shape, crops, name=None):  # pylint: disable=
   is the reverse of SpaceToBatch. See below for a precise description.

   Args:
-    input: A `Tensor`.
-      N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`,
-      where spatial_shape has M dimensions.
-    block_shape: A `Tensor`. Must be one of the following types:
-      `int32`, `int64`. 1-D with shape `[M]`, all values must be >= 1.
-      For backwards compatibility with TF 1.0, this parameter may be an int, in
-      which case it is converted to
-      `numpy.array([block_shape, block_shape], dtype=numpy.int64)`.
-    crops: A `Tensor`. Must be one of the following types: `int32`, `int64`.
-      2-D with shape `[M, 2]`, all values must be >= 0.
-      `crops[i] = [crop_start, crop_end]` specifies the amount to crop from
-      input dimension `i + 1`, which corresponds to spatial dimension `i`. It
-      is required that
-      `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
+    input: A `Tensor`. N-D with shape `input_shape = [batch] + spatial_shape +
+      remaining_shape`, where spatial_shape has M dimensions.
+    block_shape: A `Tensor`. Must be one of the following types: `int32`,
+      `int64`. 1-D with shape `[M]`, all values must be >= 1. For backwards
+      compatibility with TF 1.0, this parameter may be an int, in which case it
+      is converted to `numpy.array([block_shape, block_shape],
+      dtype=numpy.int64)`.
+    crops: A `Tensor`. Must be one of the following types: `int32`, `int64`. 2-D
+      with shape `[M, 2]`, all values must be >= 0. `crops[i] = [crop_start,
+      crop_end]` specifies the amount to crop from input dimension `i + 1`,
+      which corresponds to spatial dimension `i`. It is required that
+      `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`.
   This operation is equivalent to the following steps:

-  1. Reshape `input` to `reshaped` of shape:
-       [block_shape[0], ..., block_shape[M-1],
-        batch / prod(block_shape),
-        input_shape[1], ..., input_shape[N-1]]
-
-  2. Permute dimensions of `reshaped` to produce `permuted` of shape
-       [batch / prod(block_shape),
-
-        input_shape[1], block_shape[0],
-        ...,
-        input_shape[M], block_shape[M-1],
-
-        input_shape[M+1], ..., input_shape[N-1]]
-
-  3. Reshape `permuted` to produce `reshaped_permuted` of shape
-       [batch / prod(block_shape),
-
-        input_shape[1] * block_shape[0],
-        ...,
-        input_shape[M] * block_shape[M-1],
-
-        input_shape[M+1],
-        ...,
-        input_shape[N-1]]
-
-  4. Crop the start and end of dimensions `[1, ..., M]` of
-     `reshaped_permuted` according to `crops` to produce the
-     output of shape:
-       [batch / prod(block_shape),
-
-        input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1],
-        ...,
-        input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1],
-
-        input_shape[M+1], ..., input_shape[N-1]]
-
-  Some examples:
-
-  (1) For the following input of shape `[4, 1, 1, 1]`,
-      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`:
-
-  ```
-  [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
-  ```
-
-  The output tensor has shape `[1, 2, 2, 1]` and value:
-
-  ```
-  x = [[[[1], [2]], [[3], [4]]]]
-  ```
-
-  (2) For the following input of shape `[4, 1, 1, 3]`,
-      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`:
-
-  ```
-  [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
-  ```
-
-  The output tensor has shape `[1, 2, 2, 3]` and value:
-
-  ```
-  x = [[[[1, 2, 3], [4, 5, 6]],
-        [[7, 8, 9], [10, 11, 12]]]]
-  ```
-
-  (3) For the following input of shape `[4, 2, 2, 1]`,
-      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`:
-
-  ```
-  x = [[[[1], [3]], [[9], [11]]],
-       [[[2], [4]], [[10], [12]]],
-       [[[5], [7]], [[13], [15]]],
-       [[[6], [8]], [[14], [16]]]]
-  ```
-
-  The output tensor has shape `[1, 4, 4, 1]` and value:
-
-  ```
-  x = [[[1], [2], [3], [4]],
-       [[5], [6], [7], [8]],
-       [[9], [10], [11], [12]],
-       [[13], [14], [15], [16]]]
-  ```
-
-  (4) For the following input of shape `[8, 1, 3, 1]`,
-      `block_shape = [2, 2]`, and `crops = [[0, 0], [2, 0]]`:
-
-  ```
-  x = [[[[0], [1], [3]]], [[[0], [9], [11]]],
-       [[[0], [2], [4]]], [[[0], [10], [12]]],
-       [[[0], [5], [7]]], [[[0], [13], [15]]],
-       [[[0], [6], [8]]], [[[0], [14], [16]]]]
-  ```
-
-  The output tensor has shape `[2, 2, 4, 1]` and value:
-
-  ```
-  x = [[[[1], [2], [3], [4]],
-        [[5], [6], [7], [8]]],
-       [[[9], [10], [11], [12]],
-        [[13], [14], [15], [16]]]]
-  ```
+    1. Reshape `input` to `reshaped` of shape: [block_shape[0], ...,
+      block_shape[M-1], batch / prod(block_shape), input_shape[1], ...,
+      input_shape[N-1]]  2. Permute dimensions of `reshaped` to produce
+      `permuted` of shape [batch / prod(block_shape), input_shape[1],
+      block_shape[0], ..., input_shape[M], block_shape[M-1],
+      input_shape[M+1], ..., input_shape[N-1]]  3. Reshape `permuted` to
+      produce `reshaped_permuted` of shape [batch / prod(block_shape),
+      input_shape[1] * block_shape[0], ..., input_shape[M] * block_shape[M-1],
+      input_shape[M+1], ..., input_shape[N-1]]  4. Crop the start and end of
+      dimensions `[1, ..., M]` of `reshaped_permuted` according to `crops` to
+      produce the
+       output of shape: [batch / prod(block_shape), input_shape[1] *
+         block_shape[0] - crops[0,0] - crops[0,1], ..., input_shape[M] *
+         block_shape[M-1] - crops[M-1,0] - crops[M-1,1], input_shape[M+1],
+         ..., input_shape[N-1]]
+    Some examples: (1) For the following input of shape `[4, 1, 1, 1]`,
+      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`: ``` [[[[1]]],
+      [[[2]]], [[[3]]], [[[4]]]] ```
+      The output tensor has shape `[1, 2, 2, 1]` and value: ``` x = [[[[1],
+      [2]], [[3], [4]]]] ``` (2) For the following input of shape `[4, 1, 1,
+      3]`,
+      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`: ``` [[[1, 2,
+      3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] ```
+      The output tensor has shape `[1, 2, 2, 3]` and value: ``` x = [[[[1, 2,
+      3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]] ``` (3) For the following
+      input of shape `[4, 2, 2, 1]`,
+      `block_shape = [2, 2]`, and `crops = [[0, 0], [0, 0]]`: ``` x =
+      [[[[1], [3]], [[9], [11]]], [[[2], [4]], [[10], [12]]], [[[5], [7]],
+      [[13], [15]]], [[[6], [8]], [[14], [16]]]] ```
+      The output tensor has shape `[1, 4, 4, 1]` and value: ``` x = [[[1],
+      [2], [3], [4]], [[5], [6], [7], [8]], [[9], [10], [11], [12]],
+      [[13], [14], [15], [16]]] ``` (4) For the following input of shape
+      `[8, 1, 3, 1]`,
+      `block_shape = [2, 2]`, and `crops = [[0, 0], [2, 0]]`: ``` x =
+      [[[[0], [1], [3]]], [[[0], [9], [11]]], [[[0], [2], [4]]], [[[0],
+      [10], [12]]], [[[0], [5], [7]]], [[[0], [13], [15]]], [[[0], [6],
+      [8]]], [[[0], [14], [16]]]] ```
+      The output tensor has shape `[2, 2, 4, 1]` and value: ``` x = [[[[1],
+      [2], [3], [4]], [[5], [6], [7], [8]]], [[[9], [10], [11], [12]],
+      [[13], [14], [15], [16]]]] ```
     name: A name for the operation (optional).

   Returns:
@@ -2930,10 +2869,8 @@ def batch_to_space_v2(input, block_shape, crops, name=None):  # pylint: disable=
   if isinstance(block_shape, int):
     block_shape = np.array([block_shape, block_shape], dtype=np.int64)

-  return batch_to_space_nd(input=input,
-                           block_shape=block_shape,
-                           crops=crops,
-                           name=name)
+  return batch_to_space_nd(
+      input=input, block_shape=block_shape, crops=crops, name=name)


 @tf_export("one_hot")
@@ -3037,16 +2974,17 @@ def one_hot(indices,
     TypeError: If dtype of either `on_value` or `off_value` don't match `dtype`
     TypeError: If dtype of `on_value` and `off_value` don't match one another
   """
-  with ops.name_scope(name, "one_hot",
-                      [indices, depth, on_value, off_value, axis,
-                       dtype]) as name:
+  with ops.name_scope(
+      name, "one_hot",
+      [indices, depth, on_value, off_value, axis, dtype]) as name:
     on_exists = on_value is not None
     off_exists = off_value is not None

-    on_dtype = (ops.convert_to_tensor(on_value).dtype.base_dtype if on_exists
-                else None)
-    off_dtype = (ops.convert_to_tensor(off_value).dtype.base_dtype if off_exists
-                 else None)
+    on_dtype = (
+        ops.convert_to_tensor(on_value).dtype.base_dtype if on_exists else None)
+    off_dtype = (
+        ops.convert_to_tensor(off_value).dtype.base_dtype
+        if off_exists else None)

     if on_exists or off_exists:
       if dtype is not None:
@@ -3126,6 +3064,7 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None):
       Default is the maximum value in `lengths`.
     dtype: output type of the resulting tensor.
     name: name of the op.
+
   Returns:
     A mask tensor of shape `lengths.shape + (maxlen,)`, cast to specified dtype.
   Raises:
@@ -3188,10 +3127,10 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None):

   Args:
     input: A `Tensor`. The `input` to squeeze.
-    axis: An optional list of `ints`. Defaults to `[]`.
-      If specified, only squeezes the dimensions listed. The dimension
-      index starts at 0. It is an error to squeeze a dimension that is not 1.
-      Must be in the range `[-rank(input), rank(input))`.
+    axis: An optional list of `ints`. Defaults to `[]`. If specified, only
+      squeezes the dimensions listed. The dimension index starts at 0. It is an
+      error to squeeze a dimension that is not 1. Must be in the range
+      `[-rank(input), rank(input))`.
     name: A name for the operation (optional).
     squeeze_dims: Deprecated keyword argument that is now axis.

@@ -3203,8 +3142,8 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None):
   Raises:
     ValueError: When both `squeeze_dims` and `axis` are specified.
   """
-  axis = deprecation.deprecated_argument_lookup(
-      "axis", axis, "squeeze_dims", squeeze_dims)
+  axis = deprecation.deprecated_argument_lookup("axis", axis, "squeeze_dims",
+                                                squeeze_dims)
   if np.isscalar(axis):
     axis = [axis]
   return gen_array_ops.squeeze(input, axis, name)
@@ -3272,10 +3211,12 @@ def where(condition, x=None, y=None, name=None):

 # pylint: disable=redefined-builtin
 @tf_export(v1=["reverse_sequence"])
-@deprecation.deprecated_args(
-    None, "seq_dim is deprecated, use seq_axis instead", "seq_dim")
-@deprecation.deprecated_args(
-    None, "batch_dim is deprecated, use batch_axis instead", "batch_dim")
+@deprecation.deprecated_args(None,
+                             "seq_dim is deprecated, use seq_axis instead",
+                             "seq_dim")
+@deprecation.deprecated_args(None,
+                             "batch_dim is deprecated, use batch_axis instead",
+                             "batch_dim")
 def reverse_sequence(input,
                      seq_lengths,
                      seq_axis=None,
@@ -3302,8 +3243,11 @@ reverse_sequence.__doc__ = deprecation.rewrite_argument_docstring(


 @tf_export("reverse_sequence", v1=[])
-def reverse_sequence_v2(
-    input, seq_lengths, seq_axis=None, batch_axis=None, name=None):
+def reverse_sequence_v2(input,
+                        seq_lengths,
+                        seq_axis=None,
+                        batch_axis=None,
+                        name=None):
   return gen_array_ops.reverse_sequence(
       input=input,
       seq_lengths=seq_lengths,
@@ -3414,10 +3358,19 @@ def gather(params,

 @tf_export("gather", v1=[])
 @dispatch.add_dispatch_support
-def gather_v2(params, indices, validate_indices=None, axis=None,
-              batch_dims=0, name=None):
-  return gather(params, indices, validate_indices=validate_indices, name=name,
-                axis=axis, batch_dims=batch_dims)
+def gather_v2(params,
+              indices,
+              validate_indices=None,
+              axis=None,
+              batch_dims=0,
+              name=None):
+  return gather(
+      params,
+      indices,
+      validate_indices=validate_indices,
+      name=name,
+      axis=axis,
+      batch_dims=batch_dims)


 gather.__doc__ = gather_v2.__doc__ = gen_array_ops.gather_v2.__doc__
@@ -3725,8 +3678,7 @@ def gather_nd(params, indices, name=None, batch_dims=0):
     else:
       return gen_array_ops.gather_nd(params, indices, name=name)
   else:
-    return batch_gather_nd(
-        params, indices, batch_dims=batch_dims, name=name)
+    return batch_gather_nd(params, indices, batch_dims=batch_dims, name=name)


 @tf_export("gather_nd", v1=[])
@@ -3782,7 +3734,8 @@ def batch_gather_nd(params, indices, batch_dims, name=None):
     batch_dim_list = unstack(batch_shape, axis=0)
     dim_ranges = [
         gen_math_ops.cast(gen_math_ops._range(0, x, 1), indices.dtype)
-        for x in batch_dim_list]
+        for x in batch_dim_list
+    ]
     mesh_list = meshgrid(*dim_ranges, indexing="ij") if dim_ranges else []
     # Then we flatten and stack the tensors to form a (B1.B2) by 2 matrix.
     flat_list = [reshape(x, shape=(-1,)) for x in mesh_list]
@@ -3791,10 +3744,13 @@ def batch_gather_nd(params, indices, batch_dims, name=None):
     # concat -> index_grid [B1.B2, 2] with indices [i1, ..., iK, C]
     # So we reshape them both to [(B1.B2), i1, ..., iK, *]
     index_grid_shape = shape(index_grid)
-    index_grid = reshape(index_grid,
-                         concat([index_grid_shape[:1],
-                                 ones(index_internal_ndims, dtype=dtypes.int32),
-                                 index_grid_shape[1:]], axis=0))
+    index_grid = reshape(
+        index_grid,
+        concat([
+            index_grid_shape[:1],
+            ones(index_internal_ndims, dtype=dtypes.int32), index_grid_shape[1:]
+        ],
+               axis=0))
     tile_shape = concat(((1,), indices_internal_shape, (1,)), axis=0)
     index_grid = tile(index_grid, multiples=tile_shape)
     # index_grid now has shape [(B1.B2), i1, ..., iK, 2]
@@ -3820,27 +3776,30 @@ def batch_gather_nd(params, indices, batch_dims, name=None):
     "2017-10-25",
     "`tf.quantize_v2` is deprecated, please use `tf.quantization.quantize` "
     "instead.")  # pylint: disable=missing-docstring
-def quantize_v2(input,  # pylint: disable=redefined-builtin
-                min_range,
-                max_range,
-                T,
-                mode="MIN_COMBINED",
-                name=None,
-                round_mode="HALF_AWAY_FROM_ZERO"):
-  return gen_array_ops.quantize_v2(input,
-                                   min_range,
-                                   max_range,
-                                   T=T,
-                                   mode=mode,
-                                   name=name,
-                                   round_mode=round_mode)
+def quantize_v2(
+    input,  # pylint: disable=redefined-builtin
+    min_range,
+    max_range,
+    T,
+    mode="MIN_COMBINED",
+    name=None,
+    round_mode="HALF_AWAY_FROM_ZERO"):
+  return gen_array_ops.quantize_v2(
+      input,
+      min_range,
+      max_range,
+      T=T,
+      mode=mode,
+      name=name,
+      round_mode=round_mode)


 quantize_v2.__doc__ = """Please use `tf.quantization.quantize` instead."""


-# We want to expose tf.quantize instead of tf.quantize_v2; we can deprecate
-# tf.quantize_v2 in next version of TensorFlow.
+# We want to expose tf.quantization.quantize instead of
+# tf.quantization.quantize; we can deprecate tf.quantization.quantize in next
+# version of TensorFlow.
 @tf_export("quantization.quantize", v1=["quantization.quantize", "quantize"])
 @deprecation.deprecated_endpoints("quantize")
 def quantize(input,  # pylint: disable=redefined-builtin
@@ -3925,13 +3884,7 @@ quantize.__doc__ = gen_array_ops.quantize_v2.__doc__


 @tf_export("image.extract_patches")
-def extract_image_patches_v2(
-    images,
-    sizes,
-    strides,
-    rates,
-    padding,
-    name=None):
+def extract_image_patches_v2(images, sizes, strides, rates, padding, name=None):
   # pylint: disable=line-too-long
   r"""Extract `patches` from `images` and put them in the \"depth\" output dimension.

@@ -3960,13 +3913,13 @@ def extract_image_patches_v2(
     the output patches.
   """
   # pylint: enable=line-too-long
-  return gen_array_ops.extract_image_patches(
-      images, sizes, strides, rates, padding, name)
+  return gen_array_ops.extract_image_patches(images, sizes, strides, rates,
+                                             padding, name)


 @tf_export(v1=["image.extract_image_patches", "extract_image_patches"])
-@deprecation.deprecated_args(
-    None, "ksizes is deprecated, use sizes instead", "ksizes")
+@deprecation.deprecated_args(None, "ksizes is deprecated, use sizes instead",
+                             "ksizes")
 def extract_image_patches(  # pylint: disable=missing-docstring
     images,
     ksizes=None,
@@ -3975,10 +3928,10 @@ def extract_image_patches(  # pylint: disable=missing-docstring
     padding=None,
     name=None,
     sizes=None):
-  ksizes = deprecation.deprecated_argument_lookup(
-      "sizes", sizes, "ksizes", ksizes)
-  return gen_array_ops.extract_image_patches(
-      images, ksizes, strides, rates, padding, name)
+  ksizes = deprecation.deprecated_argument_lookup("sizes", sizes, "ksizes",
+                                                  ksizes)
+  return gen_array_ops.extract_image_patches(images, ksizes, strides, rates,
+                                             padding, name)


 extract_image_patches.__doc__ = gen_array_ops.extract_image_patches.__doc__
tensorflow/python/ops/batch_norm_benchmark.py:

@@ -48,7 +48,7 @@ def batch_norm_op(tensor, mean, variance, beta, gamma, scale):


 # Note that the naive implementation is much slower:
-# batch_norm = (tensor - mean) * tf.rsqrt(variance + 0.001)
+# batch_norm = (tensor - mean) * tf.math.rsqrt(variance + 0.001)
 # if scale:
 #   batch_norm *= gamma
 # return batch_norm + beta
tensorflow/python/ops/check_ops.py:

@@ -159,7 +159,7 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_negative(x)]):
+  with tf.control_dependencies([tf.compat.v1.assert_negative(x)]):
     output = tf.reduce_sum(x)
   ```

@@ -233,7 +233,7 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_positive(x)]):
+  with tf.control_dependencies([tf.compat.v1.assert_positive(x)]):
     output = tf.reduce_sum(x)
   ```

@@ -308,7 +308,7 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_non_negative(x)]):
+  with tf.control_dependencies([tf.compat.v1.assert_non_negative(x)]):
     output = tf.reduce_sum(x)
   ```

@@ -385,7 +385,7 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_non_positive(x)]):
+  with tf.control_dependencies([tf.compat.v1.assert_non_positive(x)]):
     output = tf.reduce_sum(x)
   ```

@@ -461,7 +461,7 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_equal(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_equal(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -603,7 +603,7 @@ def assert_none_equal(
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_none_equal(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_none_equal(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -709,7 +709,7 @@ def assert_near(
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_near(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_near(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -820,7 +820,7 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_less(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_less(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -905,7 +905,7 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_less_equal(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_less_equal(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -989,7 +989,7 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_greater(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_greater(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -1076,7 +1076,7 @@ def assert_greater_equal(x, y, data=None, summarize=None, message=None,
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_greater_equal(x, y)]):
+  with tf.control_dependencies([tf.compat.v1.assert_greater_equal(x, y)]):
     output = tf.reduce_sum(x)
   ```

@@ -1206,7 +1206,7 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_rank(x, 2)]):
+  with tf.control_dependencies([tf.compat.v1.assert_rank(x, 2)]):
     output = tf.reduce_sum(x)
   ```

@@ -1303,7 +1303,7 @@ def assert_rank_at_least(
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_rank_at_least(x, 2)]):
+  with tf.control_dependencies([tf.compat.v1.assert_rank_at_least(x, 2)]):
     output = tf.reduce_sum(x)
   ```

@@ -1467,7 +1467,7 @@ def assert_rank_in(
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_rank_in(x, (2, 4))]):
+  with tf.control_dependencies([tf.compat.v1.assert_rank_in(x, (2, 4))]):
     output = tf.reduce_sum(x)
   ```

@@ -1549,7 +1549,7 @@ def assert_integer(x, message=None, name=None):
   Example of adding a dependency to an operation:

   ```python
-  with tf.control_dependencies([tf.assert_integer(x)]):
+  with tf.control_dependencies([tf.compat.v1.assert_integer(x)]):
     output = tf.reduce_sum(x)
   ```

@@ -1886,7 +1886,7 @@ def ensure_shape(x, shape, name=None):

   For example:
   ```python
-  x = tf.placeholder(tf.int32)
+  x = tf.compat.v1.placeholder(tf.int32)
   print(x.shape)
   ==> TensorShape(None)
   y = x * 2
@@ -1897,7 +1897,7 @@ def ensure_shape(x, shape, name=None):
   print(y.shape)
   ==> TensorShape([Dimension(None), Dimension(3), Dimension(3)])

-  with tf.Session() as sess:
+  with tf.compat.v1.Session() as sess:
     # Raises tf.errors.InvalidArgumentError, because the shape (3,) is not
     # compatible with the shape (None, 3, 3)
     sess.run(y, feed_dict={x: [1, 2, 3]})
tensorflow/python/ops/confusion_matrix.py:

@@ -116,7 +116,7 @@ def confusion_matrix(labels,
   For example:

   ```python
-    tf.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
+    tf.math.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
         [[0 0 0 0 0]
          [0 0 1 0 0]
          [0 0 1 0 0]
@@ -226,7 +226,7 @@ def confusion_matrix_v1(labels,
   For example:

   ```python
-    tf.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
+    tf.math.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
         [[0 0 0 0 0]
          [0 0 1 0 0]
          [0 0 1 0 0]
tensorflow/python/ops/control_flow_ops.py:

@@ -247,9 +247,11 @@ def _Enter(data,
       result.set_shape(data.get_shape())
     return result
   elif isinstance(data, composite_tensor.CompositeTensor):
+
     def enter_component(t):
-      return _Enter(t, frame_name, is_constant, parallel_iterations,
-                    use_ref, use_input_shape)
+      return _Enter(t, frame_name, is_constant, parallel_iterations, use_ref,
+                    use_input_shape)
+
     return nest.map_structure(enter_component, data, expand_composites=True)
   else:
     raise TypeError("Type %s not supported" % type(data))
@@ -409,12 +411,14 @@ def merge(inputs, name=None):
       nest.assert_same_structure(inputs[0], v, expand_composites=True)

     flat_inputs = [nest.flatten(v, expand_composites=True) for v in inputs]
-    merged_results = [gen_control_flow_ops.merge(component)
-                      for component in zip(*flat_inputs)]
+    merged_results = [
+        gen_control_flow_ops.merge(component)
+        for component in zip(*flat_inputs)
+    ]
     flat_merged = [tensor for (tensor, _) in merged_results]
     chosen_index = merged_results[0][1]
-    merged_inputs = nest.pack_sequence_as(inputs[0], flat_merged,
-                                          expand_composites=True)
+    merged_inputs = nest.pack_sequence_as(
+        inputs[0], flat_merged, expand_composites=True)
     return (merged_inputs, chosen_index)


@@ -566,6 +570,7 @@ def _AddNextAndBackEdge(m, v, enforce_shape_invariant=True):
   # pylint: disable=protected-access
   def update_component(m_component, v_component):
     m_component.op._update_input(1, v_component)
+
   if isinstance(m, ops.IndexedSlices):
     v = math_ops._as_indexed_slices(v, optimize=False)
   # pylint: enable=protected-access
@@ -1488,8 +1493,10 @@ class ControlFlowContext(object):
   def ExitResult(self, result):
     """Make a list of tensors available in the outer context."""
     if self._outer_context:
-      nest.map_structure(lambda x: self._outer_context.AddName(x.name), result,
-                         expand_composites=True)
+      nest.map_structure(
+          lambda x: self._outer_context.AddName(x.name),
+          result,
+          expand_composites=True)

   def GetWhileContext(self):
     """Return the while context containing this context."""
@@ -1797,8 +1804,8 @@ class CondContext(ControlFlowContext):
       # Use pivot as the proxy for this op.
       return with_dependencies([v], self._pivot)
     else:
-      v = nest.map_structure(_convert_tensorarray_to_flow, v,
-                             expand_composites=True)
+      v = nest.map_structure(
+          _convert_tensorarray_to_flow, v, expand_composites=True)
       return self._ProcessOutputTensor(ops.convert_to_tensor(v))

   def BuildCondBranch(self, fn):
@@ -1814,14 +1821,13 @@ class CondContext(ControlFlowContext):
     if original_result is None:
       return no_op(), None
     else:
-      original_result = nest.map_structure(array_ops.identity,
-                                           original_result,
-                                           expand_composites=True)
+      original_result = nest.map_structure(
+          array_ops.identity, original_result, expand_composites=True)
     if original_result is None:
       return None, None

-    result = nest.map_structure(self._BuildCondTensor, original_result,
-                                expand_composites=True)
+    result = nest.map_structure(
+        self._BuildCondTensor, original_result, expand_composites=True)
     if not isinstance(result, (list, _basetuple)):
       result = [result]
     return original_result, result
@@ -1988,8 +1994,7 @@ def cond(pred,

     # Check that the return values of the two branches have the same structure.
     try:
-      nest.assert_same_structure(orig_res_t, orig_res_f,
-                                 expand_composites=True)
+      nest.assert_same_structure(orig_res_t, orig_res_f, expand_composites=True)
     except TypeError as e:
       raise TypeError(
           "Incompatible return types of true_fn and false_fn: {}".format(e))
@@ -2024,8 +2029,8 @@ def cond(pred,
     ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_t)
     ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_f)

-    merges = nest.pack_sequence_as(structure=orig_res_t, flat_sequence=merges,
-                                   expand_composites=True)
+    merges = nest.pack_sequence_as(
+        structure=orig_res_t, flat_sequence=merges, expand_composites=True)

     # Singleton lists and tuples are automatically unpacked if strict == False.
     if not strict:
@ -2044,12 +2049,12 @@ def _cast_indexed_slice_indices(structure, flat_a, flat_b):
|
||||
|
||||
Args:
|
||||
structure: The nested structure that was flattened.
|
||||
flat_a: A flattened list of `Tensors` whose structure matches
|
||||
`structure`. Will be modified in place to cast `IndexedSlices`
|
||||
indices tensors to int64, where necessary.
|
||||
flat_a: A flattened list of `Tensors` whose structure matches
|
||||
`structure`. Will be modified in place to cast `IndexedSlices`
|
||||
indices tensors to int64, where necessary.
|
||||
flat_a: A flattened list of `Tensors` whose structure matches `structure`.
|
||||
Will be modified in place to cast `IndexedSlices` indices tensors to
|
||||
int64, where necessary.
|
||||
flat_a: A flattened list of `Tensors` whose structure matches `structure`.
|
||||
Will be modified in place to cast `IndexedSlices` indices tensors to
|
||||
int64, where necessary.
|
||||
"""
|
||||
# Find the locations (in flat_a and flat_b) of the IndexedSlices'
|
||||
# indices tensors.
|
||||
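Most of these hunks merely reflow calls to `nest.map_structure` and `nest.assert_same_structure` with `expand_composites=True`. As a rough illustration of what that flag does, a minimal sketch using the public `tf.nest` API (not the internal `nest` module touched by this commit):

```python
import tensorflow as tf

# SparseTensor is a composite tensor: with expand_composites=True it is
# flattened into its component tensors (indices, values, dense_shape).
st = tf.sparse.SparseTensor(indices=[[0, 0]], values=[1.0], dense_shape=[2, 2])
components = tf.nest.flatten(st, expand_composites=True)  # three Tensors

# map_structure then visits every component, the way ExitResult applies
# AddName to each tensor of a composite result.
same = tf.nest.map_structure(tf.identity, st, expand_composites=True)
tf.nest.assert_same_structure(st, same, expand_composites=True)
```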
@ -2080,10 +2085,7 @@ def _cast_indexed_slice_indices(structure, flat_a, flat_b):

@tf_export("cond", v1=[])
def cond_for_tf_v2(pred,
true_fn=None,
false_fn=None,
name=None):
def cond_for_tf_v2(pred, true_fn=None, false_fn=None, name=None):
"""Return `true_fn()` if the predicate `pred` is true else `false_fn()`.

`true_fn` and `false_fn` both return lists of output tensors. `true_fn` and

@ -2944,15 +2946,15 @@ class WhileContext(ControlFlowContext):
return x
return array_ops.identity(x)

body_result = nest.map_structure(map_fn, body_result,
expand_composites=True)
body_result = nest.map_structure(
map_fn, body_result, expand_composites=True)

# Compare the structure types of input and output of body.
# For backwards compatibility, the first layer is forced to a list
# during this comparison, because inputs are typically lists and
# outputs of the body are typically tuples.
nest.assert_same_structure(list(packed_vars_for_body), list(body_result),
expand_composites=True)
nest.assert_same_structure(
list(packed_vars_for_body), list(body_result), expand_composites=True)

# Store body_result to keep track of TensorArrays returned by body
original_body_result = body_result

@ -3196,9 +3198,10 @@ def while_loop_v2(cond,
n = 10000
x = tf.constant(list(range(n)))
c = lambda i, x: i < n
b = lambda i, x: (tf.Print(i + 1, [i]), tf.Print(x + 1, [i], "x:"))
b = lambda i, x: (tf.compat.v1.Print(i + 1, [i]), tf.compat.v1.Print(x + 1,
[i], "x:"))
i, out = tf.while_loop(c, b, (0, x))
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print(sess.run(i))  # prints [0] ... [9999]

# The following line may increment the counter and x in parallel.

@ -3382,9 +3385,10 @@ def while_loop(cond,
n = 10000
x = tf.constant(list(range(n)))
c = lambda i, x: i < n
b = lambda i, x: (tf.Print(i + 1, [i]), tf.Print(x + 1, [i], "x:"))
b = lambda i, x: (tf.compat.v1.Print(i + 1, [i]), tf.compat.v1.Print(x + 1,
[i], "x:"))
i, out = tf.while_loop(c, b, (0, x))
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print(sess.run(i))  # prints [0] ... [9999]

# The following line may increment the counter and x in parallel.
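Stripped of the v1 `Print`/`Session` plumbing that the docstring is being updated around, the same loop in plain TF2 style is just (a minimal sketch):

```python
import tensorflow as tf

n = 10
# cond and body each receive and return the full loop-variable tuple.
i, x = tf.while_loop(
    cond=lambda i, x: i < n,
    body=lambda i, x: (i + 1, x + 1),
    loop_vars=(tf.constant(0), tf.constant(0)))
# i and x are both 10 once the loop terminates.
```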
@ -3461,6 +3465,7 @@ def while_loop(cond,
if isinstance(x, tensor_array_ops.TensorArray):
return x
return ops.convert_to_tensor(x)

loop_vars = nest.map_structure(convert, loop_vars)
if maximum_iterations is not None:
return loop_vars[1]

@ -3471,10 +3476,12 @@ def while_loop(cond,
if maximum_iterations is not None:
shape_invariants = (tensor_shape.TensorShape([]), shape_invariants)

nest.assert_same_structure(loop_vars, shape_invariants,
expand_composites=False)
nest.assert_same_structure(
loop_vars, shape_invariants, expand_composites=False)
shape_invariants = nest.map_structure(
_get_shape_invariant, loop_vars, shape_invariants,
_get_shape_invariant,
loop_vars,
shape_invariants,
expand_composites=False)

loop_context = WhileContext(

@ -4036,8 +4043,8 @@ class XLAControlFlowContext(ControlFlowContext):
def to_control_flow_context_def(self, context_def, export_scope=None):
# pylint: disable=useless-super-delegation
# NOTE(slebedev): the method is required by `ControlFlowContext`.
super(XLAControlFlowContext, self).to_control_flow_context_def(
context_def, export_scope)
super(XLAControlFlowContext,
self).to_control_flow_context_def(context_def, export_scope)

def IsXLAContext(self):
return True
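The `shape_invariants` structure being validated above pairs one invariant with each loop variable; the documented pattern, as a minimal sketch:

```python
import tensorflow as tf

i0 = tf.constant(0)
m0 = tf.ones([2, 2])
# m grows along axis 0 each iteration, so its invariant leaves that
# dimension unspecified.
i, m = tf.while_loop(
    cond=lambda i, m: i < 3,
    body=lambda i, m: (i + 1, tf.concat([m, m], axis=0)),
    loop_vars=[i0, m0],
    shape_invariants=[i0.get_shape(), tf.TensorShape([None, 2])])
```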
@ -124,7 +124,7 @@ class CriticalSection(object):
will not ensure serial execution:

```python
v = tf.get_variable("v", initializer=0.0, use_resource=True)
v = tf.compat.v1.get_variable("v", initializer=0.0, use_resource=True)
def accumulate(up):
x = v.read_value()
with tf.control_dependencies([x]):
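For contrast with the non-serialized snippet above, a minimal sketch of the serialized pattern `CriticalSection` exists for (using the public `tf.CriticalSection` API; variable names are illustrative):

```python
import tensorflow as tf

cs = tf.CriticalSection()
v = tf.Variable(0.0)

def add_one():
  return v.assign_add(1.0)

# execute() runs add_one under the section's lock, so concurrent calls
# cannot interleave their reads and writes of v.
out = cs.execute(add_one)
```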
@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""CTC (Connectionist Temporal Classification) Operations."""

from __future__ import absolute_import

@ -43,10 +42,13 @@ from tensorflow.python.util.tf_export import tf_export

# pylint: disable=protected-access, invalid-name
@tf_export(v1=["nn.ctc_loss"])
def ctc_loss(labels, inputs=None, sequence_length=None,
def ctc_loss(labels,
inputs=None,
sequence_length=None,
preprocess_collapse_repeated=False,
ctc_merge_repeated=True,
ignore_longer_outputs_than_inputs=False, time_major=True,
ignore_longer_outputs_than_inputs=False,
time_major=True,
logits=None):
"""Computes the CTC (Connectionist Temporal Classification) Loss.

@ -119,28 +121,24 @@ def ctc_loss(labels, inputs=None, sequence_length=None,

Args:
labels: An `int32` `SparseTensor`.
`labels.indices[i, :] == [b, t]` means `labels.values[i]` stores
the id for (batch b, time t).
`labels.values[i]` must take on values in `[0, num_labels)`.
See `core/ops/ctc_ops.cc` for more details.
`labels.indices[i, :] == [b, t]` means `labels.values[i]` stores the id
for (batch b, time t). `labels.values[i]` must take on values in `[0,
num_labels)`. See `core/ops/ctc_ops.cc` for more details.
inputs: 3-D `float` `Tensor`.
If time_major == False, this will be a `Tensor` shaped:
`[batch_size, max_time, num_classes]`.
If time_major == False, this will be a `Tensor` shaped: `[batch_size,
max_time, num_classes]`.
If time_major == True (default), this will be a `Tensor` shaped:
`[max_time, batch_size, num_classes]`.
The logits.
sequence_length: 1-D `int32` vector, size `[batch_size]`.
The sequence lengths.
preprocess_collapse_repeated: Boolean. Default: False.
If True, repeated labels are collapsed prior to the CTC calculation.
`[max_time, batch_size, num_classes]`. The logits.
sequence_length: 1-D `int32` vector, size `[batch_size]`. The sequence
lengths.
preprocess_collapse_repeated: Boolean. Default: False. If True, repeated
labels are collapsed prior to the CTC calculation.
ctc_merge_repeated: Boolean. Default: True.
ignore_longer_outputs_than_inputs: Boolean. Default: False.
If True, sequences with longer outputs than inputs will be ignored.
time_major: The shape format of the `inputs` Tensors.
If True, these `Tensors` must be shaped `[max_time, batch_size,
num_classes]`.
If False, these `Tensors` must be shaped `[batch_size, max_time,
num_classes]`.
ignore_longer_outputs_than_inputs: Boolean. Default: False. If True,
sequences with longer outputs than inputs will be ignored.
time_major: The shape format of the `inputs` Tensors. If True, these
`Tensors` must be shaped `[max_time, batch_size, num_classes]`. If False,
these `Tensors` must be shaped `[batch_size, max_time, num_classes]`.
Using `time_major = True` (default) is a bit more efficient because it
avoids transposes at the beginning of the ctc_loss calculation. However,
most TensorFlow data is batch-major, so by this function also accepts
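The `labels.indices` convention documented above, spelled out for a two-sequence batch (a minimal sketch):

```python
import tensorflow as tf

# Batch entry 0 carries the label sequence [1, 2]; entry 1 carries [3].
labels = tf.sparse.SparseTensor(
    indices=[[0, 0], [0, 1], [1, 0]],  # [b, t] pairs
    values=[1, 2, 3],                  # ids in [0, num_labels)
    dense_shape=[2, 2])
```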
@ -160,8 +158,8 @@ def ctc_loss(labels, inputs=None, sequence_length=None,
raise TypeError("Expected labels (first argument) to be a SparseTensor")

# For internal calculations, we transpose to [time, batch, num_classes]
inputs = deprecation.deprecated_argument_lookup(
"logits", logits, "inputs", inputs)
inputs = deprecation.deprecated_argument_lookup("logits", logits, "inputs",
inputs)
if not time_major:
inputs = array_ops.transpose(inputs, [1, 0, 2])  # (B,T,N) => (T,B,N)

@ -196,7 +194,8 @@ def _CTCLossGrad(op, grad_loss, _):
# so we make sure we prevent silently incorrect results by raising
# an error if the second derivative is requested via prevent_gradient.
grad_without_gradient = array_ops.prevent_gradient(
op.outputs[1], message="Currently there is no way to take the second "
op.outputs[1],
message="Currently there is no way to take the second "
" derivative of ctc_loss due to the fused implementation's interaction "
" with tf.gradients()")
# Return gradient for inputs and None for

@ -221,10 +220,10 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
* `A B B B B` if `merge_repeated=False`.

Args:
inputs: 3-D `float` `Tensor` sized
`[max_time, batch_size, num_classes]`. The logits.
sequence_length: 1-D `int32` vector containing sequence lengths,
having size `[batch_size]`.
inputs: 3-D `float` `Tensor` sized `[max_time, batch_size, num_classes]`.
The logits.
sequence_length: 1-D `int32` vector containing sequence lengths, having size
`[batch_size]`.
merge_repeated: Boolean. Default: True.

Returns:

@ -249,13 +248,16 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
outputs = gen_ctc_ops.ctc_greedy_decoder(
inputs, sequence_length, merge_repeated=merge_repeated)
(decoded_ix, decoded_val, decoded_shape, log_probabilities) = outputs
return ([sparse_tensor.SparseTensor(decoded_ix, decoded_val, decoded_shape)],
log_probabilities)
return ([sparse_tensor.SparseTensor(decoded_ix, decoded_val,
decoded_shape)], log_probabilities)

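The greedy decoder in use, as a minimal sketch (shapes are illustrative):

```python
import tensorflow as tf

logits = tf.random.normal([50, 2, 10])   # [max_time, batch_size, num_classes]
seq_len = tf.fill([2], 50)
(decoded,), neg_sum_logits = tf.nn.ctc_greedy_decoder(logits, seq_len)
# decoded is a SparseTensor of label ids; neg_sum_logits has shape [2, 1].
```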
@tf_export(v1=["nn.ctc_beam_search_decoder"])
def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
top_paths=1, merge_repeated=True):
def ctc_beam_search_decoder(inputs,
sequence_length,
beam_width=100,
top_paths=1,
merge_repeated=True):
"""Performs beam search decoding on the logits given in input.

**Note** The `ctc_greedy_decoder` is a special case of the

@ -271,10 +273,10 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
* `A B B B` if `merge_repeated = False`.

Args:
inputs: 3-D `float` `Tensor`, size
`[max_time x batch_size x num_classes]`. The logits.
sequence_length: 1-D `int32` vector containing sequence lengths,
having size `[batch_size]`.
inputs: 3-D `float` `Tensor`, size `[max_time x batch_size x num_classes]`.
The logits.
sequence_length: 1-D `int32` vector containing sequence lengths, having size
`[batch_size]`.
beam_width: An int scalar >= 0 (beam search beam width).
top_paths: An int scalar >= 0, <= beam_width (controls output size).
merge_repeated: Boolean. Default: True.

@ -300,17 +302,22 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,

decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = (
gen_ctc_ops.ctc_beam_search_decoder(
inputs, sequence_length, beam_width=beam_width, top_paths=top_paths,
inputs,
sequence_length,
beam_width=beam_width,
top_paths=top_paths,
merge_repeated=merge_repeated))

return (
[sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
in zip(decoded_ixs, decoded_vals, decoded_shapes)],
log_probabilities)
return ([
sparse_tensor.SparseTensor(ix, val, shape)
for (ix, val, shape) in zip(decoded_ixs, decoded_vals, decoded_shapes)
], log_probabilities)


@tf_export("nn.ctc_beam_search_decoder", v1=["nn.ctc_beam_search_decoder_v2"])
def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100,
def ctc_beam_search_decoder_v2(inputs,
sequence_length,
beam_width=100,
top_paths=1):
"""Performs beam search decoding on the logits given in input.

@ -319,10 +326,10 @@ def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100,
that decoder is faster for this special case).

Args:
inputs: 3-D `float` `Tensor`, size
`[max_time, batch_size, num_classes]`. The logits.
sequence_length: 1-D `int32` vector containing sequence lengths,
having size `[batch_size]`.
inputs: 3-D `float` `Tensor`, size `[max_time, batch_size, num_classes]`.
The logits.
sequence_length: 1-D `int32` vector containing sequence lengths, having size
`[batch_size]`.
beam_width: An int scalar >= 0 (beam search beam width).
top_paths: An int scalar >= 0, <= beam_width (controls output size).

@ -347,9 +354,12 @@ def ctc_beam_search_decoder_v2(inputs, sequence_length, beam_width=100,

# Note, merge_repeated is an invalid optimization that is removed from the
# public API: it returns low probability paths.
return ctc_beam_search_decoder(inputs, sequence_length=sequence_length,
beam_width=beam_width, top_paths=top_paths,
merge_repeated=False)
return ctc_beam_search_decoder(
inputs,
sequence_length=sequence_length,
beam_width=beam_width,
top_paths=top_paths,
merge_repeated=False)

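The v2 entry point drops `merge_repeated` entirely, matching the new signature above; a minimal sketch:

```python
import tensorflow as tf

logits = tf.random.normal([50, 2, 10])
seq_len = tf.fill([2], 50)
decoded, log_probs = tf.nn.ctc_beam_search_decoder(
    logits, seq_len, beam_width=100, top_paths=3)
# decoded is a list of 3 SparseTensors; log_probs has shape [2, 3].
```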
ops.NotDifferentiable("CTCGreedyDecoder")
@ -388,8 +398,8 @@ def _ctc_state_trans(label_seq):
label_to_blank = array_ops.stack([blank_states, label_states], 1)

# Scatter transitions that don't depend on sequence.
indices = array_ops.concat(
[start_to_label, blank_to_label, label_to_blank], 0)
indices = array_ops.concat([start_to_label, blank_to_label, label_to_blank],
0)
values = array_ops.ones([_get_dim(indices, 0)])
trans = array_ops.scatter_nd(
indices, values, shape=[num_states, num_states])

@ -398,8 +408,8 @@ def _ctc_state_trans(label_seq):
# Label to label transitions. Disallow transitions between repeated labels
# with no blank state in between.
batch_idx = array_ops.zeros_like(label_states[2:])
indices = array_ops.stack(
[batch_idx, label_states[2:], label_states[1:-1]], 1)
indices = array_ops.stack([batch_idx, label_states[2:], label_states[1:-1]],
1)
indices = array_ops.tile(
array_ops.expand_dims(indices, 0), [batch_size, 1, 1])
batch_idx = array_ops.expand_dims(math_ops.range(batch_size), 1) * [1, 0, 0]

@ -431,14 +441,14 @@ def ctc_state_log_probs(seq_lengths, max_seq_length):
num_duration_states = 2
num_states = num_duration_states * num_label_states
log_0 = math_ops.cast(
math_ops.log(math_ops.cast(0, dtypes.float64) + 1e-307),
dtypes.float32)
math_ops.log(math_ops.cast(0, dtypes.float64) + 1e-307), dtypes.float32)

initial_state_log_probs = array_ops.one_hot(
indices=array_ops.zeros([batch_size], dtype=dtypes.int32),
depth=num_states,
on_value=0.0,
off_value=log_0, axis=1)
off_value=log_0,
axis=1)

label_final_state_mask = array_ops.one_hot(
seq_lengths, depth=num_label_states, axis=0)

@ -446,8 +456,8 @@ def ctc_state_log_probs(seq_lengths, max_seq_length):
[num_duration_states, 1, batch_size])
final_state_mask = duration_final_state_mask * label_final_state_mask
final_state_log_probs = (1.0 - final_state_mask) * log_0
final_state_log_probs = array_ops.reshape(
final_state_log_probs, [num_states, batch_size])
final_state_log_probs = array_ops.reshape(final_state_log_probs,
[num_states, batch_size])

return initial_state_log_probs, array_ops.transpose(final_state_log_probs)

@ -475,13 +485,14 @@ def _state_to_olabel(labels, num_labels, states):
label_states = states[:, :, 1:num_label_states]
blank_states = states[:, :, num_label_states:]
one_hot = array_ops.one_hot(
labels - 1, depth=(num_labels - 1),
on_value=0.0, off_value=math_ops.log(0.0))
labels - 1,
depth=(num_labels - 1),
on_value=0.0,
off_value=math_ops.log(0.0))
one_hot = array_ops.expand_dims(one_hot, axis=0)
label_states = array_ops.expand_dims(label_states, axis=3)
label_olabels = math_ops.reduce_logsumexp(label_states + one_hot, axis=2)
blank_olabels = math_ops.reduce_logsumexp(
blank_states, axis=2, keepdims=True)
blank_olabels = math_ops.reduce_logsumexp(blank_states, axis=2, keepdims=True)
return array_ops.concat([blank_olabels, label_olabels], axis=-1)


@ -500,8 +511,8 @@ def _state_to_olabel_unique(labels, num_labels, states, unique):
batch_size = states.shape[1]
num_states = num_label_states - 1
batch_state_major = array_ops.transpose(mul_reduce, perm=[1, 2, 0])
batch_state_major = array_ops.reshape(
batch_state_major, [batch_size * num_states, num_frames])
batch_state_major = array_ops.reshape(batch_state_major,
[batch_size * num_states, num_frames])
batch_offset = math_ops.range(batch_size, dtype=unique_y.dtype) * num_labels
indices = unique_y + array_ops.expand_dims(batch_offset, axis=-1)
indices = array_ops.reshape(indices, [-1, 1])

@ -512,13 +523,11 @@ def _state_to_olabel_unique(labels, num_labels, states, unique):
scatter = array_ops.reshape(scatter, [batch_size, num_labels, num_frames])
scatter = array_ops.where(
math_ops.equal(scatter, 0.0),
array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)),
scatter)
array_ops.fill(array_ops.shape(scatter), math_ops.log(0.0)), scatter)
label_olabels = array_ops.transpose(scatter, [2, 0, 1])
label_olabels = label_olabels[:, :, 1:]

blank_olabels = math_ops.reduce_logsumexp(
blank_states, axis=2, keepdims=True)
blank_olabels = math_ops.reduce_logsumexp(blank_states, axis=2, keepdims=True)

return array_ops.concat([blank_olabels, label_olabels], axis=-1)

@ -534,12 +543,12 @@ def ctc_loss_and_grad(logits, labels, label_length, logit_length, unique=None):

Args:
logits: tensor of shape [frames, batch_size, num_labels]
labels: tensor of shape [batch_size, max_label_seq_length]
label_length: tensor of shape [batch_size]
Length of reference label sequence in labels.
logit_length: tensor of shape [batch_size]
Length of input sequence in logits.
unique: (optional) unique label indices as computed by unique(labels)
If supplied, enables an implementation that is faster and more memory
label_length: tensor of shape [batch_size] Length of reference label
sequence in labels.
logit_length: tensor of shape [batch_size] Length of input sequence in
logits.
unique: (optional) unique label indices as computed by unique(labels) If
supplied, enables an implementation that is faster and more memory
efficient on TPU.

Returns:

@ -563,8 +572,8 @@ def ctc_loss_and_grad(logits, labels, label_length, logit_length, unique=None):
sequence_length=logit_length)

if unique:
olabel_log_probs = _state_to_olabel_unique(
labels, num_labels, fwd_bwd_log_probs, unique)
olabel_log_probs = _state_to_olabel_unique(labels, num_labels,
fwd_bwd_log_probs, unique)
else:
olabel_log_probs = _state_to_olabel(labels, num_labels, fwd_bwd_log_probs)
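These helpers repeatedly build `one_hot` tensors with `off_value=log(0)` so that masked entries vanish under `reduce_logsumexp`. The trick in isolation, as a minimal sketch:

```python
import tensorflow as tf

scores = tf.constant([[0.5, 1.0, 2.0]])
# on_value=0 (= log 1) keeps the chosen entry; off_value=-inf (= log 0)
# removes the rest once reduce_logsumexp sums in log space.
mask = tf.one_hot([1], depth=3, on_value=0.0, off_value=float("-inf"))
picked = tf.reduce_logsumexp(scores + mask, axis=-1)  # ~[1.0]
```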
@ -585,9 +594,14 @@ def _ctc_loss_shape(op):

@tf_export("nn.ctc_loss", v1=["nn.ctc_loss_v2"])
def ctc_loss_v2(labels, logits, label_length, logit_length,
logits_time_major=True, unique=None,
blank_index=None, name=None):
def ctc_loss_v2(labels,
logits,
label_length,
logit_length,
logits_time_major=True,
unique=None,
blank_index=None,
name=None):
"""Computes CTC (Connectionist Temporal Classification) loss.

This op implements the CTC loss as presented in the article:

@ -598,7 +612,8 @@ def ctc_loss_v2(labels, logits, label_length, logit_length,
pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf)

Notes:
- Same as the "Classic CTC" in TensorFlow 1.x's tf.nn.ctc_loss setting of
- Same as the "Classic CTC" in TensorFlow 1.x's tf.compat.v1.nn.ctc_loss
setting of
preprocess_collapse_repeated=False, ctc_merge_repeated=True
- Labels may be supplied as either a dense, zero-padded tensor with a
vector of label sequence lengths OR as a SparseTensor.

@ -612,22 +627,22 @@ def ctc_loss_v2(labels, logits, label_length, logit_length,

Args:
labels: tensor of shape [batch_size, max_label_seq_length] or SparseTensor
logits: tensor of shape [frames, batch_size, num_labels],
if logits_time_major == False, shape is [batch_size, frames, num_labels].
logits: tensor of shape [frames, batch_size, num_labels], if
logits_time_major == False, shape is [batch_size, frames, num_labels].
label_length: tensor of shape [batch_size], None if labels is SparseTensor
Length of reference label sequence in labels.
logit_length: tensor of shape [batch_size]
Length of input sequence in logits.
logits_time_major: (optional) If True (default), logits is shaped
[time, batch, logits]. If False, shape is [batch, time, logits]
logit_length: tensor of shape [batch_size] Length of input sequence in
logits.
logits_time_major: (optional) If True (default), logits is shaped [time,
batch, logits]. If False, shape is [batch, time, logits]
unique: (optional) Unique label indices as computed by
ctc_unique_labels(labels). If supplied, enable a faster, memory
efficient implementation on TPU.
ctc_unique_labels(labels). If supplied, enable a faster, memory efficient
implementation on TPU.
blank_index: (optional) Set the class index to use for the blank label.
Negative values will start from num_classes, ie, -1 will reproduce the
ctc_loss behavior of using num_classes - 1 for the blank symbol.
There is some memory/performance overhead to switching from the default
of 0 as an additional shifted copy of the logits may be created.
ctc_loss behavior of using num_classes - 1 for the blank symbol. There is
some memory/performance overhead to switching from the default of 0 as an
additional shifted copy of the logits may be created.
name: A name for this `Op`. Defaults to "ctc_loss_dense".

Returns:

@ -644,37 +659,43 @@ def ctc_loss_v2(labels, logits, label_length, logit_length,
if blank_index != _get_dim(logits, 2) - 1:
logits = array_ops.concat([
logits[:, :, :blank_index],
logits[:, :, blank_index+1:],
logits[:, :, blank_index:blank_index+1],
], axis=2)
logits[:, :, blank_index + 1:],
logits[:, :, blank_index:blank_index + 1],
],
axis=2)
labels = sparse_tensor.SparseTensor(
labels.indices,
array_ops.where(labels.values < blank_index,
labels.values,
labels.values - 1),
labels.dense_shape)
array_ops.where(labels.values < blank_index, labels.values,
labels.values - 1), labels.dense_shape)

return ctc_loss(labels=labels,
inputs=logits,
sequence_length=logit_length,
time_major=logits_time_major)
return ctc_loss(
labels=labels,
inputs=logits,
sequence_length=logit_length,
time_major=logits_time_major)

if blank_index is None:
blank_index = 0

return ctc_loss_dense(labels=labels,
logits=logits,
label_length=label_length,
logit_length=logit_length,
logits_time_major=logits_time_major,
unique=unique,
blank_index=blank_index,
name=name)
return ctc_loss_dense(
labels=labels,
logits=logits,
label_length=label_length,
logit_length=logit_length,
logits_time_major=logits_time_major,
unique=unique,
blank_index=blank_index,
name=name)

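A minimal sketch of the v2 entry point with dense, zero-padded labels (shapes and values are illustrative):

```python
import tensorflow as tf

frames, batch, num_labels = 50, 2, 28
logits = tf.random.normal([frames, batch, num_labels])
labels = tf.constant([[1, 2, 3, 0], [4, 5, 0, 0]])   # zero-padded
loss = tf.nn.ctc_loss(
    labels=labels,
    logits=logits,
    label_length=tf.constant([3, 2]),                # true label lengths
    logit_length=tf.fill([batch], frames),
    blank_index=0)                                   # class 0 is the blank
```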
def ctc_loss_dense(labels, logits, label_length, logit_length,
logits_time_major=True, unique=None,
blank_index=0, name=None):
def ctc_loss_dense(labels,
logits,
label_length,
logit_length,
logits_time_major=True,
unique=None,
blank_index=0,
name=None):
"""Computes CTC (Connectionist Temporal Classification) loss.

This op implements the CTC loss as presented in the article:

@ -694,8 +715,8 @@ def ctc_loss_dense(labels, logits, label_length, logit_length,
](https://ieeexplore.ieee.org/iel7/8260578/8268903/08268944.pdf)

Notes:
Significant differences from tf.nn.ctc_loss:
Supports GPU and TPU (tf.nn.ctc_loss supports CPU only):
Significant differences from tf.compat.v1.nn.ctc_loss:
Supports GPU and TPU (tf.compat.v1.nn.ctc_loss supports CPU only):
For batched operations, GPU and TPU are significantly faster than using
ctc_loss on CPU.
This implementation runs on CPU, but significantly slower than ctc_loss.

@ -714,21 +735,21 @@ def ctc_loss_dense(labels, logits, label_length, logit_length,

Args:
labels: tensor of shape [batch_size, max_label_seq_length]
logits: tensor of shape [frames, batch_size, num_labels],
if logits_time_major == False, shape is [batch_size, frames, num_labels].
label_length: tensor of shape [batch_size]
Length of reference label sequence in labels.
logit_length: tensor of shape [batch_size]
Length of input sequence in logits.
logits_time_major: (optional) If True (default), logits is shaped
[time, batch, logits]. If False, shape is [batch, time, logits]
unique: (optional) Unique label indices as computed by unique(labels).
If supplied, enable a faster, memory efficient implementation on TPU.
logits: tensor of shape [frames, batch_size, num_labels], if
logits_time_major == False, shape is [batch_size, frames, num_labels].
label_length: tensor of shape [batch_size] Length of reference label
sequence in labels.
logit_length: tensor of shape [batch_size] Length of input sequence in
logits.
logits_time_major: (optional) If True (default), logits is shaped [time,
batch, logits]. If False, shape is [batch, time, logits]
unique: (optional) Unique label indices as computed by unique(labels). If
supplied, enable a faster, memory efficient implementation on TPU.
blank_index: (optional) Set the class index to use for the blank label.
Negative values will start from num_classes, ie, -1 will reproduce the
ctc_loss behavior of using num_classes - 1 for the blank symbol.
There is some memory/performance overhead to switching from the default
of 0 as an additional shifted copy of the logits may be created.
ctc_loss behavior of using num_classes - 1 for the blank symbol. There is
some memory/performance overhead to switching from the default of 0 as an
additional shifted copy of the logits may be created.
name: A name for this `Op`. Defaults to "ctc_loss_dense".

Returns:

@ -749,10 +770,11 @@ def ctc_loss_dense(labels, logits, label_length, logit_length,
if blank_index < 0:
blank_index += _get_dim(logits, 2)
logits = array_ops.concat([
logits[:, :, blank_index:blank_index+1],
logits[:, :, blank_index:blank_index + 1],
logits[:, :, :blank_index],
logits[:, :, blank_index+1:],
], axis=2)
logits[:, :, blank_index + 1:],
],
axis=2)
labels = array_ops.where(labels < blank_index, labels + 1, labels)

args = [logits, labels, label_length, logit_length]

@ -762,9 +784,10 @@ def ctc_loss_dense(labels, logits, label_length, logit_length,
args.extend([unique_y, unique_idx])

# TODO(tombagby): Update to tfe.defun
@function.Defun(*[x.dtype for x in args],
python_grad_func=_ctc_loss_grad,
shape_func=_ctc_loss_shape)
@function.Defun(
*[x.dtype for x in args],
python_grad_func=_ctc_loss_grad,
shape_func=_ctc_loss_shape)
def compute_ctc_loss(logits_t, labels_t, label_length_t, logit_length_t,
*unique_t):
"""Compute CTC loss."""

@ -802,16 +825,16 @@ def collapse_repeated(labels, seq_length, name=None):
and int tensor of shape [batch] with new sequence lengths.
"""

with ops.name_scope(name, "collapse_repeated_labels",
[labels, seq_length]):
with ops.name_scope(name, "collapse_repeated_labels", [labels, seq_length]):
labels = ops.convert_to_tensor(labels, name="labels")
seq_length = ops.convert_to_tensor(seq_length, name="seq_length")

# Mask labels that don't equal previous label.
label_mask = array_ops.concat(
[array_ops.ones_like(labels[:, :1], dtypes.bool),
math_ops.not_equal(labels[:, 1:], labels[:, :-1])],
axis=1)
label_mask = array_ops.concat([
array_ops.ones_like(labels[:, :1], dtypes.bool),
math_ops.not_equal(labels[:, 1:], labels[:, :-1])
],
axis=1)

# Filter labels that aren't in the original sequence.
maxlen = _get_dim(labels, 1)

@ -851,8 +874,7 @@ def dense_labels_to_sparse(dense, length):

Args:
dense: tensor of shape [batch, max_length]
length: int tensor of shape [batch]
The length of each sequence in dense.
length: int tensor of shape [batch] The length of each sequence in dense.

Returns:
tf.SparseTensor with values only for the valid elements of sequences.

@ -867,7 +889,8 @@ def dense_labels_to_sparse(dense, length):
array_ops.boolean_mask(flat_indices, flat_mask), 1)
values = array_ops.boolean_mask(flat_values, flat_mask)
sparse = sparse_tensor.SparseTensor(
indices=indices, values=math_ops.cast(values, dtypes.int32),
indices=indices,
values=math_ops.cast(values, dtypes.int32),
dense_shape=array_ops.shape(flat_values, out_type=dtypes.int64))
reshaped = sparse_ops.sparse_reshape(sparse, array_ops.shape(dense))
max_length = math_ops.reduce_max(length)

@ -876,14 +899,15 @@ def dense_labels_to_sparse(dense, length):
values=reshaped.values,
dense_shape=[
math_ops.cast(reshaped.dense_shape[0], dtypes.int64),
math_ops.cast(max_length, dtypes.int64)])
math_ops.cast(max_length, dtypes.int64)
])

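`collapse_repeated` is easiest to see on a tiny batch; a minimal sketch, with outputs following the docstring above:

```python
import tensorflow as tf

labels = tf.constant([[1, 1, 2, 2, 2, 3]])
collapsed, new_len = tf.nn.collapse_repeated(labels, seq_length=tf.constant([6]))
# collapsed -> [[1, 2, 3]]  (padded to the longest collapsed sequence)
# new_len   -> [3]
```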
@tf_export("nn.ctc_unique_labels")
def ctc_unique_labels(labels, name=None):
"""Get unique labels and indices for batched labels for `tf.nn.ctc_loss`.

For use with `tf.nn.ctc_loss_v2` optional argument `unique`: This op can be
For use with `tf.nn.ctc_loss` optional argument `unique`: This op can be
used to preprocess labels in input pipeline to for better speed/memory use
computing the ctc loss on TPU.

@ -904,25 +928,24 @@ def ctc_unique_labels(labels, name=None):

with ops.name_scope(name, "ctc_unique_labels", [labels]):
labels = ops.convert_to_tensor(labels, name="labels")

def _unique(x):
u = array_ops.unique(x)
y = array_ops.pad(
u.y, [[0, _get_dim(u.idx, 0) - _get_dim(u.y, 0)]])
y = array_ops.pad(u.y, [[0, _get_dim(u.idx, 0) - _get_dim(u.y, 0)]])
y = math_ops.cast(y, dtypes.int64)
return [y, u.idx]
return map_fn.map_fn(
_unique, labels, dtype=[dtypes.int64, dtypes.int32])

return map_fn.map_fn(_unique, labels, dtype=[dtypes.int64, dtypes.int32])


def _sum_states(idx, states):
"""Take logsumexp for each unique state out of all label states.

Args:
idx: tensor of shape [batch, label_length]
For each sequence, indices into a set of unique labels as computed by
calling unique.
states: tensor of shape [frames, batch, label_length]
Log probabilities for each label state.
idx: tensor of shape [batch, label_length] For each sequence, indices into a
set of unique labels as computed by calling unique.
states: tensor of shape [frames, batch, label_length] Log probabilities for
each label state.

Returns:
tensor of shape [frames, batch_size, label_length], log probabilites summed

@ -934,7 +957,10 @@ def _sum_states(idx, states):
num_states = _get_dim(states, 2)
states = array_ops.expand_dims(states, axis=2)
one_hot = array_ops.one_hot(
idx, depth=num_states, on_value=0.0, off_value=math_ops.log(0.0),
idx,
depth=num_states,
on_value=0.0,
off_value=math_ops.log(0.0),
axis=1)
return math_ops.reduce_logsumexp(states + one_hot, axis=-1)

@ -945,8 +971,8 @@ def _forward_backward_log(state_trans_log_probs, initial_state_log_probs,
"""Forward-backward algorithm computed in log domain.

Args:
state_trans_log_probs: tensor of shape [states, states] or
if different transition matrix per batch [batch_size, states, states]
state_trans_log_probs: tensor of shape [states, states] or if different
transition matrix per batch [batch_size, states, states]
initial_state_log_probs: tensor of shape [batch_size, states]
final_state_log_probs: tensor of shape [batch_size, states]
observed_log_probs: tensor of shape [frames, batch_size, states]

@ -982,8 +1008,8 @@ def _forward_backward_log(state_trans_log_probs, initial_state_log_probs,
state_log_prob -= log_prob_sum
return state_log_prob

fwd = _scan(_forward, observed_log_probs, initial_state_log_probs,
inclusive=True)
fwd = _scan(
_forward, observed_log_probs, initial_state_log_probs, inclusive=True)

def _backward(accs, elems):
"""Calculate log probs and cumulative sum masked for sequence length."""

@ -1009,9 +1035,11 @@ def _forward_backward_log(state_trans_log_probs, initial_state_log_probs,
mask = array_ops.sequence_mask(sequence_length, maxlen, dtypes.float32)
mask = array_ops.transpose(mask, perm=[1, 0])

bwd, cum_log_sum = _scan(_backward, (observed_log_probs, mask),
(final_state_log_probs, zero_log_sum),
reverse=True, inclusive=True)
bwd, cum_log_sum = _scan(
_backward, (observed_log_probs, mask),
(final_state_log_probs, zero_log_sum),
reverse=True,
inclusive=True)

fwd_bwd_log_probs = fwd[1:] + bwd[1:]
fwd_bwd_log_probs_sum = math_ops.reduce_logsumexp(

@ -1045,9 +1073,9 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
scan(lambda a, e: a + (e[0] * e[1]), (elems1, elems2), 0.0)

Args:
fn: callable, fn(accumulators, element) return new accumulator values.
The (possibly nested) sequence of accumulators is the same as `initial`
and the return value must have the same structure.
fn: callable, fn(accumulators, element) return new accumulator values. The
(possibly nested) sequence of accumulators is the same as `initial` and
the return value must have the same structure.
elems: A (possibly nested) tensor which will be unpacked along the first
dimension. The resulting slices will be the second argument to fn. The
first dimension of all nested input tensors must be the same.

@ -1055,8 +1083,8 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
values for the accumulators.
reverse: (optional) True enables scan and output elems in reverse order.
inclusive: (optional) True includes the initial accumulator values in the
output. Length of output will be len(elem sequence) + 1. Not meaningful
if final_only is True.
output. Length of output will be len(elem sequence) + 1. Not meaningful if
final_only is True.
final_only: (optional) When True, return only the final accumulated values,
not the concatenation of accumulated values for each input.

@ -1102,13 +1130,16 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
new_out = []
else:
update_i = i + 1 if inclusive and not reverse else i
new_out = [inplace_ops.alias_inplace_update(x, update_i, y)
for x, y in zip(out, flat_accum)]
new_out = [
inplace_ops.alias_inplace_update(x, update_i, y)
for x, y in zip(out, flat_accum)
]
i = i - 1 if reverse else i + 1
return [i, num_elems] + new_out + flat_accum

init_i = (array_ops.shape(flat_elems[0])[0] - 1 if reverse
else constant_op.constant(0, dtype=dtypes.int32))
init_i = (
array_ops.shape(flat_elems[0])[0] -
1 if reverse else constant_op.constant(0, dtype=dtypes.int32))
outputs = []
if not final_only:
num_outputs = array_ops.shape(flat_elems[0])[0] + (1 if inclusive else 0)

@ -1117,8 +1148,8 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
[[num_outputs], array_ops.shape(initial_accum)], 0)
out = inplace_ops.empty(out_shape, dtype=initial_accum.dtype, init=True)
if inclusive:
out = inplace_ops.alias_inplace_add(
out, init_i + (1 if reverse else 0), initial_accum)
out = inplace_ops.alias_inplace_add(out, init_i + (1 if reverse else 0),
initial_accum)
outputs.append(out)
loop_in = [init_i, num_elems] + outputs + flat_initial
hostmem = [
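The `ctc_unique_labels` docstring example, in runnable form (a minimal sketch):

```python
import tensorflow as tf

labels = tf.constant([[3, 4, 4, 3]])
unique_y, unique_idx = tf.nn.ctc_unique_labels(labels)
# unique_y   -> [[3, 4, 0, 0]]  (unique ids, padded back to label length)
# unique_idx -> [[0, 1, 1, 0]]  (position of each label in unique_y)
```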
@ -89,7 +89,7 @@ def custom_gradient(f):
```python
def log1pexp(x):
return tf.log(1 + tf.exp(x))
return tf.math.log(1 + tf.exp(x))
```

Due to numerical instability, the gradient this function evaluated at x=100 is

@ -110,7 +110,7 @@ def custom_gradient(f):
e = tf.exp(x)
def grad(dy):
return dy * (1 - 1 / (1 + e))
return tf.log(1 + e), grad
return tf.math.log(1 + e), grad
```

With this definition, the gradient at x=100 will be correctly evaluated as
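Assembled from the two hunks above, the renamed docstring example reads as follows (a sketch of its numerically stable formulation):

```python
import tensorflow as tf

@tf.custom_gradient
def log1pexp(x):
  e = tf.exp(x)
  def grad(dy):
    # Analytically simplified gradient, stable even at large x.
    return dy * (1 - 1 / (1 + e))
  return tf.math.log(1 + e), grad
```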
@ -129,8 +129,8 @@ class QueueBase(object):
handle single elements, versions that support enqueuing and
dequeuing a batch of elements at once.

See `tf.FIFOQueue` and
`tf.RandomShuffleQueue` for concrete
See `tf.queue.FIFOQueue` and
`tf.queue.RandomShuffleQueue` for concrete
implementations of this class, and instructions on how to create
them.
"""

@ -625,7 +625,7 @@ def _shared_name(shared_name):
class RandomShuffleQueue(QueueBase):
"""A queue implementation that dequeues elements in a random order.

See `tf.QueueBase` for a description of the methods on
See `tf.queue.QueueBase` for a description of the methods on
this class.
"""

@ -674,7 +674,7 @@ class RandomShuffleQueue(QueueBase):
with the same length as `dtypes`, or `None`. If specified the dequeue
methods return a dictionary with the names as keys.
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
shared_name: (Optional.) If non-empty, this queue will be shared under
the given name across multiple sessions.

@ -711,7 +711,7 @@ class RandomShuffleQueue(QueueBase):
class FIFOQueue(QueueBase):
"""A queue implementation that dequeues elements in first-in first-out order.

See `tf.QueueBase` for a description of the methods on
See `tf.queue.QueueBase` for a description of the methods on
this class.
"""

@ -774,7 +774,7 @@ class PaddingFIFOQueue(QueueBase):
A `PaddingFIFOQueue` may contain components with dynamic shape, while also
supporting `dequeue_many`. See the constructor for more details.

See `tf.QueueBase` for a description of the methods on
See `tf.queue.QueueBase` for a description of the methods on
this class.
"""

@ -847,7 +847,7 @@ class PaddingFIFOQueue(QueueBase):
class PriorityQueue(QueueBase):
"""A queue implementation that dequeues elements in prioritized order.

See `tf.QueueBase` for a description of the methods on
See `tf.queue.QueueBase` for a description of the methods on
this class.
"""
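The renamed `tf.queue` namespace in use, as a minimal sketch:

```python
import tensorflow as tf

q = tf.queue.FIFOQueue(capacity=3, dtypes=tf.float32)
q.enqueue_many(tf.constant([1.0, 2.0, 3.0]))
first = q.dequeue()  # 1.0: first in, first out
```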
@ -74,7 +74,7 @@ def assert_scalar_congruency(bijector,
huge `n`.
n: Number of samples to draw for the checks.
rtol: Positive number. Used for the Jacobian check.
sess: `tf.Session`. Defaults to the default session.
sess: `tf.compat.v1.Session`. Defaults to the default session.

Raises:
AssertionError: If tests fail.

@ -90,7 +90,7 @@ class Gamma(distribution.Distribution):
the samples that are smaller than `np.finfo(dtype).tiny` are rounded
to this value, so it appears more often than it should.
This should only be noticeable when the `concentration` is very small, or the
`rate` is very large. See note in `tf.random_gamma` docstring.
`rate` is very large. See note in `tf.random.gamma` docstring.

Samples of this distribution are reparameterized (pathwise differentiable).
The derivatives are computed using the approach described in the paper

@ -213,7 +213,7 @@ class Gamma(distribution.Distribution):
return tensor_shape.scalar()

@distribution_util.AppendDocstring(
"""Note: See `tf.random_gamma` docstring for sampling details and
"""Note: See `tf.random.gamma` docstring for sampling details and
caveats.""")
def _sample_n(self, n, seed=None):
return random_ops.random_gamma(
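The renamed sampler referenced here, as a minimal sketch (output shape is `shape` followed by `alpha.shape`):

```python
import tensorflow as tf

samples = tf.random.gamma(shape=[10], alpha=[0.5, 1.5])
# samples.shape == [10, 2]: ten draws per concentration value
```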
@ -182,9 +182,9 @@ class TransformedDistribution(distribution_lib.Distribution):
distribution=ds.Normal(loc=0., scale=1.),
bijector=ds.bijectors.Inline(
forward_fn=tf.exp,
inverse_fn=tf.log,
inverse_fn=tf.math.log,
inverse_log_det_jacobian_fn=(
lambda y: -tf.reduce_sum(tf.log(y), axis=-1)),
lambda y: -tf.reduce_sum(tf.math.log(y), axis=-1)),
name="LogNormalTransformedDistribution")
```

@ -36,9 +36,12 @@ from tensorflow.python.ops import nn
from tensorflow.python.util import tf_inspect


def assert_integer_form(
x, data=None, summarize=None, message=None,
int_dtype=None, name="assert_integer_form"):
def assert_integer_form(x,
data=None,
summarize=None,
message=None,
int_dtype=None,
name="assert_integer_form"):
"""Assert that x has integer components (or floats equal to integers).

Args:

@ -69,8 +72,12 @@ def assert_integer_form(
except KeyError:
raise TypeError("Unrecognized type {}".format(x.dtype.name))
return check_ops.assert_equal(
x, math_ops.cast(math_ops.cast(x, int_dtype), x.dtype),
data=data, summarize=summarize, message=message, name=name)
x,
math_ops.cast(math_ops.cast(x, int_dtype), x.dtype),
data=data,
summarize=summarize,
message=message,
name=name)


def assert_symmetric(matrix):

@ -91,8 +98,8 @@ def embed_check_nonnegative_integer_form(
if not x.dtype.is_integer:
assertions += [
assert_integer_form(
x, message="'{}' cannot contain fractional components.".format(
x)),
x,
message="'{}' cannot contain fractional components.".format(x)),
]
return control_flow_ops.with_dependencies(assertions, x)

@ -114,16 +121,18 @@ def same_dynamic_shape(a, b):
# static shape inference may break the equality comparison between
# shape(a) and shape(b) in math_ops.equal.
def all_shapes_equal():
return math_ops.reduce_all(math_ops.equal(
array_ops.concat([array_ops.shape(a), array_ops.shape(b)], 0),
array_ops.concat([array_ops.shape(b), array_ops.shape(a)], 0)))
return math_ops.reduce_all(
math_ops.equal(
array_ops.concat(
[array_ops.shape(a), array_ops.shape(b)], 0),
array_ops.concat(
[array_ops.shape(b), array_ops.shape(a)], 0)))

# One of the shapes isn't fully defined, so we need to use the dynamic
# shape.
return control_flow_ops.cond(
math_ops.equal(array_ops.rank(a), array_ops.rank(b)),
all_shapes_equal,
lambda: constant_op.constant(False))
all_shapes_equal, lambda: constant_op.constant(False))


def maybe_get_static_value(x, dtype=None):

@ -162,13 +171,13 @@ def get_logits_and_probs(logits=None,
Args:
logits: Floating-point `Tensor` representing log-odds.
probs: Floating-point `Tensor` representing probabilities.
multidimensional: Python `bool`, default `False`.
If `True`, represents whether the last dimension of `logits` or `probs`,
a `[N1, N2, ... k]` dimensional tensor, representing the
logit or probability of `shape[-1]` classes.
validate_args: Python `bool`, default `False`. When `True`, either assert
`0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
of `probs` sums to one.
multidimensional: Python `bool`, default `False`. If `True`, represents
whether the last dimension of `logits` or `probs`, a `[N1, N2, ... k]`
dimensional tensor, representing the logit or probability of `shape[-1]`
classes.
validate_args: Python `bool`, default `False`. When `True`, either assert `0
<= probs <= 1` (if not `multidimensional`) or that the last dimension of
`probs` sums to one.
name: A name for this operation (optional).
dtype: `tf.DType` to prefer when converting args to `Tensor`s.

@ -213,8 +222,10 @@ def get_logits_and_probs(logits=None,
message="probs does not sum to 1.")
]
else:
dependencies += [check_ops.assert_less_equal(
probs, one, message="probs has components greater than 1.")]
dependencies += [
check_ops.assert_less_equal(
probs, one, message="probs has components greater than 1.")
]
probs = control_flow_ops.with_dependencies(dependencies, probs)

with ops.name_scope("logits"):
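The logits/probs relationship these helpers convert between, shown directly (standard log-odds arithmetic, not this module's internal code):

```python
import tensorflow as tf

probs = tf.constant([0.1, 0.5, 0.9])
logits = tf.math.log(probs / (1 - probs))   # log-odds
recovered = tf.math.sigmoid(logits)         # back to probs
```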
@ -288,8 +299,7 @@ def _is_integer_like_by_dtype(dt):

def embed_check_categorical_event_shape(
categorical_param,
name="embed_check_categorical_event_shape"):
categorical_param, name="embed_check_categorical_event_shape"):
"""Embeds checks that categorical distributions don't have too many classes.

A categorical-type distribution is one which, e.g., returns the class label

@ -341,8 +351,8 @@ def embed_check_categorical_event_shape(
# For more details, see:
# https://en.wikipedia.org/wiki/Floating-point_arithmetic#Internal_representation
x_dtype = x.dtype.base_dtype
max_event_size = (_largest_integer_by_dtype(x_dtype)
if x_dtype.is_floating else 0)
max_event_size = (
_largest_integer_by_dtype(x_dtype) if x_dtype.is_floating else 0)
if max_event_size == 0:
raise TypeError("Unable to validate size of unrecognized dtype "
"({}).".format(x_dtype.name))

@ -357,34 +367,36 @@ def embed_check_categorical_event_shape(
raise ValueError("A categorical-distribution parameter must have at "
"least 2 events.")
if event_size > max_event_size:
raise ValueError(
"Number of classes exceeds `dtype` precision, i.e., "
"{} implies shape ({}) cannot exceed {}.".format(
x_dtype.name, event_size, max_event_size))
raise ValueError("Number of classes exceeds `dtype` precision, i.e., "
"{} implies shape ({}) cannot exceed {}.".format(
x_dtype.name, event_size, max_event_size))
return x
else:
event_size = array_ops.shape(x, name="x_shape")[-1]
return control_flow_ops.with_dependencies([
check_ops.assert_rank_at_least(
x, 1, message=("A categorical-distribution parameter must have "
"at least 1 dimension.")),
x,
1,
message=("A categorical-distribution parameter must have "
"at least 1 dimension.")),
check_ops.assert_greater_equal(
array_ops.shape(x)[-1], 2,
array_ops.shape(x)[-1],
2,
message=("A categorical-distribution parameter must have at "
"least 2 events.")),
check_ops.assert_less_equal(
event_size, max_event_size,
event_size,
max_event_size,
message="Number of classes exceeds `dtype` precision, "
"i.e., {} dtype cannot exceed {} shape.".format(
x_dtype.name, max_event_size)),
"i.e., {} dtype cannot exceed {} shape.".format(
x_dtype.name, max_event_size)),
], x)


def embed_check_integer_casting_closed(
x,
target_dtype,
assert_nonnegative=True,
name="embed_check_casting_closed"):
def embed_check_integer_casting_closed(x,
target_dtype,
assert_nonnegative=True,
name="embed_check_casting_closed"):
"""Ensures integers remain unaffected despite casting to/from int/float types.

Example integer-types: `uint8`, `int32`, `bool`.

@ -416,19 +428,18 @@ def embed_check_integer_casting_closed(

with ops.name_scope(name, values=[x]):
x = ops.convert_to_tensor(x, name="x")
if (not _is_integer_like_by_dtype(x.dtype)
and not x.dtype.is_floating):
if (not _is_integer_like_by_dtype(x.dtype) and not x.dtype.is_floating):
raise TypeError("{}.dtype must be floating- or "
"integer-type.".format(x.dtype.name))
if (not _is_integer_like_by_dtype(target_dtype)
and not target_dtype.is_floating):
if (not _is_integer_like_by_dtype(target_dtype) and
not target_dtype.is_floating):
raise TypeError("target_dtype ({}) must be floating- or "
"integer-type.".format(target_dtype.name))
if (not _is_integer_like_by_dtype(x.dtype)
and not _is_integer_like_by_dtype(target_dtype)):
if (not _is_integer_like_by_dtype(x.dtype) and
not _is_integer_like_by_dtype(target_dtype)):
raise TypeError("At least one of {}.dtype ({}) and target_dtype ({}) "
"must be integer-type.".format(
x, x.dtype.name, target_dtype.name))
"must be integer-type.".format(x, x.dtype.name,
target_dtype.name))

assertions = []
if assert_nonnegative:

@ -442,26 +453,28 @@ def embed_check_integer_casting_closed(
# Since this check implies the magnitude check below, we need only it.
assertions += [
assert_integer_form(
x, int_dtype=target_dtype,
x,
int_dtype=target_dtype,
message="Elements must be {}-equivalent.".format(
target_dtype.name)),
]
else:
if (_largest_integer_by_dtype(x.dtype)
> _largest_integer_by_dtype(target_dtype)):
if (_largest_integer_by_dtype(x.dtype) >
_largest_integer_by_dtype(target_dtype)):
# Cast may lose integer precision.
assertions += [
check_ops.assert_less_equal(
x, _largest_integer_by_dtype(target_dtype),
x,
_largest_integer_by_dtype(target_dtype),
message=("Elements cannot exceed {}.".format(
_largest_integer_by_dtype(target_dtype)))),
]
if (not assert_nonnegative and
(_smallest_integer_by_dtype(x.dtype)
< _smallest_integer_by_dtype(target_dtype))):
if (not assert_nonnegative and (_smallest_integer_by_dtype(
x.dtype) < _smallest_integer_by_dtype(target_dtype))):
assertions += [
check_ops.assert_greater_equal(
x, _smallest_integer_by_dtype(target_dtype),
x,
_smallest_integer_by_dtype(target_dtype),
message=("Elements cannot be smaller than {}.".format(
_smallest_integer_by_dtype(target_dtype)))),
]
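The round-trip comparison at the heart of `assert_integer_form`, written against the public API (a minimal sketch):

```python
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
# Passes only when every float is exactly an integer value: casting to
# int and back must leave the tensor unchanged.
tf.debugging.assert_equal(x, tf.cast(tf.cast(x, tf.int32), x.dtype))
```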
@ -547,11 +560,10 @@ def matrix_diag_transform(matrix, transform=None, name=None):
|
||||
Args:
|
||||
matrix: Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
|
||||
equal.
|
||||
transform: Element-wise function mapping `Tensors` to `Tensors`. To
|
||||
be applied to the diagonal of `matrix`. If `None`, `matrix` is returned
|
||||
transform: Element-wise function mapping `Tensors` to `Tensors`. To be
|
||||
applied to the diagonal of `matrix`. If `None`, `matrix` is returned
|
||||
unchanged. Defaults to `None`.
|
||||
name: A name to give created ops.
|
||||
Defaults to "matrix_diag_transform".
|
||||
name: A name to give created ops. Defaults to "matrix_diag_transform".
|
||||
|
||||
Returns:
|
||||
A `Tensor` with same shape and `dtype` as `matrix`.
|
||||
@ -583,7 +595,7 @@ def rotate_transpose(x, shift, name="rotate_transpose"):
Example:

```python
x = tf.random_normal([1, 2, 3, 4]) # Tensor of shape [1, 2, 3, 4].
x = tf.random.normal([1, 2, 3, 4]) # Tensor of shape [1, 2, 3, 4].
rotate_transpose(x, -1).shape == [2, 3, 4, 1]
rotate_transpose(x, -2).shape == [3, 4, 1, 2]
rotate_transpose(x, 1).shape == [4, 1, 2, 3]
@ -612,10 +624,12 @@ def rotate_transpose(x, shift, name="rotate_transpose"):
shift_value_static = tensor_util.constant_value(shift)
ndims = x.get_shape().ndims
if ndims is not None and shift_value_static is not None:
if ndims < 2: return x
if ndims < 2:
return x
shift_value_static = np.sign(shift_value_static) * (
abs(shift_value_static) % ndims)
if shift_value_static == 0: return x
if shift_value_static == 0:
return x
perm = np.roll(np.arange(ndims), shift_value_static)
return array_ops.transpose(x, perm=perm)
else:
@ -633,19 +647,16 @@ def rotate_transpose(x, shift, name="rotate_transpose"):
# Finally, we transform shift by modulo length so it can be specified
# independently from the array upon which it operates (like python).
ndims = array_ops.rank(x)
shift = array_ops.where(math_ops.less(shift, 0),
math_ops.mod(-shift, ndims),
ndims - math_ops.mod(shift, ndims))
shift = array_ops.where(
math_ops.less(shift, 0), math_ops.mod(-shift, ndims),
ndims - math_ops.mod(shift, ndims))
first = math_ops.range(0, shift)
last = math_ops.range(shift, ndims)
perm = array_ops.concat([last, first], 0)
return array_ops.transpose(x, perm=perm)


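For context on the hunks above: the static fast path of `rotate_transpose` boils down to rolling the identity permutation, which is easy to check outside TensorFlow. A minimal NumPy sketch (the helper name `rotate_transpose_perm` is made up for illustration):

```python
import numpy as np

def rotate_transpose_perm(ndims, shift):
  # Mirror the static branch above: normalize shift into (-ndims, ndims),
  # then circularly roll the identity permutation.
  shift = np.sign(shift) * (abs(shift) % ndims)
  return np.roll(np.arange(ndims), shift)

x = np.zeros((1, 2, 3, 4))
perm = rotate_transpose_perm(x.ndim, -1)  # array([1, 2, 3, 0])
print(np.transpose(x, perm).shape)        # (2, 3, 4, 1)
```

This reproduces the `rotate_transpose(x, -1).shape == [2, 3, 4, 1]` line from the docstring example.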
def pick_vector(cond,
true_vector,
false_vector,
name="pick_vector"):
def pick_vector(cond, true_vector, false_vector, name="pick_vector"):
"""Picks possibly different length row `Tensor`s based on condition.

Value `Tensor`s should have exactly one dimension.
@ -659,13 +670,9 @@ def pick_vector(
true_vector: `Tensor` of one dimension. Returned when cond is `True`.
false_vector: `Tensor` of one dimension. Returned when cond is `False`.
name: Python `str`. The name to give this op.

Example:

```python
pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15, 18)) # [10, 11]
pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15, 18)) # [15, 16, 17]
```
Example: ```python pick_vector(tf.less(0, 5), tf.range(10, 12), tf.range(15,
18)) # [10, 11] pick_vector(tf.less(5, 0), tf.range(10, 12), tf.range(15,
18)) # [15, 16, 17] ```

Returns:
true_or_false_vector: `Tensor`.
@ -687,17 +694,17 @@ def pick_vector(
false_vector = ops.convert_to_tensor(false_vector, name="false_vector")
if true_vector.dtype != false_vector.dtype:
raise TypeError(
"%s.dtype=%s does not match %s.dtype=%s"
% (true_vector, true_vector.dtype,
false_vector, false_vector.dtype))
"%s.dtype=%s does not match %s.dtype=%s" %
(true_vector, true_vector.dtype, false_vector, false_vector.dtype))
n = array_ops.shape(true_vector)[0]
return array_ops.slice(
array_ops.concat([true_vector, false_vector], 0),
[array_ops.where(cond, 0, n)], [array_ops.where(cond, n, -1)])


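The `concat`/`slice` pattern in the hunk above is worth spelling out: both candidate vectors are concatenated, and the slice offset and size pick one of the two segments (a size of `-1` means "to the end"). A rough NumPy sketch of the same idea, assuming 1-D inputs:

```python
import numpy as np

def pick_vector(cond, true_vector, false_vector):
  # Concatenate both candidates, then slice out the chosen one:
  # offset 0 / length n when cond is True, offset n / to-the-end otherwise.
  both = np.concatenate([true_vector, false_vector])
  n = len(true_vector)
  return both[:n] if cond else both[n:]

print(pick_vector(0 < 5, np.arange(10, 12), np.arange(15, 18)))  # [10 11]
print(pick_vector(5 < 0, np.arange(10, 12), np.arange(15, 18)))  # [15 16 17]
```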
def prefer_static_broadcast_shape(
shape1, shape2, name="prefer_static_broadcast_shape"):
def prefer_static_broadcast_shape(shape1,
shape2,
name="prefer_static_broadcast_shape"):
"""Convenience function which statically broadcasts shape when possible.

Args:
@ -710,6 +717,7 @@ def prefer_static_broadcast_shape(
statically), or as a `Tensor`.
"""
with ops.name_scope(name, values=[shape1, shape2]):

def make_shape_tensor(x):
return ops.convert_to_tensor(x, name="shape", dtype=dtypes.int32)

@ -892,14 +900,11 @@ def fill_triangular(x, upper=False, name=None):
else:
x_list = [x[..., n:], array_ops.reverse(x, axis=[ndims - 1])]
new_shape = (
static_final_shape.as_list()
if static_final_shape.is_fully_defined()
static_final_shape.as_list() if static_final_shape.is_fully_defined()
else array_ops.concat([array_ops.shape(x)[:-1], [n, n]], axis=0))
x = array_ops.reshape(array_ops.concat(x_list, axis=-1), new_shape)
x = array_ops.matrix_band_part(
x,
num_lower=(0 if upper else -1),
num_upper=(-1 if upper else 0))
x, num_lower=(0 if upper else -1), num_upper=(-1 if upper else 0))
x.set_shape(static_final_shape)
return x

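As background for the `fill_triangular` hunk above: a vector of length `m` fills an `n x n` triangle only when `m = n * (n + 1) / 2`, and `n` can be recovered from `m` in closed form. A small sketch (the helper name is hypothetical):

```python
import numpy as np

def tril_size_to_n(m):
  # Solve n * (n + 1) / 2 == m for n; valid only for triangular sizes.
  n = int(np.sqrt(0.25 + 2. * m) - 0.5)
  assert n * (n + 1) // 2 == m, "m is not a triangular number"
  return n

print(tril_size_to_n(6))   # 3 (a 3x3 lower triangle holds 6 entries)
print(tril_size_to_n(10))  # 4
```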
@ -1041,17 +1046,17 @@ def tridiag(below=None, diag=None, above=None, name=None):
return _add(below, diag, above)


def reduce_weighted_logsumexp(
logx,
w=None,
axis=None,
keep_dims=False,
return_sign=False,
name=None):
def reduce_weighted_logsumexp(logx,
w=None,
axis=None,
keep_dims=False,
return_sign=False,
name=None):
"""Computes `log(abs(sum(weight * exp(elements across tensor dimensions))))`.

If all weights `w` are known to be positive, it is more efficient to directly
use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.log(w))` is more
use `reduce_logsumexp`, i.e., `tf.reduce_logsumexp(logx + tf.math.log(w))` is
more
efficient than `du.reduce_weighted_logsumexp(logx, w)`.

Reduces `input_tensor` along the dimensions given in `axis`.
@ -1094,9 +1099,9 @@ def reduce_weighted_logsumexp(
Args:
logx: The tensor to reduce. Should have numeric type.
w: The weight tensor. Should have numeric type identical to `logx`.
axis: The dimensions to reduce. If `None` (the default),
reduces all dimensions. Must be in the range
`[-rank(input_tensor), rank(input_tensor))`.
axis: The dimensions to reduce. If `None` (the default), reduces all
dimensions. Must be in the range `[-rank(input_tensor),
rank(input_tensor))`.
keep_dims: If true, retains reduced dimensions with length 1.
return_sign: If `True`, returns the sign of the result.
name: A name for the operation (optional).
@ -1121,8 +1126,7 @@
# this is ok follows from the fact that we're actually free to subtract any
# value we like, so long as we add it back after taking the `log(sum(...))`.
max_log_absw_x = array_ops.where(
math_ops.is_inf(max_log_absw_x),
array_ops.zeros_like(max_log_absw_x),
math_ops.is_inf(max_log_absw_x), array_ops.zeros_like(max_log_absw_x),
max_log_absw_x)
wx_over_max_absw_x = (
math_ops.sign(w) * math_ops.exp(log_absw_x - max_log_absw_x))
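The `where`/`zeros_like` guard in the last hunk is the standard max-subtraction trick: any finite value may be subtracted inside the `exp` as long as it is added back after the `log`, and zeroing an infinite max keeps degenerate all-`-inf` slices from producing `nan`. A NumPy sketch of the whole computation, simplified to a full reduction with no `keep_dims`/`return_sign` handling:

```python
import numpy as np

def reduce_weighted_logsumexp(logx, w):
  # log|sum(w * exp(logx))| over all elements, stabilized by
  # factoring out the max of logx + log|w|.
  log_absw_x = logx + np.log(np.abs(w))
  m = np.max(log_absw_x)
  if np.isinf(m):  # guard, as in the hunk above: subtracting an
    m = 0.0        # infinite max would yield nan
  total = np.sum(np.sign(w) * np.exp(log_absw_x - m))
  return m + np.log(np.abs(total))

# log|(-1) + 1 + 1| == 0.0
print(reduce_weighted_logsumexp(np.zeros(3), np.array([-1., 1., 1.])))
```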
@ -1187,8 +1191,9 @@ def softplus_inverse(x, name=None):
too_large_value = x
# This `where` will ultimately be a NOP because we won't select this
# codepath whenever we used the surrogate `ones_like`.
x = array_ops.where(math_ops.logical_or(is_too_small, is_too_large),
array_ops.ones_like(x), x)
x = array_ops.where(
math_ops.logical_or(is_too_small, is_too_large), array_ops.ones_like(x),
x)
y = x + math_ops.log(-math_ops.expm1(-x)) # == log(expm1(x))
return array_ops.where(is_too_small, too_small_value,
array_ops.where(is_too_large, too_large_value, y))
@ -1206,15 +1211,17 @@ def dimension_size(x, axis):
return array_ops.shape(x)[axis]


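For reference, the identity being rearranged in the `softplus_inverse` hunk: softplus(x) = log(1 + exp(x)), so its inverse is log(exp(x) - 1), written as x + log(-expm1(-x)) so that large x never materializes exp(x). A minimal check in NumPy:

```python
import numpy as np

def softplus_inverse(x):
  # Inverse of softplus: log(exp(x) - 1), in the overflow-safe form
  # x + log(-expm1(-x)) used by the hunk above.
  return x + np.log(-np.expm1(-x))

x = np.array([0.5, 5.0, 50.0])
y = np.log1p(np.exp(x))                     # softplus(x)
print(np.allclose(softplus_inverse(y), x))  # True
```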
def process_quadrature_grid_and_probs(
quadrature_grid_and_probs, dtype, validate_args, name=None):
def process_quadrature_grid_and_probs(quadrature_grid_and_probs,
dtype,
validate_args,
name=None):
"""Validates quadrature grid, probs or computes them as necessary.

Args:
quadrature_grid_and_probs: Python pair of `float`-like `Tensor`s
representing the sample points and the corresponding (possibly
normalized) weight. When `None`, defaults to:
`np.polynomial.hermite.hermgauss(deg=8)`.
`np.polynomial.hermite.hermgauss(deg=8)`.
dtype: The expected `dtype` of `grid` and `probs`.
validate_args: Python `bool`, default `False`. When `True` distribution
parameters are checked for validity despite possibly degrading runtime
@ -1244,8 +1251,7 @@ def process_quadrature_grid_and_probs(

grid, probs = tuple(quadrature_grid_and_probs)
grid = ops.convert_to_tensor(grid, name="grid", dtype=dtype)
probs = ops.convert_to_tensor(probs, name="unnormalized_probs",
dtype=dtype)
probs = ops.convert_to_tensor(probs, name="unnormalized_probs", dtype=dtype)
probs /= linalg_ops.norm(probs, ord=1, axis=-1, keepdims=True, name="probs")

def _static_event_size(x):
@ -1281,13 +1287,13 @@ def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
(Negative indexing is supported.)
front: Python `bool`; if `True` the beginning of the `axis` dimension is
padded with `value`, `count` times. If `False` no front padding is made.
back: Python `bool`; if `True` the end of the `axis` dimension is
padded with `value`, `count` times. If `False` no end padding is made.
back: Python `bool`; if `True` the end of the `axis` dimension is padded
with `value`, `count` times. If `False` no end padding is made.
value: Scalar `int`-like `Tensor` representing the actual value added to the
front and/or back of the `axis` dimension of `x`.
count: Scalar `int`-like `Tensor` representing number of elements added to
the front and/or back of the `axis` dimension of `x`. E.g., if
`front = back = True` then `2 * count` elements are added.
the front and/or back of the `axis` dimension of `x`. E.g., if `front =
back = True` then `2 * count` elements are added.
name: Python `str` name prefixed to Ops created by this function.

Returns:
@ -1306,8 +1312,9 @@ def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
count.dtype.name))
if not front and not back:
raise ValueError("At least one of `front`, `back` must be `True`.")
ndims = (x.shape.ndims if x.shape.ndims is not None
else array_ops.rank(x, name="ndims"))
ndims = (
x.shape.ndims if x.shape.ndims is not None else array_ops.rank(
x, name="ndims"))
axis = ops.convert_to_tensor(axis, name="axis")
axis_ = tensor_util.constant_value(axis)
if axis_ is not None:
@ -1317,11 +1324,10 @@ def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
count_ = tensor_util.constant_value(count)
if axis_ >= 0 or x.shape.ndims is not None:
head = x.shape[:axis]
middle = tensor_shape.TensorShape(
None if count_ is None
else (tensor_shape.dimension_at_index(
x.shape, axis) + count_ * (front + back)))
tail = x.shape[axis+1:]
middle = tensor_shape.TensorShape(None if count_ is None else (
tensor_shape.dimension_at_index(x.shape, axis) + count_ *
(front + back)))
tail = x.shape[axis + 1:]
final_shape = head.concatenate(middle.concatenate(tail))
else:
final_shape = None
@ -1331,8 +1337,8 @@ def pad(x, axis, front=False, back=False, value=0, count=1, name=None):
x = array_ops.pad(
x,
paddings=array_ops.one_hot(
indices=array_ops.stack([axis if front else -1,
axis if back else -1]),
indices=array_ops.stack(
[axis if front else -1, axis if back else -1]),
depth=ndims,
axis=0,
on_value=count,
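The `one_hot` call in the `pad` hunk above builds the `[ndims, 2]` paddings matrix for `array_ops.pad` in a single op: column 0 is front padding, column 1 is back padding, row `axis` receives `count`, and a `-1` index yields an all-zero column. A rough NumPy equivalent (the helper name is made up):

```python
import numpy as np

def make_paddings(ndims, axis, front, back, count):
  # Rows index dimensions, columns are (pad_before, pad_after).
  paddings = np.zeros((ndims, 2), dtype=int)
  if front:
    paddings[axis, 0] = count
  if back:
    paddings[axis, 1] = count
  return paddings

x = np.ones((2, 3))
p = make_paddings(x.ndim, axis=1, front=True, back=True, count=2)
print(np.pad(x, p).shape)  # (2, 7)
```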
@ -1407,8 +1413,8 @@ class AppendDocstring(object):
Args:
additional_note: Python string added as additional docstring to public
version of function.
kwargs_dict: Python string/string dictionary representing
specific kwargs expanded from the **kwargs input.
kwargs_dict: Python string/string dictionary representing specific kwargs
expanded from the **kwargs input.

Raises:
ValueError: if kwargs_dict.key contains whitespace.
@ -1420,20 +1426,20 @@ class AppendDocstring(object):
for key in sorted(kwargs_dict.keys()):
value = kwargs_dict[key]
if any(x.isspace() for x in key):
raise ValueError(
"Parameter name \"%s\" contains whitespace." % key)
raise ValueError("Parameter name \"%s\" contains whitespace." % key)
value = value.lstrip()
if "\n" in value:
raise ValueError(
"Parameter description for \"%s\" contains newlines." % key)
bullets.append("* `%s`: %s" % (key, value))
self._additional_note += ("\n\n##### `kwargs`:\n\n" +
"\n".join(bullets))
self._additional_note += ("\n\n##### `kwargs`:\n\n" + "\n".join(bullets))

def __call__(self, fn):

@functools.wraps(fn)
def _fn(*args, **kwargs):
return fn(*args, **kwargs)

if _fn.__doc__ is None:
_fn.__doc__ = self._additional_note
else:

@ -45,8 +45,8 @@ def _clip(params, ids, max_norm):
Args:
params: A `Tensor` of embeddings retrieved by `gather`.
ids: The `ids` argument that was passed to `gather`.
max_norm: If not `None`, each embedding is clipped if its l2-norm is
larger than this value.
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
than this value.

Returns:
A `Tensor` with the same type as `params`.
@ -76,8 +76,7 @@ def _clip(params, ids, max_norm):
return clip_ops.clip_by_norm(
params,
max_norm,
axes=(list(range(ids_rank, params_rank))
if ids_static and params_static
axes=(list(range(ids_rank, params_rank)) if ids_static and params_static
else math_ops.range(ids_rank, params_rank)))


@ -105,8 +104,8 @@ def _embedding_lookup_and_transform(params,
partition_strategy: See embedding_lookup.
name: See embedding_lookup.
max_norm: See embedding_lookup.
transform_fn: An optional function to apply to each retrieved embedding.
If max_norm is provided, transform_fn is applied to the norm-limited
transform_fn: An optional function to apply to each retrieved embedding. If
max_norm is provided, transform_fn is applied to the norm-limited
embeddings.

Returns:
@ -130,8 +129,8 @@ def _embedding_lookup_and_transform(params,
ids = ops.convert_to_tensor(ids, name="ids")
if np == 1 and (not transform_fn or ids.get_shape().ndims == 1):
with ops.colocate_with(params[0]):
result = _clip(array_ops.gather(params[0], ids, name=name),
ids, max_norm)
result = _clip(
array_ops.gather(params[0], ids, name=name), ids, max_norm)
if transform_fn:
result = transform_fn(result)
# Make sure the final result does not have colocation contraints on the
@ -155,11 +154,11 @@ def _embedding_lookup_and_transform(params,
# Compute num_total_ids as the sum of dim-0 of params, then assign to
# partitions based on a constant number of ids per partition. Optimize
# if we already know the full shape statically.
dim_0_size = tensor_shape.Dimension(tensor_shape.dimension_value(
params[0].get_shape()[0]))
dim_0_size = tensor_shape.Dimension(
tensor_shape.dimension_value(params[0].get_shape()[0]))
for p in xrange(1, np):
dim_0_size += tensor_shape.Dimension(tensor_shape.dimension_value(
params[p].get_shape()[0]))
dim_0_size += tensor_shape.Dimension(
tensor_shape.dimension_value(params[p].get_shape()[0]))
if dim_0_size.value:
num_total_ids = constant_op.constant(dim_0_size.value, flat_ids.dtype)
else:
@ -176,9 +175,9 @@ def _embedding_lookup_and_transform(params,
ids_per_partition = num_total_ids // np
extras = num_total_ids % np

p_assignments = math_ops.maximum(
flat_ids // (ids_per_partition + 1),
(flat_ids - extras) // ids_per_partition)
p_assignments = math_ops.maximum(flat_ids // (ids_per_partition + 1),
(flat_ids - extras) //
ids_per_partition)

# Emulate a conditional using a boolean indicator tensor
new_ids = array_ops.where(p_assignments < extras,
@ -233,9 +232,8 @@ def _embedding_lookup_and_transform(params,
element_shape_d = array_ops.shape(ret)[1:]

# Reshape to reverse the flattening of ids.
ret = array_ops.reshape(ret,
array_ops.concat(
[array_ops.shape(ids), element_shape_d], 0))
ret = array_ops.reshape(
ret, array_ops.concat([array_ops.shape(ids), element_shape_d], 0))

# Normally the reshape is sufficient, but setting shape explicitly
# teaches shape inference that params[1:].get_shape() matters
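For context on `_embedding_lookup_and_transform` above: under the `'div'` strategy the id space is split into contiguous runs, the first `extras` partitions holding one extra id each, and the `maximum` of the two integer divisions resolves the partition correctly in both regimes. A NumPy sketch (helper name hypothetical):

```python
import numpy as np

def div_partition(ids, num_total_ids, num_partitions):
  # First `extras` partitions hold (ids_per_partition + 1) ids each, the
  # rest hold ids_per_partition; mirrors the maximum-of-two-divs trick.
  ids_per_partition = num_total_ids // num_partitions
  extras = num_total_ids % num_partitions
  return np.maximum(ids // (ids_per_partition + 1),
                    (ids - extras) // ids_per_partition)

ids = np.arange(10)
print(div_partition(ids, 10, 3))  # [0 0 0 0 1 1 1 2 2 2]
```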
@ -261,7 +259,8 @@ def embedding_lookup(
tensors in `params`. It is a generalization of
`tf.gather`, where `params` is
interpreted as a partitioning of a large embedding tensor. `params` may be
a `PartitionedVariable` as returned by using `tf.get_variable()` with a
a `PartitionedVariable` as returned by using `tf.compat.v1.get_variable()`
with a
partitioner.

If `len(params) > 1`, each element `id` of `ids` is partitioned between
@ -283,8 +282,8 @@ def embedding_lookup(
tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.

Args:
params: A single tensor representing the complete embedding tensor,
or a list of P tensors all of same shape except for the first dimension,
params: A single tensor representing the complete embedding tensor, or a
list of P tensors all of same shape except for the first dimension,
representing sharded embedding tensors. Alternatively, a
`PartitionedVariable`, created by partitioning along dimension 0. Each
element must be appropriately sized for the given `partition_strategy`.
@ -298,8 +297,8 @@ def embedding_lookup(
in `indices` are always validated to be within range. If assigned to GPU,
out-of-bound indices result in safe but unspecified behavior, which may
include raising an error.
max_norm: If not `None`, each embedding is clipped if its l2-norm is
larger than this value.
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
than this value.

Returns:
A `Tensor` with the same type as the tensors in `params`.
@ -317,18 +316,15 @@ def embedding_lookup(


@tf_export("nn.embedding_lookup", v1=[])
|
||||
def embedding_lookup_v2(
|
||||
params,
|
||||
ids,
|
||||
max_norm=None,
|
||||
name=None):
|
||||
def embedding_lookup_v2(params, ids, max_norm=None, name=None):
|
||||
"""Looks up `ids` in a list of embedding tensors.
|
||||
|
||||
This function is used to perform parallel lookups on the list of
|
||||
tensors in `params`. It is a generalization of
|
||||
`tf.gather`, where `params` is
|
||||
interpreted as a partitioning of a large embedding tensor. `params` may be
|
||||
a `PartitionedVariable` as returned by using `tf.get_variable()` with a
|
||||
a `PartitionedVariable` as returned by using `tf.compat.v1.get_variable()`
|
||||
with a
|
||||
partitioner.
|
||||
|
||||
If `len(params) > 1`, each element `id` of `ids` is partitioned between
|
||||
@ -346,15 +342,15 @@ def embedding_lookup_v2(
|
||||
tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.
|
||||
|
||||
Args:
|
||||
params: A single tensor representing the complete embedding tensor,
|
||||
or a list of P tensors all of same shape except for the first dimension,
|
||||
params: A single tensor representing the complete embedding tensor, or a
|
||||
list of P tensors all of same shape except for the first dimension,
|
||||
representing sharded embedding tensors. Alternatively, a
|
||||
`PartitionedVariable`, created by partitioning along dimension 0. Each
|
||||
element must be appropriately sized for the 'div' `partition_strategy`.
|
||||
ids: A `Tensor` with type `int32` or `int64` containing the ids to be looked
|
||||
up in `params`.
|
||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is
|
||||
larger than this value.
|
||||
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
|
||||
than this value.
|
||||
name: A name for the operation (optional).
|
||||
|
||||
Returns:
|
||||
@ -363,8 +359,7 @@ def embedding_lookup_v2(
|
||||
Raises:
|
||||
ValueError: If `params` is empty.
|
||||
"""
|
||||
return embedding_lookup(params, ids, "div", name,
|
||||
max_norm=max_norm)
|
||||
return embedding_lookup(params, ids, "div", name, max_norm=max_norm)
|
||||
|
||||
|
||||
@tf_export(v1=["nn.embedding_lookup_sparse"])
@ -385,8 +380,8 @@ def embedding_lookup_sparse(params,
is the sum of the size of params along dimension 0.

Args:
params: A single tensor representing the complete embedding tensor,
or a list of P tensors all of same shape except for the first dimension,
params: A single tensor representing the complete embedding tensor, or a
list of P tensors all of same shape except for the first dimension,
representing sharded embedding tensors. Alternatively, a
`PartitionedVariable`, created by partitioning along dimension 0. Each
element must be appropriately sized for the given `partition_strategy`.
@ -400,13 +395,12 @@ def embedding_lookup_sparse(params,
is `"mod"`. See `tf.nn.embedding_lookup` for more details.
name: Optional name for the op.
combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
and "sum" are supported.
"sum" computes the weighted sum of the embedding results for each row.
"mean" is the weighted sum divided by the total weight.
"sqrtn" is the weighted sum divided by the square root of the sum of the
squares of the weights.
max_norm: If not `None`, each embedding is clipped if its l2-norm is
larger than this value, before combining.
and "sum" are supported. "sum" computes the weighted sum of the embedding
results for each row. "mean" is the weighted sum divided by the total
weight. "sqrtn" is the weighted sum divided by the square root of the sum
of the squares of the weights.
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
than this value, before combining.

Returns:
A dense tensor representing the combined embeddings for the
@ -559,8 +553,8 @@ def embedding_lookup_sparse_v2(params,
is the sum of the size of params along dimension 0.

Args:
params: A single tensor representing the complete embedding tensor,
or a list of P tensors all of same shape except for the first dimension,
params: A single tensor representing the complete embedding tensor, or a
list of P tensors all of same shape except for the first dimension,
representing sharded embedding tensors. Alternatively, a
`PartitionedVariable`, created by partitioning along dimension 0. Each
element must be appropriately sized for ``"div"`` `partition_strategy`.
@ -570,13 +564,12 @@ def embedding_lookup_sparse_v2(params,
indicate all weights should be taken to be 1. If specified, `sp_weights`
must have exactly the same shape and indices as `sp_ids`.
combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
and "sum" are supported.
"sum" computes the weighted sum of the embedding results for each row.
"mean" is the weighted sum divided by the total weight.
"sqrtn" is the weighted sum divided by the square root of the sum of the
squares of the weights.
max_norm: If not `None`, each embedding is clipped if its l2-norm is
larger than this value, before combining.
and "sum" are supported. "sum" computes the weighted sum of the embedding
results for each row. "mean" is the weighted sum divided by the total
weight. "sqrtn" is the weighted sum divided by the square root of the sum
of the squares of the weights.
max_norm: If not `None`, each embedding is clipped if its l2-norm is larger
than this value, before combining.
name: Optional name for the op.

Returns:
@ -619,8 +612,8 @@ def embedding_lookup_sparse_v2(params,
neither `None` nor `SparseTensor`.
ValueError: If `combiner` is not one of {"mean", "sqrtn", "sum"}.
"""
return embedding_lookup_sparse(
params, sp_ids, sp_weights, "div", name, combiner, max_norm)
return embedding_lookup_sparse(params, sp_ids, sp_weights, "div", name,
combiner, max_norm)


@tf_export("nn.safe_embedding_lookup_sparse", v1=[])
|
||||
@ -636,7 +629,8 @@ def safe_embedding_lookup_sparse_v2(embedding_weights,
|
||||
The partitioned embedding in `embedding_weights` must all be the same shape
|
||||
except for the first dimension. The first dimension is allowed to vary as the
|
||||
vocabulary size is not necessarily a multiple of `P`. `embedding_weights`
|
||||
may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
|
||||
may be a `PartitionedVariable` as returned by using
|
||||
`tf.compat.v1.get_variable()` with a
|
||||
partitioner.
|
||||
|
||||
Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
|
||||
@ -690,17 +684,18 @@ def safe_embedding_lookup_sparse_v2(embedding_weights,
def safe_embedding_lookup_sparse(embedding_weights,
sparse_ids,
sparse_weights=None,
combiner='mean',
combiner="mean",
default_id=None,
name=None,
partition_strategy='div',
partition_strategy="div",
max_norm=None):
"""Lookup embedding results, accounting for invalid IDs and empty features.

The partitioned embedding in `embedding_weights` must all be the same shape
except for the first dimension. The first dimension is allowed to vary as the
vocabulary size is not necessarily a multiple of `P`. `embedding_weights`
may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a
may be a `PartitionedVariable` as returned by using
`tf.compat.v1.get_variable()` with a
partitioner.

Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
@ -712,25 +707,24 @@ def safe_embedding_lookup_sparse(embedding_weights,

Args:
embedding_weights: A list of `P` float `Tensor`s or values representing
partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable`
created by partitioning along dimension 0. The total unpartitioned
shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the
vocab size and `e_1, ..., e_m` are the embedding dimensions.
partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable`
created by partitioning along dimension 0. The total unpartitioned shape
should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size
and `e_1, ..., e_m` are the embedding dimensions.
sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
ids. `d_0` is typically batch size.
ids. `d_0` is typically batch size.
sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
float weights corresponding to `sparse_ids`, or `None` if all weights
are be assumed to be 1.0.
float weights corresponding to `sparse_ids`, or `None` if all weights are
be assumed to be 1.0.
combiner: A string specifying how to combine embedding results for each
entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean"
the default.
entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
default.
default_id: The id to use for an entry with no features.
name: A name for this operation (optional).
partition_strategy: A string specifying the partitioning strategy.
Currently `"div"` and `"mod"` are supported. Default is `"div"`.
partition_strategy: A string specifying the partitioning strategy. Currently
`"div"` and `"mod"` are supported. Default is `"div"`.
max_norm: If not `None`, all embeddings are l2-normalized to max_norm before
combining.

combining.

Returns:
Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.
@ -739,13 +733,13 @@ def safe_embedding_lookup_sparse(embedding_weights,
ValueError: if `embedding_weights` is empty.
"""
if embedding_weights is None:
raise ValueError('Missing embedding_weights %s.' % embedding_weights)
raise ValueError("Missing embedding_weights %s." % embedding_weights)
if isinstance(embedding_weights, variables.PartitionedVariable):
embedding_weights = list(embedding_weights) # get underlying Variables.
if not isinstance(embedding_weights, list):
embedding_weights = [embedding_weights]
if len(embedding_weights) < 1:
raise ValueError('Missing embedding_weights %s.' % embedding_weights)
raise ValueError("Missing embedding_weights %s." % embedding_weights)

dtype = sparse_weights.dtype if sparse_weights is not None else None
embedding_weights = [
@ -755,36 +749,34 @@ def safe_embedding_lookup_sparse(embedding_weights,
for w in embedding_weights
]

with ops.name_scope(name, 'embedding_lookup',
embedding_weights + [sparse_ids,
sparse_weights]) as scope:
with ops.name_scope(name, "embedding_lookup", embedding_weights +
[sparse_ids, sparse_weights]) as scope:
# Reshape higher-rank sparse ids and weights to linear segment ids.
original_shape = sparse_ids.dense_shape
original_rank_dim = tensor_shape.dimension_value(
sparse_ids.dense_shape.get_shape()[0])
original_rank = (
array_ops.size(original_shape)
if original_rank_dim is None
else original_rank_dim)
if original_rank_dim is None else original_rank_dim)
sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
math_ops.reduce_prod(
array_ops.slice(original_shape, [0], [original_rank - 1])),
array_ops.gather(original_shape, original_rank - 1)])
array_ops.gather(original_shape, original_rank - 1)
])
if sparse_weights is not None:
sparse_weights = sparse_tensor.SparseTensor(
sparse_ids.indices,
sparse_weights.values, sparse_ids.dense_shape)
sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
sparse_weights.values,
sparse_ids.dense_shape)

# Prune invalid ids and weights.
sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)
if combiner != 'sum':
if combiner != "sum":
sparse_ids, sparse_weights = _prune_invalid_weights(
sparse_ids, sparse_weights)

# Fill in dummy values for empty features, if necessary.
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids,
default_id or
0)
sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
sparse_ids, default_id or 0)
if sparse_weights is not None:
sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

@ -804,10 +796,8 @@ def safe_embedding_lookup_sparse(embedding_weights,
array_ops.reshape(is_row_empty, [-1, 1]),
array_ops.stack([1, array_ops.shape(result)[1]]))

result = array_ops.where(is_row_empty,
array_ops.zeros_like(result),
result,
name=scope)
result = array_ops.where(
is_row_empty, array_ops.zeros_like(result), result, name=scope)

# Reshape back from linear ids back into higher-dimensional dense result.
final_result = array_ops.reshape(
@ -818,9 +808,10 @@ def safe_embedding_lookup_sparse(embedding_weights,
[original_rank - 1]),
array_ops.slice(array_ops.shape(result), [1], [-1])
], 0))
final_result.set_shape(tensor_shape.unknown_shape(
(tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate(
result.get_shape()[1:]))
final_result.set_shape(
tensor_shape.unknown_shape(
(tensor_shape.Dimension(original_rank_dim) - 1).value).concatenate(
result.get_shape()[1:]))
return final_result


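Stepping back from the diff, `safe_embedding_lookup_sparse` above does three pieces of bookkeeping before the actual lookup: prune invalid (< 0) ids, fill rows left empty with a default id, and remember those rows so they can be zeroed out (or left at `default_id`) afterwards. A toy sketch of that bookkeeping on COO-style arrays (all names here are hypothetical):

```python
import numpy as np

# Toy COO-style sparse ids: rows[i] is the example index, -1 is invalid.
rows = np.array([0, 0, 2])
ids = np.array([3, -1, 7])
num_rows = 3

# 1) Prune invalid ids (< 0).
keep = ids >= 0
rows, ids = rows[keep], ids[keep]

# 2) Fill rows that are now empty with a default id (0 here), and remember
#    them so the combined result can be zeroed out after the lookup.
is_row_empty = ~np.isin(np.arange(num_rows), rows)
rows = np.concatenate([rows, np.flatnonzero(is_row_empty)])
ids = np.concatenate([ids, np.zeros(is_row_empty.sum(), dtype=ids.dtype)])
print(sorted(zip(rows.tolist(), ids.tolist())))  # [(0, 3), (1, 0), (2, 7)]
```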
@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf.test.compute_gradient and tf.compute_gradient_error."""
"""Tests for tf.compat.v1.test.compute_gradient and tf.compute_gradient_error."""

from __future__ import absolute_import
from __future__ import division

@ -63,7 +63,7 @@ def histogram_fixed_width_bins(values,
value_range = [0.0, 5.0]
new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

with tf.get_default_session() as sess:
with tf.compat.v1.get_default_session() as sess:
indices = tf.histogram_fixed_width_bins(new_values, value_range, nbins=5)
variables.global_variables_initializer().run()
sess.run(indices) => [0, 0, 1, 2, 4]
@ -127,7 +127,7 @@ def histogram_fixed_width(values,
value_range = [0.0, 5.0]
new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15]

with tf.get_default_session() as sess:
with tf.compat.v1.get_default_session() as sess:
hist = tf.histogram_fixed_width(new_values, value_range, nbins=5)
variables.global_variables_initializer().run()
sess.run(hist) => [2, 1, 1, 0, 2]

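The two docstring examples above follow the same binning rule: scale values into `nbins` equal-width bins over `value_range`, clipping outliers into the edge bins. A NumPy sketch of that rule (note it yields one index per input, i.e. `[0, 0, 1, 2, 4, 4]` for the six example values, consistent with the histogram `[2, 1, 1, 0, 2]`):

```python
import numpy as np

def fixed_width_bins(values, value_range, nbins):
  lo, hi = value_range
  # Scale into [0, nbins), then clip so outliers land in the edge bins.
  scaled = (np.asarray(values) - lo) * nbins / (hi - lo)
  return np.clip(scaled.astype(int), 0, nbins - 1)

print(fixed_width_bins([-1.0, 0.0, 1.5, 2.0, 5.0, 15], [0.0, 5.0], 5))
# [0 0 1 2 4 4]
```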
@ -124,8 +124,8 @@ def _Check3DImage(image, require_static=True):

Args:
image: 3-D Tensor of shape [height, width, channels]
require_static: If `True`, requires that all dimensions of `image` are
known and non-zero.
require_static: If `True`, requires that all dimensions of `image` are known
and non-zero.

Raises:
ValueError: if `image.shape` is not a 3-vector.
@ -137,8 +137,8 @@ def _Check3DImage(image, require_static=True):
try:
image_shape = image.get_shape().with_rank(3)
except ValueError:
raise ValueError(
"'image' (shape %s) must be three-dimensional." % image.shape)
raise ValueError("'image' (shape %s) must be three-dimensional." %
image.shape)
if require_static and not image_shape.is_fully_defined():
raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
if any(x == 0 for x in image_shape):
@ -203,8 +203,8 @@ def _CheckAtLeast3DImage(image, require_static=True):

Args:
image: >= 3-D Tensor of size [*, height, width, depth]
require_static: If `True`, requires that all dimensions of `image` are
known and non-zero.
require_static: If `True`, requires that all dimensions of `image` are known
and non-zero.

Raises:
ValueError: if image.shape is not a [>= 3] vector.
@ -223,8 +223,8 @@ def _CheckAtLeast3DImage(image, require_static=True):
if require_static and not image_shape.is_fully_defined():
raise ValueError('\'image\' must be fully defined.')
if any(x == 0 for x in image_shape):
raise ValueError(
'all dims of \'image.shape\' must be > 0: %s' % image_shape)
raise ValueError('all dims of \'image.shape\' must be > 0: %s' %
image_shape)
if not image_shape.is_fully_defined():
return [
check_ops.assert_positive(
@ -263,11 +263,10 @@ def random_flip_up_down(image, seed=None):
dimension, which is `height`. Otherwise output the image as-is.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.

Returns:
A tensor of the same type and shape as `image`.
@ -285,11 +284,10 @@ def random_flip_left_right(image, seed=None):
second dimension, which is `width`. Otherwise output the image as-is.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.

Returns:
A tensor of the same type and shape as `image`.
@ -304,12 +302,11 @@ def _random_flip(image, flip_index, seed, scope_name):
"""Randomly (50% chance) flip an image along axis `flip_index`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
flip_index: Dimension along which to flip image. Vertical: 0, Horizontal: 1
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
scope_name: Name of the scope in which the ops are added.

Returns:
@ -329,17 +326,16 @@ def _random_flip(image, flip_index, seed, scope_name):
mirror_cond,
lambda: array_ops.reverse(image, [flip_index]),
lambda: image,
name=scope
)
name=scope)
return fix_image_flip_shape(image, result)
elif shape.ndims == 4:
batch_size = array_ops.shape(image)[0]
uniform_random = random_ops.random_uniform(
[batch_size], 0, 1.0, seed=seed
)
uniform_random = random_ops.random_uniform([batch_size],
0,
1.0,
seed=seed)
flips = math_ops.round(
array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])
)
array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
flips = math_ops.cast(flips, image.dtype)
flipped_input = array_ops.reverse(image, [flip_index + 1])
return flips * flipped_input + (1 - flips) * image
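The 4-D branch above avoids a per-image `cond` by drawing one Bernoulli(0.5) value per image and blending: `flips * flipped + (1 - flips) * image`. A NumPy sketch of the same blend (the helper name is made up):

```python
import numpy as np

def random_flip_batch(images, axis, rng=np.random):
  # One coin flip per image, broadcast against [batch, 1, 1, 1].
  batch = images.shape[0]
  flips = np.round(rng.uniform(size=(batch, 1, 1, 1)))
  flipped = np.flip(images, axis=axis)
  return flips * flipped + (1 - flips) * images

images = np.arange(2 * 2 * 2 * 1).reshape(2, 2, 2, 1).astype(float)
out = random_flip_batch(images, axis=2)  # horizontal flip, per image
print(out.shape)  # (2, 2, 2, 1)
```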
@ -356,8 +352,8 @@ def flip_left_right(image):
See also `reverse()`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.

Returns:
A tensor of the same type and shape as `image`.
@ -377,8 +373,8 @@ def flip_up_down(image):
See also `reverse()`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.

Returns:
A tensor of the same type and shape as `image`.
@ -397,8 +393,8 @@ def _flip(image, flip_index, scope_name):
See also `reverse()`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
flip_index: 0 For vertical, 1 for horizontal.

Returns:
@ -414,7 +410,7 @@ def _flip(image, flip_index, scope_name):
if shape.ndims == 3 or shape.ndims is None:
return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))
elif shape.ndims == 4:
return array_ops.reverse(image, [flip_index+1])
return array_ops.reverse(image, [flip_index + 1])
else:
raise ValueError('\'image\' must have either 3 or 4 dimensions.')

@ -424,8 +420,8 @@ def rot90(image, k=1, name=None):
"""Rotate image(s) counter-clockwise by 90 degrees.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
k: A scalar integer. The number of times the image is rotated by 90 degrees.
name: A name for this operation (optional).

@ -501,6 +497,7 @@ def _rot90_4D(images, k, name_scope):

def _rot180():
return array_ops.reverse_v2(images, [1, 2])

def _rot270():
return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

@ -519,8 +516,8 @@ def transpose(image, name=None):
"""Transpose image(s) by swapping the height and width dimension.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
name: A name for this operation (optional).

Returns:
@ -611,16 +608,16 @@ def central_crop(image, central_fraction):
# dimensions are statically defined.
if dynamic_h:
img_hd = math_ops.cast(img_h, dtypes.float64)
bbox_h_start = math_ops.cast(
(img_hd - img_hd * central_fraction) / 2, dtypes.int32)
bbox_h_start = math_ops.cast((img_hd - img_hd * central_fraction) / 2,
dtypes.int32)
else:
img_hd = float(img_h)
bbox_h_start = int((img_hd - img_hd * central_fraction) / 2)

if dynamic_w:
img_wd = math_ops.cast(img_w, dtypes.float64)
bbox_w_start = math_ops.cast(
(img_wd - img_wd * central_fraction) / 2, dtypes.int32)
bbox_w_start = math_ops.cast((img_wd - img_wd * central_fraction) / 2,
dtypes.int32)
else:
img_wd = float(img_w)
bbox_w_start = int((img_wd - img_wd * central_fraction) / 2)
@ -641,15 +638,12 @@ def central_crop(image, central_fraction):
if rank == 3:
image.set_shape([
None if dynamic_h else bbox_h_size,
None if dynamic_w else bbox_w_size,
img_d
None if dynamic_w else bbox_w_size, img_d
])
else:
image.set_shape([
img_bs,
None if dynamic_h else bbox_h_size,
None if dynamic_w else bbox_w_size,
img_d
img_bs, None if dynamic_h else bbox_h_size,
None if dynamic_w else bbox_w_size, img_d
])
return image

@ -667,8 +661,8 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
`target_height` by `target_width`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
offset_height: Number of rows of zeros to add on top.
offset_width: Number of columns of zeros to add on the left.
target_height: Height of output image.
@ -748,12 +742,12 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
`offset_height + target_height, offset_width + target_width`.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
offset_height: Vertical coordinate of the top-left corner of the result in
the input.
the input.
offset_width: Horizontal coordinate of the top-left corner of the result in
the input.
the input.
target_height: Height of the result.
target_width: Width of the result.

@ -833,8 +827,8 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
dimension.

Args:
image: 4-D Tensor of shape `[batch, height, width, channels]` or
3-D Tensor of shape `[height, width, channels]`.
image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
of shape `[height, width, channels]`.
target_height: Target height.
target_width: Target width.

@ -997,17 +991,18 @@ def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
math_ops.cast(current_width, dtypes.float32))
scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
scaled_height_const = math_ops.cast(
math_ops.round(
scale_factor * math_ops.cast(current_height, dtypes.float32)),
math_ops.round(scale_factor *
math_ops.cast(current_height, dtypes.float32)),
dtypes.int32)
scaled_width_const = math_ops.cast(
math_ops.round(
scale_factor * math_ops.cast(current_width, dtypes.float32)),
math_ops.round(scale_factor *
math_ops.cast(current_width, dtypes.float32)),
dtypes.int32)

# NOTE: Reset the size and other constants used later.
size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
dtypes.int32, name='size')
dtypes.int32,
name='size')
size_const_as_shape = tensor_util.constant_value_as_shape(size)
new_height_const = size_const_as_shape.dims[0].value
new_width_const = size_const_as_shape.dims[1].value
@ -1044,7 +1039,7 @@ def resize_images(images,

Resized images will be distorted if their original aspect ratio is not
the same as `size`. To avoid distortions see
`tf.image.resize_image_with_pad`.
`tf.compat.v1.image.resize_image_with_pad`.

`method` can be one of:

@ -1461,8 +1456,7 @@ def random_brightness(image, max_delta, seed=None):
image: An image or images to adjust.
max_delta: float, must be non-negative.
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.

Returns:
The brightness-adjusted image(s).
@ -1489,7 +1483,7 @@ def random_contrast(image, lower, upper, seed=None):
lower: float. Lower bound for the random contrast factor.
upper: float. Upper bound for the random contrast factor.
seed: A Python integer. Used to create a random seed. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.

Returns:
The contrast-adjusted image(s).
@ -1856,10 +1850,10 @@ def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
image: RGB image or images. Size of the last dimension must be 3.
min_jpeg_quality: Minimum jpeg encoding quality to use.
max_jpeg_quality: Maximum jpeg encoding quality to use.
seed: An operation-specific seed. It will be used in conjunction
with the graph-level seed to determine the real seeds that will be
used in this operation. Please see the documentation of
set_random_seed for its interaction with the graph-level random seed.
seed: An operation-specific seed. It will be used in conjunction with the
graph-level seed to determine the real seeds that will be used in this
operation. Please see the documentation of set_random_seed for its
interaction with the graph-level random seed.

Returns:
Adjusted image(s), same shape and DType as `image`.
@ -1867,8 +1861,8 @@ def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
Raises:
ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
"""
if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or
min_jpeg_quality > 100 or max_jpeg_quality > 100):
if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
max_jpeg_quality > 100):
raise ValueError('jpeg encoding range must be between 0 and 100.')

if min_jpeg_quality >= max_jpeg_quality:
@ -2030,28 +2024,50 @@ def _is_png(contents, name=None):
|
||||
substr = string_ops.substr(contents, 0, 3)
|
||||
return math_ops.equal(substr, b'\211PN', name=name)
|
||||
|
||||
tf_export('io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg',
|
||||
v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
|
||||
gen_image_ops.decode_and_crop_jpeg)
|
||||
|
||||
tf_export('io.decode_bmp', 'image.decode_bmp',
|
||||
v1=['io.decode_bmp', 'image.decode_bmp'])(gen_image_ops.decode_bmp)
|
||||
tf_export('io.decode_gif', 'image.decode_gif',
|
||||
v1=['io.decode_gif', 'image.decode_gif'])(gen_image_ops.decode_gif)
|
||||
tf_export('io.decode_jpeg', 'image.decode_jpeg',
|
||||
v1=['io.decode_jpeg', 'image.decode_jpeg'])(gen_image_ops.decode_jpeg)
|
||||
tf_export('io.decode_png', 'image.decode_png',
|
||||
v1=['io.decode_png', 'image.decode_png'])(gen_image_ops.decode_png)
|
||||
tf_export(
|
||||
'io.decode_and_crop_jpeg',
|
||||
'image.decode_and_crop_jpeg',
|
||||
v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
|
||||
gen_image_ops.decode_and_crop_jpeg)
|
||||
|
||||
tf_export('io.encode_jpeg', 'image.encode_jpeg',
|
||||
v1=['io.encode_jpeg', 'image.encode_jpeg'])(gen_image_ops.encode_jpeg)
|
||||
tf_export('io.extract_jpeg_shape', 'image.extract_jpeg_shape',
|
||||
v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
|
||||
gen_image_ops.extract_jpeg_shape)
|
||||
tf_export(
|
||||
'io.decode_bmp',
|
||||
'image.decode_bmp',
|
||||
v1=['io.decode_bmp', 'image.decode_bmp'])(
|
||||
gen_image_ops.decode_bmp)
|
||||
tf_export(
|
||||
'io.decode_gif',
|
||||
'image.decode_gif',
|
||||
v1=['io.decode_gif', 'image.decode_gif'])(
|
||||
gen_image_ops.decode_gif)
|
||||
tf_export(
|
||||
'io.decode_jpeg',
|
||||
'image.decode_jpeg',
|
||||
v1=['io.decode_jpeg', 'image.decode_jpeg'])(
|
||||
gen_image_ops.decode_jpeg)
|
||||
tf_export(
|
||||
'io.decode_png',
|
||||
'image.decode_png',
|
||||
v1=['io.decode_png', 'image.decode_png'])(
|
||||
gen_image_ops.decode_png)
|
||||
|
||||
tf_export(
|
||||
'io.encode_jpeg',
|
||||
'image.encode_jpeg',
|
||||
v1=['io.encode_jpeg', 'image.encode_jpeg'])(
|
||||
gen_image_ops.encode_jpeg)
|
||||
tf_export(
|
||||
'io.extract_jpeg_shape',
|
||||
'image.extract_jpeg_shape',
|
||||
v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
|
||||
gen_image_ops.extract_jpeg_shape)
|
||||
|
||||
|
||||
@tf_export('io.decode_image', 'image.decode_image',
|
||||
v1=['io.decode_image', 'image.decode_image'])
|
||||
@tf_export(
|
||||
'io.decode_image',
|
||||
'image.decode_image',
|
||||
v1=['io.decode_image', 'image.decode_image'])
|
||||
def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None):
|
||||
"""Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.
|
||||
|
||||
@ -2120,10 +2136,11 @@ def decode_image(contents, channels=None, dtype=dtypes.uint8, name=None):
|
||||
def _png():
|
||||
"""Decodes a PNG image."""
|
||||
return convert_image_dtype(
|
||||
gen_image_ops.decode_png(contents, channels,
|
||||
dtype=dtypes.uint8
|
||||
if dtype == dtypes.uint8
|
||||
else dtypes.uint16), dtype)
|
||||
gen_image_ops.decode_png(
|
||||
contents,
|
||||
channels,
|
||||
dtype=dtypes.uint8 if dtype == dtypes.uint8 else dtypes.uint16),
|
||||
dtype)
|
||||
|
||||
def check_png():
|
||||
"""Checks if an image is PNG."""
|
||||
@ -2166,9 +2183,8 @@ def total_variation(images, name=None):
|
||||
https://en.wikipedia.org/wiki/Total_variation_denoising
|
||||
|
||||
Args:
|
||||
images: 4-D Tensor of shape `[batch, height, width, channels]` or
|
||||
3-D Tensor of shape `[height, width, channels]`.
|
||||
|
||||
images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
|
||||
of shape `[height, width, channels]`.
|
||||
name: A name for the operation (optional).
|
||||
|
||||
Raises:
|
||||
@ -2261,7 +2277,7 @@ def sample_distorted_bounding_box_v2(image_size,
|
||||
# Draw the bounding box in an image summary.
|
||||
image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
|
||||
bbox_for_draw)
|
||||
tf.summary.image('images_with_box', image_with_box)
|
||||
tf.compat.v1.summary.image('images_with_box', image_with_box)
|
||||
|
||||
# Employ the bounding box to distort the image.
|
||||
distorted_image = tf.slice(image, begin, size)
|
||||
@ -2274,34 +2290,29 @@ def sample_distorted_bounding_box_v2(image_size,
|
||||
|
||||
Args:
|
||||
image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
|
||||
`int16`, `int32`, `int64`.
|
||||
1-D, containing `[height, width, channels]`.
|
||||
bounding_boxes: A `Tensor` of type `float32`.
|
||||
3-D with shape `[batch, N, 4]` describing the N bounding boxes
|
||||
associated with the image.
|
||||
seed: An optional `int`. Defaults to `0`.
|
||||
If `seed` is set to non-zero, the random number generator is seeded by
|
||||
the given `seed`. Otherwise, it is seeded by a random seed.
|
||||
min_object_covered: A Tensor of type `float32`. Defaults to `0.1`.
|
||||
The cropped area of the image must contain at least this
|
||||
fraction of any bounding box supplied. The value of this parameter should
|
||||
be non-negative. In the case of 0, the cropped area does not need to
|
||||
overlap any of the bounding boxes supplied.
|
||||
`int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
|
||||
bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
|
||||
describing the N bounding boxes associated with the image.
|
||||
seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
|
||||
random number generator is seeded by the given `seed`. Otherwise, it is
|
||||
seeded by a random seed.
|
||||
min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
|
||||
cropped area of the image must contain at least this fraction of any
|
||||
bounding box supplied. The value of this parameter should be non-negative.
|
||||
In the case of 0, the cropped area does not need to overlap any of the
|
||||
bounding boxes supplied.
|
||||
aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
|
||||
1.33]`.
|
||||
The cropped area of the image must have an aspect `ratio =
|
||||
width / height` within this range.
|
||||
area_range: An optional list of `floats`. Defaults to `[0.05, 1]`.
|
||||
The cropped area of the image must contain a fraction of the
|
||||
supplied image within this range.
|
||||
max_attempts: An optional `int`. Defaults to `100`.
|
||||
Number of attempts at generating a cropped region of the image
|
||||
of the specified constraints. After `max_attempts` failures, return the
|
||||
entire image.
|
||||
1.33]`. The cropped area of the image must have an aspect `ratio = width /
|
||||
height` within this range.
|
||||
area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
|
||||
cropped area of the image must contain a fraction of the supplied image
|
||||
within this range.
|
||||
max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
|
||||
generating a cropped region of the image of the specified constraints.
|
||||
After `max_attempts` failures, return the entire image.
|
||||
use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
|
||||
Controls behavior if no bounding boxes supplied.
|
||||
If true, assume an implicit bounding box covering the whole input. If
|
||||
false, raise an error.
|
||||
Controls behavior if no bounding boxes supplied. If true, assume an
|
||||
implicit bounding box covering the whole input. If false, raise an error.
|
||||
name: A name for the operation (optional).
|
||||
|
||||
Returns:
|
||||
@ -2318,15 +2329,17 @@ def sample_distorted_bounding_box_v2(image_size,
|
||||
Provide as input to `tf.image.draw_bounding_boxes`.
|
||||
"""
|
||||
seed1, seed2 = random_seed.get_seed(seed) if seed else (0, 0)
|
||||
return sample_distorted_bounding_box(
|
||||
image_size, bounding_boxes, seed1, seed2, min_object_covered,
|
||||
aspect_ratio_range, area_range, max_attempts,
|
||||
use_image_if_no_bounding_boxes, name)
|
||||
return sample_distorted_bounding_box(image_size, bounding_boxes, seed1, seed2,
|
||||
min_object_covered, aspect_ratio_range,
|
||||
area_range, max_attempts,
|
||||
use_image_if_no_bounding_boxes, name)
|
||||
|
||||
|
||||
@tf_export(v1=['image.sample_distorted_bounding_box'])
@deprecation.deprecated(date=None, instructions='`seed2` arg is deprecated.'
'Use sample_distorted_bounding_box_v2 instead.')
@deprecation.deprecated(
date=None,
instructions='`seed2` arg is deprecated.'
'Use sample_distorted_bounding_box_v2 instead.')
def sample_distorted_bounding_box(image_size,
bounding_boxes,
seed=None,
@ -2370,7 +2383,7 @@ def sample_distorted_bounding_box(image_size,
# Draw the bounding box in an image summary.
image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
bbox_for_draw)
tf.summary.image('images_with_box', image_with_box)
tf.compat.v1.summary.image('images_with_box', image_with_box)

# Employ the bounding box to distort the image.
distorted_image = tf.slice(image, begin, size)
@ -2383,41 +2396,31 @@ def sample_distorted_bounding_box(image_size,

Args:
image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`.
1-D, containing `[height, width, channels]`.
bounding_boxes: A `Tensor` of type `float32`.
3-D with shape `[batch, N, 4]` describing the N bounding boxes
associated with the image.
seed: An optional `int`. Defaults to `0`.
If either `seed` or `seed2` are set to non-zero, the random number
generator is seeded by the given `seed`. Otherwise, it is seeded by a
random
seed.
seed2: An optional `int`. Defaults to `0`.
A second seed to avoid seed collision.
min_object_covered: A Tensor of type `float32`. Defaults to `0.1`.
The cropped area of the image must contain at least this
fraction of any bounding box supplied. The value of this parameter should
be
non-negative. In the case of 0, the cropped area does not need to overlap
any of the bounding boxes supplied.
`int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
describing the N bounding boxes associated with the image.
seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
set to non-zero, the random number generator is seeded by the given
`seed`. Otherwise, it is seeded by a random seed.
seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
collision.
min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
cropped area of the image must contain at least this fraction of any
bounding box supplied. The value of this parameter should be non-negative.
In the case of 0, the cropped area does not need to overlap any of the
bounding boxes supplied.
aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
1.33]`.
The cropped area of the image must have an aspect ratio =
width / height within this range.
area_range: An optional list of `floats`. Defaults to `[0.05, 1]`.
The cropped area of the image must contain a fraction of the
supplied image within this range.
max_attempts: An optional `int`. Defaults to `100`.
Number of attempts at generating a cropped region of the image
of the specified constraints. After `max_attempts` failures, return the
entire
image.
1.33]`. The cropped area of the image must have an aspect ratio = width /
height within this range.
area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
cropped area of the image must contain a fraction of the supplied image
within this range.
max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
generating a cropped region of the image of the specified constraints.
After `max_attempts` failures, return the entire image.
use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
Controls behavior if no bounding boxes supplied.
If true, assume an implicit bounding box covering the whole input. If
false,
raise an error.
Controls behavior if no bounding boxes supplied. If true, assume an
implicit bounding box covering the whole input. If false, raise an error.
name: A name for the operation (optional).

Returns:
@ -2536,8 +2539,8 @@ def non_max_suppression_padded(boxes,
overlap too much with respect to IOU.
score_threshold: A float representing the threshold for deciding when to
remove boxes based on score.
pad_to_max_output_size: bool. If True, size of `selected_indices` output
is padded to `max_output_size`.
pad_to_max_output_size: bool. If True, size of `selected_indices` output is
padded to `max_output_size`.
name: A name for the operation (optional).

Returns:
@ -2551,12 +2554,16 @@ def non_max_suppression_padded(boxes,
score_threshold = ops.convert_to_tensor(
score_threshold, name='score_threshold')
if compat.forward_compatible(2018, 8, 7) or pad_to_max_output_size:
return gen_image_ops.non_max_suppression_v4(
boxes, scores, max_output_size, iou_threshold, score_threshold,
pad_to_max_output_size)
return gen_image_ops.non_max_suppression_v4(boxes, scores,
max_output_size,
iou_threshold,
score_threshold,
pad_to_max_output_size)
else:
return gen_image_ops.non_max_suppression_v3(
boxes, scores, max_output_size, iou_threshold, score_threshold)
return gen_image_ops.non_max_suppression_v3(boxes, scores,
max_output_size,
iou_threshold,
score_threshold)

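A short sketch of the padded call path above: with `pad_to_max_output_size=True` the output keeps a static length, and the second return value says how many leading entries are real selections (the toy boxes are illustrative only):

```python
import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.1, 1.0, 1.1],
                     [0.5, 0.5, 1.5, 1.5]])
scores = tf.constant([0.9, 0.8, 0.3])

# Output always has length max_output_size; num_valid marks the prefix
# that holds genuine selections.
selected, num_valid = tf.image.non_max_suppression_padded(
    boxes, scores, max_output_size=2, iou_threshold=0.5,
    pad_to_max_output_size=True)
kept = tf.gather(boxes, selected[:num_valid])
```
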
@tf_export('image.non_max_suppression_overlaps')
@ -2605,8 +2612,8 @@ def non_max_suppression_with_overlaps(overlaps,
# pylint: enable=protected-access


_rgb_to_yiq_kernel = [[0.299, 0.59590059,
0.2115], [0.587, -0.27455667, -0.52273617],
_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
[0.587, -0.27455667, -0.52273617],
[0.114, -0.32134392, 0.31119955]]

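The kernel table above is all there is to the conversion: `rgb_to_yiq` contracts the channel axis of the image against the first axis of the 3 x 3 matrix. A small sketch (the inlined constants simply repeat the table above):

```python
import tensorflow as tf

_RGB_TO_YIQ = [[0.299, 0.59590059, 0.2115],
               [0.587, -0.27455667, -0.52273617],
               [0.114, -0.32134392, 0.31119955]]

rgb = tf.random.uniform([64, 64, 3])
yiq = tf.image.rgb_to_yiq(rgb)

# Same result as the op: a per-pixel matrix product over the channel axis.
yiq_manual = tf.tensordot(rgb, tf.constant(_RGB_TO_YIQ), axes=[[2], [0]])
```
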
@ -2620,7 +2627,7 @@ def rgb_to_yiq(images):

Args:
images: 2-D or higher rank. Image data to convert. Last dimension must be
size 3.
size 3.

Returns:
images: tensor with the same shape as `images`.
@ -2647,7 +2654,7 @@ def yiq_to_rgb(images):

Args:
images: 2-D or higher rank. Image data to convert. Last dimension must be
size 3.
size 3.

Returns:
images: tensor with the same shape as `images`.
@ -2659,8 +2666,8 @@ def yiq_to_rgb(images):
return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])


_rgb_to_yuv_kernel = [[0.299, -0.14714119,
0.61497538], [0.587, -0.28886916, -0.51496512],
_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
[0.587, -0.28886916, -0.51496512],
[0.114, 0.43601035, -0.10001026]]


@ -2674,7 +2681,7 @@ def rgb_to_yuv(images):

Args:
images: 2-D or higher rank. Image data to convert. Last dimension must be
size 3.
size 3.

Returns:
images: tensor with the same shape as `images`.
@ -2701,7 +2708,7 @@ def yuv_to_rgb(images):

Args:
images: 2-D or higher rank. Image data to convert. Last dimension must be
size 3.
size 3.

Returns:
images: tensor with the same shape as `images`.
@ -2735,23 +2742,26 @@ def _verify_compatible_image_shapes(img1, img2):
shape1[-3:].assert_is_compatible_with(shape2[-3:])

if shape1.ndims is not None and shape2.ndims is not None:
for dim1, dim2 in zip(reversed(shape1.dims[:-3]),
reversed(shape2.dims[:-3])):
for dim1, dim2 in zip(
reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
raise ValueError(
'Two images are not compatible: %s and %s' % (shape1, shape2))
raise ValueError('Two images are not compatible: %s and %s' %
(shape1, shape2))

# Now assign shape tensors.
shape1, shape2 = array_ops.shape_n([img1, img2])

# TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
checks = []
checks.append(control_flow_ops.Assert(
math_ops.greater_equal(array_ops.size(shape1), 3),
[shape1, shape2], summarize=10))
checks.append(control_flow_ops.Assert(
math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
[shape1, shape2], summarize=10))
checks.append(
control_flow_ops.Assert(
math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
summarize=10))
checks.append(
control_flow_ops.Assert(
math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
[shape1, shape2],
summarize=10))
return shape1, shape2, checks


@ -2808,6 +2818,7 @@ def psnr(a, b, max_val, name=None):
with ops.control_dependencies(checks):
return array_ops.identity(psnr_val)

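For reference, the quantity `psnr` returns above follows the usual definition, PSNR = 10 * log10(max_val**2 / MSE), with the MSE averaged over the last three image axes. A sketch:

```python
import tensorflow as tf

a = tf.random.uniform([1, 32, 32, 3])
b = tf.random.uniform([1, 32, 32, 3])

psnr_val = tf.image.psnr(a, b, max_val=1.0)

# The same value from the definition (max_val = 1.0, so max_val**2 = 1).
mse = tf.reduce_mean(tf.math.squared_difference(a, b), axis=[-3, -2, -1])
psnr_manual = 10.0 * tf.math.log(1.0 / mse) / tf.math.log(10.0)
```
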
_SSIM_K1 = 0.01
_SSIM_K2 = 0.03

@ -2830,9 +2841,9 @@ def _ssim_helper(x, y, reducer, max_val, compensation=1.0):
Arguments:
x: First set of images.
y: Second set of images.
reducer: Function that computes 'local' averages from set of images.
For non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]),
and for convolutional version, this is usually tf.nn.avg_pool or
reducer: Function that computes 'local' averages from set of images. For
non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and
for convolutional version, this is usually tf.nn.avg_pool2d or
tf.nn.conv2d with weighted-sum kernel.
max_val: The dynamic range (i.e., the difference between the maximum
possible allowed value and the minimum allowed value).
@ -2841,8 +2852,8 @@ def _ssim_helper(x, y, reducer, max_val, compensation=1.0):
Returns:
A pair containing the luminance measure, and the contrast-structure measure.
"""
c1 = (_SSIM_K1 * max_val) ** 2
c2 = (_SSIM_K2 * max_val) ** 2
c1 = (_SSIM_K1 * max_val)**2
c2 = (_SSIM_K2 * max_val)**2

# SSIM luminance measure is
# (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
@ -2910,10 +2921,17 @@ def _ssim_per_channel(img1, img2, max_val=1.0):

shape1, shape2 = array_ops.shape_n([img1, img2])
checks = [
control_flow_ops.Assert(math_ops.reduce_all(math_ops.greater_equal(
shape1[-3:-1], filter_size)), [shape1, filter_size], summarize=8),
control_flow_ops.Assert(math_ops.reduce_all(math_ops.greater_equal(
shape2[-3:-1], filter_size)), [shape2, filter_size], summarize=8)]
control_flow_ops.Assert(
math_ops.reduce_all(
math_ops.greater_equal(shape1[-3:-1], filter_size)),
[shape1, filter_size],
summarize=8),
control_flow_ops.Assert(
math_ops.reduce_all(
math_ops.greater_equal(shape2[-3:-1], filter_size)),
[shape2, filter_size],
summarize=8)
]

# Enforce the check to run before computation.
with ops.control_dependencies(checks):
@ -2934,8 +2952,8 @@ def _ssim_per_channel(img1, img2, max_val=1.0):
shape = array_ops.shape(x)
x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
return array_ops.reshape(y, array_ops.concat([shape[:-3],
array_ops.shape(y)[1:]], 0))
return array_ops.reshape(
y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))

luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation)

@ -3094,9 +3112,11 @@ def ssim_multiscale(img1, img2, max_val, power_factors=_MSSSIM_WEIGHTS):
lambda: flat_imgs)
# pylint: enable=cell-var-from-loop

downscaled = [nn_ops.avg_pool(x, ksize=divisor, strides=divisor,
padding='VALID')
for x in padded]
downscaled = [
nn_ops.avg_pool(
x, ksize=divisor, strides=divisor, padding='VALID')
for x in padded
]
tails = [x[1:] for x in array_ops.shape_n(downscaled)]
imgs = [
array_ops.reshape(x, array_ops.concat([h, t], 0))
@ -3110,11 +3130,11 @@ def ssim_multiscale(img1, img2, max_val, power_factors=_MSSSIM_WEIGHTS):
# Remove the cs score for the last scale. In the MS-SSIM calculation,
# we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
mcs.pop() # Remove the cs score for the last scale.
mcs_and_ssim = array_ops.stack(mcs + [nn_ops.relu(ssim_per_channel)],
axis=-1)
mcs_and_ssim = array_ops.stack(
mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
# Take weighted geometric mean across the scale axis.
ms_ssim = math_ops.reduce_prod(math_ops.pow(mcs_and_ssim, power_factors),
[-1])
ms_ssim = math_ops.reduce_prod(
math_ops.pow(mcs_and_ssim, power_factors), [-1])

return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels.

@ -3165,7 +3185,7 @@ def sobel_edges(image):

Arguments:
image: Image tensor with shape [batch_size, h, w, d] and type float32 or
float64. The image(s) must be 2x2 or larger.
float64. The image(s) must be 2x2 or larger.

Returns:
Tensor holding edge maps for each channel. Returns a tensor with shape
@ -3182,8 +3202,8 @@ def sobel_edges(image):
kernels = np.expand_dims(kernels, -2)
kernels_tf = constant_op.constant(kernels, dtype=image.dtype)

kernels_tf = array_ops.tile(kernels_tf, [1, 1, image_shape[-1], 1],
name='sobel_filters')
kernels_tf = array_ops.tile(
kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')

# Use depth-wise convolution to calculate edge maps per channel.
pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
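A minimal usage sketch for the Sobel filters built above (input shapes are illustrative; by convention the trailing axis of the result holds the [dy, dx] pair):

```python
import tensorflow as tf

image = tf.random.uniform([1, 28, 28, 3])  # [batch, h, w, channels], float

edges = tf.image.sobel_edges(image)        # shape [1, 28, 28, 3, 2]
dy, dx = edges[..., 0], edges[..., 1]
magnitude = tf.sqrt(dy * dy + dx * dx)     # simple per-channel edge strength
```
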
@ -3270,14 +3290,13 @@ tf_export(v1=['image.resize_nearest_neighbor'])(


@tf_export('image.crop_and_resize', v1=[])
def crop_and_resize_v2(
image,
boxes,
box_indices,
crop_size,
method='bilinear',
extrapolation_value=0,
name=None):
def crop_and_resize_v2(image,
boxes,
box_indices,
crop_size,
method='bilinear',
extrapolation_value=0,
name=None):
"""Extracts crops from the input image tensor and resizes them.

Extracts crops from the input image tensor and resizes them using bilinear
@ -3292,8 +3311,9 @@ def crop_and_resize_v2(
`size = [crop_height, crop_width]`. The result is a 4-D tensor
`[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
results to using `tf.image.resize_bilinear()` or
`tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with
results to using `tf.compat.v1.image.resize_bilinear()` or
`tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method`
argument) with
`align_corners=True`.

Args:
@ -3320,7 +3340,7 @@ def crop_and_resize_v2(
method: An optional string specifying the sampling method for resizing. It
can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`.
Currently two sampling methods are supported: Bilinear and Nearest
Neighbor.
Neighbor.
extrapolation_value: An optional `float`. Defaults to `0`. Value used for
extrapolation, when applicable.
name: A name for the operation (optional).
@ -3328,14 +3348,15 @@ def crop_and_resize_v2(
Returns:
A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
"""
return gen_image_ops.crop_and_resize(
image, boxes, box_indices, crop_size, method, extrapolation_value, name)
return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
method, extrapolation_value, name)

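A usage sketch for the v2 signature above (boxes are normalized `[y1, x1, y2, x2]`; `box_indices` maps each box to a batch element; the values here are illustrative):

```python
import tensorflow as tf

image = tf.random.uniform([1, 100, 100, 3])
boxes = tf.constant([[0.2, 0.2, 0.8, 0.8]])
box_indices = tf.constant([0])

# Result shape: [num_boxes, crop_height, crop_width, depth] = [1, 32, 32, 3].
crops = tf.image.crop_and_resize(
    image, boxes, box_indices, crop_size=[32, 32], method='bilinear')
```
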
@tf_export(v1=['image.crop_and_resize'])
@deprecation.deprecated_args(
None, 'box_ind is deprecated, use box_indices instead', 'box_ind')
def crop_and_resize_v1( # pylint: disable=missing-docstring
@deprecation.deprecated_args(None,
'box_ind is deprecated, use box_indices instead',
'box_ind')
def crop_and_resize_v1( # pylint: disable=missing-docstring
image,
boxes,
box_ind=None,
@ -3344,13 +3365,15 @@ def crop_and_resize_v1( # pylint: disable=missing-docstring
extrapolation_value=0,
name=None,
box_indices=None):
box_ind = deprecation.deprecated_argument_lookup(
"box_indices", box_indices, "box_ind", box_ind)
return gen_image_ops.crop_and_resize(
image, boxes, box_ind, crop_size, method, extrapolation_value, name)
box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
'box_ind', box_ind)
return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
extrapolation_value, name)


crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__


@tf_export(v1=['image.extract_glimpse'])
def extract_glimpse(
input, # pylint: disable=redefined-builtin

@ -25,6 +25,7 @@ def _initializer(shape, dtype=dtypes.float32, partition_info=None):
partition_info: (Optional) variable_scope._PartitionInfo object holding
additional information about how the variable is partitioned. May be
`None` if the variable is not partitioned.

Returns:
A `Tensor` of type `dtype` and `shape`.
"""
@ -46,14 +47,13 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util.deprecation import deprecated
from tensorflow.python.util.deprecation import deprecated_arg_values
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.deprecation import deprecated_arg_values
from tensorflow.python.util.deprecation import deprecated_args
from tensorflow.python.util.tf_export import tf_export


class Initializer(object):
"""Initializer base class: all initializers inherit from this class.
"""
"""Initializer base class: all initializers inherit from this class."""

def __call__(self, shape, dtype=None, partition_info=None):
"""Returns a tensor object initialized as specified by the initializer.
@ -88,8 +88,8 @@ class Initializer(object):
```

Args:
config: A Python dictionary.
It will typically be the output of `get_config`.
config: A Python dictionary. It will typically be the output of
`get_config`.

Returns:
An Initializer instance.
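A minimal round-trip sketch for the `get_config`/`from_config` pair documented above (using the v1 random-normal initializer as an arbitrary concrete subclass):

```python
import tensorflow as tf

init = tf.compat.v1.random_normal_initializer(mean=0.0, stddev=0.05)

config = init.get_config()                  # plain Python dictionary
restored = type(init).from_config(config)   # equivalent initializer instance
```
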
@ -104,8 +104,7 @@ class Zeros(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, dtype=dtypes.float32):
self.dtype = dtypes.as_dtype(dtype)

@ -125,8 +124,7 @@ class Ones(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, dtype=dtypes.float32):
self.dtype = dtypes.as_dtype(dtype)

@ -182,11 +180,11 @@ class Constant(Initializer):
>>> value = [0, 1, 2, 3, 4, 5, 6, 7]
>>> # value = np.array(value)
>>> # value = value.reshape([2, 4])
>>> init = tf.constant_initializer(value)
>>> init = tf.compat.v1.constant_initializer(value)

>>> print('fitting shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[2, 4], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
>>> x.initializer.run()
>>> print(x.eval())

@ -195,8 +193,8 @@ class Constant(Initializer):
[ 4. 5. 6. 7.]]

>>> print('larger shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[3, 4], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
>>> x.initializer.run()
>>> print(x.eval())

@ -206,15 +204,17 @@ class Constant(Initializer):
[ 7. 7. 7. 7.]]

>>> print('smaller shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[2, 3], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)

ValueError: Too many elements provided. Needed at most 6, but received 8

>>> print('shape verification:')
>>> init_verify = tf.constant_initializer(value, verify_shape=True)
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[3, 4], initializer=init_verify)
>>> init_verify = tf.compat.v1.constant_initializer(value,
verify_shape=True)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[3, 4],
initializer=init_verify)

TypeError: Expected Tensor's shape: (3, 4), got (8,).
```
@ -222,12 +222,9 @@ class Constant(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
@deprecated_args(None,
"Objects must now be the required shape or no shape "
"can be specified",
"verify_shape")
"of passing it to the constructor", "dtype")
@deprecated_args(None, "Objects must now be the required shape or no shape "
"can be specified", "verify_shape")
def __init__(self, value=0, dtype=dtypes.float32, verify_shape=False):
if not (np.isscalar(value) or isinstance(value, (list, tuple, np.ndarray))):
raise TypeError(
@ -260,21 +257,19 @@ class RandomUniform(Initializer):
"""Initializer that generates tensors with a uniform distribution.

Args:
minval: A python scalar or a scalar tensor. Lower bound of the range
of random values to generate.
maxval: A python scalar or a scalar tensor. Upper bound of the range
of random values to generate. Defaults to 1 for float types.
minval: A python scalar or a scalar tensor. Lower bound of the range of
random values to generate.
maxval: A python scalar or a scalar tensor. Upper bound of the range of
random values to generate. Defaults to 1 for float types.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer.
"""

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, minval=0, maxval=None, seed=None, dtype=dtypes.float32):
self.minval = minval
self.maxval = maxval
@ -302,21 +297,19 @@ class RandomNormal(Initializer):
"""Initializer that generates tensors with a normal distribution.

Args:
mean: a python scalar or a scalar tensor. Mean of the random values
to generate.
stddev: a python scalar or a scalar tensor. Standard deviation of the
random values to generate.
mean: a python scalar or a scalar tensor. Mean of the random values to
generate.
stddev: a python scalar or a scalar tensor. Standard deviation of the random
values to generate.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.
"""

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, mean=0.0, stddev=1.0, seed=None, dtype=dtypes.float32):
self.mean = mean
self.stddev = stddev
@ -350,21 +343,19 @@ class TruncatedNormal(Initializer):
neural network weights and filters.

Args:
mean: a python scalar or a scalar tensor. Mean of the random values
to generate.
stddev: a python scalar or a scalar tensor. Standard deviation of the
random values to generate.
mean: a python scalar or a scalar tensor. Mean of the random values to
generate.
stddev: a python scalar or a scalar tensor. Standard deviation of the random
values to generate.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.
"""

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, mean=0.0, stddev=1.0, seed=None, dtype=dtypes.float32):
self.mean = mean
self.stddev = stddev
@ -386,8 +377,9 @@ class TruncatedNormal(Initializer):
}


@tf_export(v1=["initializers.uniform_unit_scaling",
"uniform_unit_scaling_initializer"])
@tf_export(v1=[
"initializers.uniform_unit_scaling", "uniform_unit_scaling_initializer"
])
@deprecation.deprecated_endpoints("uniform_unit_scaling_initializer",
"initializers.uniform_unit_scaling")
class UniformUnitScaling(Initializer):
@ -411,11 +403,9 @@ class UniformUnitScaling(Initializer):
Args:
factor: Float. A multiplicative factor by which the values will be scaled.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Sussillo et al., 2014](https://arxiv.org/abs/1412.6558)
([pdf](http://arxiv.org/pdf/1412.6558.pdf))
@ -423,8 +413,7 @@ class UniformUnitScaling(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
@deprecated(None,
"Use tf.initializers.variance_scaling instead with distribution="
"uniform to get equivalent behavior.")
@ -479,8 +468,7 @@ class VarianceScaling(Initializer):
mode: One of "fan_in", "fan_out", "fan_avg".
distribution: Random distribution to use. One of "normal", "uniform".
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

@ -491,8 +479,7 @@ class VarianceScaling(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
@deprecated_arg_values(
None,
"`normal` is a deprecated alias for `truncated_normal`",
@ -508,8 +495,9 @@ class VarianceScaling(Initializer):
if mode not in {"fan_in", "fan_out", "fan_avg"}:
raise ValueError("Invalid `mode` argument:", mode)
distribution = distribution.lower()
if distribution not in {"normal", "uniform",
"truncated_normal", "untruncated_normal"}:
if distribution not in {
"normal", "uniform", "truncated_normal", "untruncated_normal"
}:
raise ValueError("Invalid `distribution` argument:", distribution)
self.scale = scale
self.mode = mode
@ -538,8 +526,7 @@ class VarianceScaling(Initializer):
shape, 0.0, stddev, dtype, seed=self.seed)
elif self.distribution == "untruncated_normal":
stddev = math.sqrt(scale)
return random_ops.random_normal(
shape, 0.0, stddev, dtype, seed=self.seed)
return random_ops.random_normal(shape, 0.0, stddev, dtype, seed=self.seed)
else:
limit = math.sqrt(3.0 * scale)
return random_ops.random_uniform(
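The `stddev`/`limit` expressions in the branches above are driven by `scale` after it has been divided by the chosen fan. A standalone sketch of that arithmetic (`_fans_from_shape` is a hypothetical helper mirroring the usual fan computation, not this module's API):

```python
import math

def _fans_from_shape(shape):
  # Hypothetical helper: for conv kernels the receptive field multiplies
  # into both fan_in and fan_out.
  receptive_field = 1
  for dim in shape[:-2]:
    receptive_field *= dim
  return shape[-2] * receptive_field, shape[-1] * receptive_field

fan_in, fan_out = _fans_from_shape([3, 3, 64, 128])  # 3x3 conv, 64 -> 128
scale = 1.0 / max(1.0, fan_in)     # mode="fan_in"
stddev = math.sqrt(scale)          # "untruncated_normal" branch above
limit = math.sqrt(3.0 * scale)     # "uniform" branch above
```
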
@ -575,11 +562,9 @@ class Orthogonal(Initializer):
Args:
gain: multiplicative factor to apply to the orthogonal matrix
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C)
([pdf](https://arxiv.org/pdf/1312.6120.pdf))
@ -587,8 +572,7 @@ class Orthogonal(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32):
self.gain = gain
self.dtype = _assert_float_dtype(dtypes.as_dtype(dtype))
@ -640,14 +624,13 @@ class ConvolutionDeltaOrthogonal(Initializer):


Args:
gain: Multiplicative factor to apply to the orthogonal
matrix. Default is 1. The 2-norm of an input is multiplied by a factor of
`gain` after applying this convolution.
gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1.
The 2-norm of an input is multiplied by a factor of `gain` after applying
this convolution.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Xiao et al., 2018](http://proceedings.mlr.press/v80/xiao18a.html)
([pdf](http://proceedings.mlr.press/v80/xiao18a/xiao18a.pdf))
@ -671,7 +654,8 @@ class ConvolutionDeltaOrthogonal(Initializer):

# Generate a random matrix
a = random_ops.random_normal([shape[-1], shape[-1]],
dtype=dtype, seed=self.seed)
dtype=dtype,
seed=self.seed)
# Compute the qr factorization
q, r = gen_linalg_ops.qr(a, full_matrices=False)
# Make Q uniform
@ -680,14 +664,15 @@ class ConvolutionDeltaOrthogonal(Initializer):
q = q[:shape[-2], :]
q *= math_ops.cast(self.gain, dtype=dtype)
if len(shape) == 3:
weight = array_ops.scatter_nd([[(shape[0]-1)//2]],
weight = array_ops.scatter_nd([[(shape[0] - 1) // 2]],
array_ops.expand_dims(q, 0), shape)
elif len(shape) == 4:
weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2]],
weight = array_ops.scatter_nd([[(shape[0] - 1) // 2,
(shape[1] - 1) // 2]],
array_ops.expand_dims(q, 0), shape)
else:
weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2,
(shape[2]-1)//2]],
weight = array_ops.scatter_nd([[(shape[0] - 1) // 2, (shape[1] - 1) // 2,
(shape[2] - 1) // 2]],
array_ops.expand_dims(q, 0), shape)
return weight

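A hypothetical NumPy-only restatement of the construction above: draw a uniform random orthogonal matrix via QR, then scatter its first `cin` rows into the spatial center of an otherwise zero kernel:

```python
import numpy as np

ksize, cin, cout = 3, 4, 8
a = np.random.randn(cout, cout)
q, r = np.linalg.qr(a)
q *= np.sign(np.diag(r))  # make the draw uniform, as the code above does

kernel = np.zeros((ksize, ksize, cin, cout))
kernel[(ksize - 1) // 2, (ksize - 1) // 2] = q[:cin, :]  # the "delta" tap
```
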
@ -701,14 +686,13 @@ class ConvolutionOrthogonal(Initializer):
Base class used to construct 1D, 2D and 3D orthogonal kernels for convolution.

Args:
gain: multiplicative factor to apply to the orthogonal
matrix. Default is 1. The 2-norm of an input is multiplied by a factor of
`gain` after applying this convolution.
gain: multiplicative factor to apply to the orthogonal matrix. Default is 1.
The 2-norm of an input is multiplied by a factor of `gain` after applying
this convolution.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Xiao et al., 2018](http://proceedings.mlr.press/v80/xiao18a.html)
([pdf](http://proceedings.mlr.press/v80/xiao18a/xiao18a.pdf))
@ -731,6 +715,7 @@ class ConvolutionOrthogonal(Initializer):

Args:
n: Dimension.

Returns:
A n x n orthogonal matrix.
"""
@ -748,13 +733,14 @@ class ConvolutionOrthogonal(Initializer):

Args:
n: Dimension.

Returns:
A n x n symmetric projection matrix, i.e. a matrix P s.t. P=P*P, P=P^T.
"""
q = self._orthogonal_matrix(n)
# randomly zeroing out some columns
mask = math_ops.cast(random_ops.random_normal([n], seed=self.seed) > 0,
self.dtype)
mask = math_ops.cast(
random_ops.random_normal([n], seed=self.seed) > 0, self.dtype)
if self.seed:
self.seed += 1
c = math_ops.multiply(q, mask)
@ -771,14 +757,12 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal):
See algorithm 1 in (Xiao et al., 2018).

Args:
gain: Multiplicative factor to apply to the orthogonal
matrix. Default is 1. This has the effect of scaling the output 2-norm by
a factor of `gain`.
gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1.
This has the effect of scaling the output 2-norm by a factor of `gain`.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Xiao et al., 2018](http://proceedings.mlr.press/v80/xiao18a.html)
([pdf](http://proceedings.mlr.press/v80/xiao18a/xiao18a.pdf))
@ -807,6 +791,7 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal):
x: A k1 * k2 dictionary.
k1: First dimension of x.
k2: Second dimension of x.

Returns:
A k1 * k2 tensor.
"""
@ -815,11 +800,14 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal):
for i in range(k1)])

def _block_orth(self, p1, p2):
"""Construct a 2 x 2 kernel. Used to construct orthogonal kernel.
"""Construct a 2 x 2 kernel.

Used to construct orthogonal kernel.

Args:
p1: A symmetric projection matrix.
p2: A symmetric projection matrix.

Returns:
A 2 x 2 kernel [[p1p2, p1(1-p2)],
[(1-p1)p2, (1-p1)(1-p2)]].
@ -877,6 +865,7 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal):
ksize: Kernel size.
cin: Number of input channels.
cout: Number of output channels.

Returns:
An [ksize, ksize, cin, cout] orthogonal kernel.
Raises:
@ -889,11 +878,11 @@ class ConvolutionOrthogonal2D(ConvolutionOrthogonal):
if ksize == 1:
return array_ops.expand_dims(array_ops.expand_dims(orth, 0), 0)

p = self._block_orth(self._symmetric_projection(cout),
self._symmetric_projection(cout))
p = self._block_orth(
self._symmetric_projection(cout), self._symmetric_projection(cout))
for _ in range(ksize - 2):
temp = self._block_orth(self._symmetric_projection(cout),
self._symmetric_projection(cout))
temp = self._block_orth(
self._symmetric_projection(cout), self._symmetric_projection(cout))
p = self._matrix_conv(p, temp)
for i in range(ksize):
for j in range(ksize):
@ -912,15 +901,13 @@ class ConvolutionOrthogonal1D(ConvolutionOrthogonal):
See algorithm 1 in (Xiao et al., 2018).

Args:
gain: Multiplicative factor to apply to the orthogonal
matrix. Default is 1. The 2-norm of an input is multiplied by a factor of
`gain` after applying this convolution.
gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1.
The 2-norm of an input is multiplied by a factor of `gain` after applying
this convolution.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Xiao et al., 2018](http://proceedings.mlr.press/v80/xiao18a.html)
([pdf](http://proceedings.mlr.press/v80/xiao18a/xiao18a.pdf))
@ -945,6 +932,7 @@ class ConvolutionOrthogonal1D(ConvolutionOrthogonal):
Args:
x: A dictionary of length k.
k: Dimension of x.

Returns:
A tensor with the same dimension.
"""
@ -952,10 +940,13 @@ class ConvolutionOrthogonal1D(ConvolutionOrthogonal):
return array_ops.stack([x[i] for i in range(k)])

def _block_orth(self, projection_matrix):
"""Construct a kernel. Used to construct orthogonal kernel.
"""Construct a kernel.

Used to construct orthogonal kernel.

Args:
projection_matrix: A symmetric projection matrix of size n x n.

Returns:
[projection_matrix, (1 - projection_matrix)].
"""
@ -1002,6 +993,7 @@ class ConvolutionOrthogonal1D(ConvolutionOrthogonal):
ksize: Kernel size.
cin: Number of input channels.
cout: Number of output channels.

Returns:
An [ksize, ksize, cin, cout] orthogonal kernel.
Raises:
@ -1034,14 +1026,13 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
See algorithm 1 (Xiao et al., 2018).

Args:
gain: Multiplicative factor to apply to the orthogonal
matrix. Default is 1. The 2-norm of an input is multiplied by a factor of
`gain` after applying this convolution.
gain: Multiplicative factor to apply to the orthogonal matrix. Default is 1.
The 2-norm of an input is multiplied by a factor of `gain` after applying
this convolution.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Xiao et al., 2018](http://proceedings.mlr.press/v80/xiao18a.html)
([pdf](http://proceedings.mlr.press/v80/xiao18a/xiao18a.pdf))
@ -1071,6 +1062,7 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
k1: First dimension of x.
k2: Second dimension of x.
k3: Third dimension of x.

Returns:
A k1 * k2 * k3 tensor.
"""
@ -1080,12 +1072,15 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
for j in range(k2)]) for i in range(k1)])

def _block_orth(self, p1, p2, p3):
"""Construct a 3 x 3 kernel. Used to construct orthogonal kernel.
"""Construct a 3 x 3 kernel.

Used to construct orthogonal kernel.

Args:
p1: A symmetric projection matrix.
p2: A symmetric projection matrix.
p3: A symmetric projection matrix.

Returns:
A 2 x 2 x 2 kernel.
Raises:
@ -1097,11 +1092,14 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
n = p1_shape[0]
eye = linalg_ops_impl.eye(n, dtype=self.dtype)
kernel2x2x2 = {}

def matmul(p1, p2, p3):
return math_ops.matmul(math_ops.matmul(p1, p2), p3)

def cast(i, p):
"""Return p or (1-p)."""
return i * p + (1-i) * (eye - p)
return i * p + (1 - i) * (eye - p)

for i in [0, 1]:
for j in [0, 1]:
for k in [0, 1]:
@ -1139,9 +1137,9 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
for index2 in range(min(k, j + 1)):
for index3 in range(min(k, r + 1)):
if (i - index1) < l and (j - index2) < l and (r - index3) < l:
result[i, j, r] += math_ops.matmul(m1[index1, index2, index3],
m2[i - index1, j - index2,
r - index3])
result[i, j, r] += math_ops.matmul(
m1[index1, index2, index3],
m2[i - index1, j - index2, r - index3])
return result

def _orthogonal_kernel(self, ksize, cin, cout):
@ -1151,6 +1149,7 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
ksize: Kernel size.
cin: Number of input channels.
cout: Number of output channels.

Returns:
An [ksize, ksize, ksize, cin, cout] orthogonal kernel.
Raises:
@ -1162,16 +1161,15 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
orth = self._orthogonal_matrix(cout)[0:cin, :]
if ksize == 1:
return array_ops.expand_dims(
array_ops.expand_dims(
array_ops.expand_dims(orth, 0), 0), 0)
array_ops.expand_dims(array_ops.expand_dims(orth, 0), 0), 0)

p = self._block_orth(self._symmetric_projection(cout),
self._symmetric_projection(cout),
self._symmetric_projection(cout))
p = self._block_orth(
self._symmetric_projection(cout), self._symmetric_projection(cout),
self._symmetric_projection(cout))
for _ in range(ksize - 2):
temp = self._block_orth(self._symmetric_projection(cout),
self._symmetric_projection(cout),
self._symmetric_projection(cout))
temp = self._block_orth(
self._symmetric_projection(cout), self._symmetric_projection(cout),
self._symmetric_projection(cout))
p = self._matrix_conv(p, temp)
for i in range(ksize):
for j in range(ksize):
@ -1196,8 +1194,7 @@ class Identity(Initializer):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, gain=1.0, dtype=dtypes.float32):
self.gain = gain
self.dtype = _assert_float_dtype(dtypes.as_dtype(dtype))
@ -1234,11 +1231,9 @@ class GlorotUniform(VarianceScaling):

Args:
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
([pdf](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf))
@ -1246,8 +1241,7 @@ class GlorotUniform(VarianceScaling):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, seed=None, dtype=dtypes.float32):
super(GlorotUniform, self).__init__(
scale=1.0,
@ -1274,10 +1268,9 @@ class GlorotNormal(VarianceScaling):

Args:
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
dtype: Default data type, used if no `dtype` argument is provided when
calling the initializer. Only floating point types are supported.

References:
[Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)
([pdf](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf))
@ -1285,8 +1278,7 @@ class GlorotNormal(VarianceScaling):

@deprecated_args(None,
"Call initializer instance with the dtype argument instead "
"of passing it to the constructor",
"dtype")
"of passing it to the constructor", "dtype")
def __init__(self, seed=None, dtype=dtypes.float32):
super(GlorotNormal, self).__init__(
scale=1.0,
@ -1338,7 +1330,9 @@ def lecun_normal(seed=None):

References:
- Self-Normalizing Neural Networks,
[Klambauer et al., 2017](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks) # pylint: disable=line-too-long
[Klambauer et al.,
2017](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks)
# pylint: disable=line-too-long
([pdf](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf))
- Efficient Backprop,
[Lecun et al., 1998](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
@ -1363,7 +1357,9 @@ def lecun_uniform(seed=None):

References:
- Self-Normalizing Neural Networks,
[Klambauer et al., 2017](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks) # pylint: disable=line-too-long
[Klambauer et al.,
2017](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks)
# pylint: disable=line-too-long
([pdf](https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf))
- Efficient Backprop,
[Lecun et al., 1998](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf)
@ -1389,7 +1385,8 @@ def he_normal(seed=None):

References:
[He et al., 2015]
(https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html) # pylint: disable=line-too-long
(https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html)
# pylint: disable=line-too-long
([pdf](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf))
"""
return VarianceScaling(
@ -1412,7 +1409,8 @@ def he_uniform(seed=None):

References:
[He et al., 2015]
(https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html) # pylint: disable=line-too-long
(https://www.cv-foundation.org/openaccess/content_iccv_2015/html/He_Delving_Deep_into_ICCV_2015_paper.html)
# pylint: disable=line-too-long
([pdf](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf))
"""
return VarianceScaling(

@ -156,11 +156,11 @@ class Constant(Initializer):
>>> value = [0, 1, 2, 3, 4, 5, 6, 7]
>>> # value = np.array(value)
>>> # value = value.reshape([2, 4])
>>> init = tf.constant_initializer(value)
>>> init = tf.compat.v1.constant_initializer(value)

>>> print('fitting shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[2, 4], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
>>> x.initializer.run()
>>> print(x.eval())

@ -169,8 +169,8 @@ class Constant(Initializer):
[ 4. 5. 6. 7.]]

>>> print('larger shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[3, 4], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
>>> x.initializer.run()
>>> print(x.eval())

@ -180,8 +180,8 @@ class Constant(Initializer):
[ 7. 7. 7. 7.]]

>>> print('smaller shape:')
>>> with tf.Session():
>>> x = tf.get_variable('x', shape=[2, 3], initializer=init)
>>> with tf.compat.v1.Session():
>>> x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)

ValueError: Too many elements provided. Needed at most 6, but received 8
```
@ -225,7 +225,7 @@ class RandomUniform(Initializer):
maxval: A python scalar or a scalar tensor. Upper bound of the range
of random values to generate. Defaults to 1 for float types.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
"""

@ -270,7 +270,7 @@ class RandomNormal(Initializer):
stddev: a python scalar or a scalar tensor. Standard deviation of the
random values to generate.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
"""

@ -317,7 +317,7 @@ class TruncatedNormal(Initializer):
stddev: a python scalar or a scalar tensor. Standard deviation of the
random values to generate.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
"""

@ -371,7 +371,7 @@ class VarianceScaling(Initializer):
distribution: Random distribution to use. One of "truncated_normal",
"untruncated_normal" and "uniform".
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.

Raises:
@ -459,7 +459,7 @@ class Orthogonal(Initializer):
Args:
gain: multiplicative factor to apply to the orthogonal matrix
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.

References:
@ -561,7 +561,7 @@ class GlorotUniform(VarianceScaling):

Args:
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.

References:
@ -590,7 +590,7 @@ class GlorotNormal(VarianceScaling):

Args:
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.

References:
[Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html)

@ -959,7 +959,7 @@ class LinearOperator(object):
|
||||
==> [1., 2.]
|
||||
|
||||
# Equivalent, but inefficient method
|
||||
tf.matrix_diag_part(my_operator.to_dense())
|
||||
tf.linalg.diag_part(my_operator.to_dense())
|
||||
==> [1., 2.]
|
||||
```
|
||||
|
||||
|
@ -83,18 +83,18 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator):
|
||||
==> tf.concat([operator_1.matmul(x1), operator_2.matmul(x2)])
|
||||
|
||||
# Create a [2, 3] batch of 4 x 4 linear operators.
|
||||
matrix_44 = tf.random_normal(shape=[2, 3, 4, 4])
|
||||
matrix_44 = tf.random.normal(shape=[2, 3, 4, 4])
|
||||
operator_44 = LinearOperatorFullMatrix(matrix)
|
||||
|
||||
# Create a [1, 3] batch of 5 x 5 linear operators.
|
||||
matrix_55 = tf.random_normal(shape=[1, 3, 5, 5])
|
||||
matrix_55 = tf.random.normal(shape=[1, 3, 5, 5])
|
||||
operator_55 = LinearOperatorFullMatrix(matrix_55)
|
||||
|
||||
# Combine to create a [2, 3] batch of 9 x 9 operators.
|
||||
operator_99 = LinearOperatorBlockDiag([operator_44, operator_55])
|
||||
|
||||
# Create a shape [2, 3, 9] vector.
|
||||
x = tf.random_normal(shape=[2, 3, 9])
|
||||
x = tf.random.normal(shape=[2, 3, 9])
|
||||
operator_99.matmul(x)
|
||||
==> Shape [2, 3, 9] Tensor
|
||||
```
|
||||
|
@ -602,7 +602,7 @@ class LinearOperatorCirculant(_BaseLinearOperatorCirculant):
|
||||
```python
|
||||
# convolution_kernel is real ==> spectrum is Hermitian.
|
||||
convolution_kernel = [1., 2., 1.]]
|
||||
spectrum = tf.fft(tf.cast(convolution_kernel, tf.complex64))
|
||||
spectrum = tf.signal.fft(tf.cast(convolution_kernel, tf.complex64))
|
||||
|
||||
# spectrum is Hermitian ==> operator is real.
|
||||
# spectrum is shape [3] ==> operator is shape [3, 3]
|
||||
@ -654,7 +654,7 @@ class LinearOperatorCirculant(_BaseLinearOperatorCirculant):
|
||||
[0, 1, 4, 1],
|
||||
[1, 0, 1, 4]]
|
||||
|
||||
# convolution_kernel = tf.ifft(spectrum)
|
||||
# convolution_kernel = tf.signal.ifft(spectrum)
|
||||
operator.convolution_kernel()
|
||||
==> [4, 1, 0, 1]
|
||||
```
|
||||
@ -830,7 +830,7 @@ class LinearOperatorCirculant2D(_BaseLinearOperatorCirculant):
|
||||
```python
|
||||
# convolution_kernel is real ==> spectrum is Hermitian.
|
||||
convolution_kernel = [[1., 2., 1.], [5., -1., 1.]]
|
||||
spectrum = tf.fft2d(tf.cast(convolution_kernel, tf.complex64))
|
||||
spectrum = tf.signal.fft2d(tf.cast(convolution_kernel, tf.complex64))
|
||||
|
||||
# spectrum is shape [2, 3] ==> operator is shape [6, 6]
|
||||
# spectrum is Hermitian ==> operator is real.
|
||||
|
@ -73,18 +73,18 @@ class LinearOperatorComposition(linear_operator.LinearOperator):
|
||||
==> Shape [2, 4] Tensor
|
||||
|
||||
# Create a [2, 3] batch of 4 x 5 linear operators.
|
||||
matrix_45 = tf.random_normal(shape=[2, 3, 4, 5])
|
||||
matrix_45 = tf.random.normal(shape=[2, 3, 4, 5])
|
||||
operator_45 = LinearOperatorFullMatrix(matrix)
|
||||
|
||||
# Create a [2, 3] batch of 5 x 6 linear operators.
|
||||
matrix_56 = tf.random_normal(shape=[2, 3, 5, 6])
|
||||
matrix_56 = tf.random.normal(shape=[2, 3, 5, 6])
|
||||
operator_56 = LinearOperatorFullMatrix(matrix_56)
|
||||
|
||||
# Compose to create a [2, 3] batch of 4 x 6 operators.
|
||||
operator_46 = LinearOperatorComposition([operator_45, operator_56])
|
||||
|
||||
# Create a shape [2, 3, 6, 2] vector.
|
||||
x = tf.random_normal(shape=[2, 3, 6, 2])
|
||||
x = tf.random.normal(shape=[2, 3, 6, 2])
|
||||
operator.matmul(x)
|
||||
==> Shape [2, 3, 4, 2] Tensor
|
||||
```
|
||||
|
@ -63,13 +63,13 @@ class LinearOperatorDiag(linear_operator.LinearOperator):
|
||||
==> Shape [2, 4] Tensor
|
||||
|
||||
# Create a [2, 3] batch of 4 x 4 linear operators.
|
||||
diag = tf.random_normal(shape=[2, 3, 4])
|
||||
diag = tf.random.normal(shape=[2, 3, 4])
|
||||
operator = LinearOperatorDiag(diag)
|
||||
|
||||
# Create a shape [2, 1, 4, 2] vector. Note that this shape is compatible
|
||||
# since the batch dimensions, [2, 1], are broadcast to
|
||||
# operator.batch_shape = [2, 3].
|
||||
y = tf.random_normal(shape=[2, 1, 4, 2])
|
||||
y = tf.random.normal(shape=[2, 1, 4, 2])
|
||||
x = operator.solve(y)
|
||||
==> operator.matmul(x) = y
|
||||
```
@ -57,7 +57,7 @@ class LinearOperatorFullMatrix(linear_operator.LinearOperator):
==> Shape [2, 4] Tensor

# Create a [2, 3] batch of 4 x 4 linear operators.
matrix = tf.random_normal(shape=[2, 3, 4, 4])
matrix = tf.random.normal(shape=[2, 3, 4, 4])
operator = LinearOperatorFullMatrix(matrix)
```

@ -131,7 +131,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity):
operator.matmul(x)
==> Shape [2, 4] Tensor, same as x.

y = tf.random_normal(shape=[3, 2, 4])
y = tf.random.normal(shape=[3, 2, 4])
# Note that y.shape is compatible with operator.shape because operator.shape
# is broadcast to [3, 2, 2].
# This broadcast does NOT require copying data, since we can infer that y
@ -492,7 +492,7 @@ class LinearOperatorScaledIdentity(BaseLinearOperatorIdentity):
operator.matmul(x)
==> 3 * x

y = tf.random_normal(shape=[3, 2, 4])
y = tf.random.normal(shape=[3, 2, 4])
# Note that y.shape is compatible with operator.shape because operator.shape
# is broadcast to [3, 2, 2].
x = operator.solve(y)
@ -98,18 +98,18 @@ class LinearOperatorKronecker(linear_operator.LinearOperator):
==> Shape [4, 2] Tensor

# Create a [2, 3] batch of 4 x 5 linear operators.
matrix_45 = tf.random_normal(shape=[2, 3, 4, 5])
matrix_45 = tf.random.normal(shape=[2, 3, 4, 5])
operator_45 = LinearOperatorFullMatrix(matrix)

# Create a [2, 3] batch of 5 x 6 linear operators.
matrix_56 = tf.random_normal(shape=[2, 3, 5, 6])
matrix_56 = tf.random.normal(shape=[2, 3, 5, 6])
operator_56 = LinearOperatorFullMatrix(matrix_56)

# Compose to create a [2, 3] batch of 20 x 30 operators.
operator_large = LinearOperatorKronecker([operator_45, operator_56])

# Create a shape [2, 3, 20, 2] vector.
x = tf.random_normal(shape=[2, 3, 6, 2])
x = tf.random.normal(shape=[2, 3, 6, 2])
operator_large.matmul(x)
==> Shape [2, 3, 30, 2] Tensor
```
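The quoted Kronecker example has internally inconsistent shapes (a 20 x 30 operator cannot `matmul` a `[2, 3, 6, 2]` input); a consistent sketch, assuming the `tf.linalg` exports:

```python
import tensorflow as tf

matrix_45 = tf.random.normal(shape=[2, 3, 4, 5])
matrix_56 = tf.random.normal(shape=[2, 3, 5, 6])
operator_45 = tf.linalg.LinearOperatorFullMatrix(matrix_45)
operator_56 = tf.linalg.LinearOperatorFullMatrix(matrix_56)

# Kronecker product of 4 x 5 and 5 x 6 blocks acts as a 20 x 30 operator.
operator_large = tf.linalg.LinearOperatorKronecker([operator_45, operator_56])
x = tf.random.normal(shape=[2, 3, 30, 2])
y = operator_large.matmul(x)  # shape [2, 3, 20, 2]
```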
@ -66,7 +66,7 @@ class LinearOperatorLowerTriangular(linear_operator.LinearOperator):
==> Shape [2, 4] Tensor

# Create a [2, 3] batch of 4 x 4 linear operators.
tril = tf.random_normal(shape=[2, 3, 4, 4])
tril = tf.random.normal(shape=[2, 3, 4, 4])
operator = LinearOperatorLowerTriangular(tril)
```

@ -90,7 +90,7 @@ class LinearOperatorDerivedClassTest(test.TestCase):

@property
def _dtypes_to_test(self):
# TODO(langmore) Test tf.float16 once tf.matrix_solve works in 16bit.
# TODO(langmore) Test tf.float16 once tf.linalg.solve works in 16bit.
return [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128]

@property
@ -146,8 +146,8 @@ def broadcast_matrix_batch_dims(batch_matrices, name=None):
Example broadcasting many batch dims

```python
x = tf.random_normal(shape=(2, 3, 1, 4, 4))
y = tf.random_normal(shape=(1, 3, 2, 5, 5))
x = tf.random.normal(shape=(2, 3, 1, 4, 4))
y = tf.random.normal(shape=(1, 3, 2, 5, 5))
x_bc, y_bc = broadcast_matrix_batch_dims([x, y])

x_bc.shape
@ -260,10 +260,10 @@ def matmul_with_broadcast(a,

```python
# A 2-batch of 3x4 matrices
a = tf.random_normal(shape=(2, 3, 4))
a = tf.random.normal(shape=(2, 3, 4))

# A single 4x5 matrix
b = tf.random_normal(shape=(4, 5))
b = tf.random.normal(shape=(4, 5))

result = matmul_with_broadcast(a, b)

@ -365,7 +365,7 @@ def matrix_triangular_solve_with_broadcast(matrix,
name=None):
"""Solves triangular systems of linear equations by backsubstitution.

Works identically to `tf.matrix_triangular_solve`, but broadcasts batch dims
Works identically to `tf.linalg.triangular_solve`, but broadcasts batch dims
of `matrix` and `rhs` (by replicating) if they are determined statically to be
different, or if static shapes are not fully defined. Thus, this may result
in an inefficient replication of data.
@ -89,8 +89,8 @@ def cholesky_solve(chol, rhs, name=None):
# Solve 10 separate 2x2 linear systems:
A = ...  # shape 10 x 2 x 2
RHS = ...  # shape 10 x 2 x 1
chol = tf.cholesky(A)  # shape 10 x 2 x 2
X = tf.cholesky_solve(chol, RHS)  # shape 10 x 2 x 1
chol = tf.linalg.cholesky(A)  # shape 10 x 2 x 2
X = tf.linalg.cholesky_solve(chol, RHS)  # shape 10 x 2 x 1
# tf.matmul(A, X) ~ RHS
X[3, :, 0]  # Solution to the linear system A[3, :, :] x = RHS[3, :, 0]
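Filling in the elided `A` and `RHS` with concrete values, a minimal sketch of the renamed calls (the SPD matrix here is hypothetical):

```python
import tensorflow as tf

# A batch of 10 identical symmetric positive-definite 2x2 systems.
A = tf.tile(tf.constant([[[4., 1.], [1., 3.]]]), [10, 1, 1])
RHS = tf.ones([10, 2, 1])
chol = tf.linalg.cholesky(A)             # shape [10, 2, 2]
X = tf.linalg.cholesky_solve(chol, RHS)  # shape [10, 2, 1]
# tf.matmul(A, X) ~ RHS
```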
@ -103,7 +103,7 @@ def cholesky_solve(chol, rhs, name=None):

Args:
chol: A `Tensor`. Must be `float32` or `float64`, shape is `[..., M, M]`.
Cholesky factorization of `A`, e.g. `chol = tf.cholesky(A)`.
Cholesky factorization of `A`, e.g. `chol = tf.linalg.cholesky(A)`.
For that reason, only the lower triangular parts (including the diagonal)
of the last two dimensions of `chol` are used. The strictly upper part is
assumed to be zero and not accessed.

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Logging and Summary Operations."""
# pylint: disable=protected-access
from __future__ import absolute_import
@ -48,12 +47,10 @@ try:
except NameError:
pass


# The python wrapper for Assert is in control_flow_ops, as the Assert
# call relies on certain conditionals for its dependencies. Use
# control_flow_ops.Assert.


# Assert and Print are special symbols in python, so we must
# have an upper-case version of them.
#
@ -64,15 +61,15 @@ except NameError:

# pylint: disable=invalid-name
@deprecated("2018-08-20", "Use tf.print instead of tf.Print. Note that "
"tf.print returns a no-output operator that directly "
"prints the output. Outside of defuns or eager mode, "
"this operator will not be executed unless it is "
"directly specified in session.run or used as a "
"control dependency for other operators. This is "
"only a concern in graph mode. Below is an example "
"of how to ensure tf.print executes in graph mode:\n"
"""```python
sess = tf.Session()
"tf.print returns a no-output operator that directly "
"prints the output. Outside of defuns or eager mode, "
"this operator will not be executed unless it is "
"directly specified in session.run or used as a "
"control dependency for other operators. This is "
"only a concern in graph mode. Below is an example "
"of how to ensure tf.print executes in graph mode:\n"
"""```python
sess = tf.compat.v1.Session()
with sess.as_default():
tensor = tf.range(10)
print_op = tf.print(tensor)
@ -86,8 +83,7 @@ the following:
`from __future__ import print_function`
""")
@tf_export(v1=["Print"])
def Print(input_, data, message=None, first_n=None, summarize=None,
name=None):
def Print(input_, data, message=None, first_n=None, summarize=None, name=None):
"""Prints a list of tensors.

This is an identity op (behaves like `tf.identity`) with the side effect
@ -102,15 +98,17 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
data: A list of tensors to print out when op is evaluated.
message: A string, prefix of the error message.
first_n: Only log `first_n` number of times. Negative numbers log always;
this is the default.
this is the default.
summarize: Only print this many entries of each tensor. If None, then a
maximum of 3 elements are printed per input tensor.
maximum of 3 elements are printed per input tensor.
name: A name for the operation (optional).

Returns:
A `Tensor`. Has the same type and contents as `input_`.
"""
return gen_logging_ops._print(input_, data, message, first_n, summarize, name)
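For reference, a minimal sketch of the graph-mode pattern the deprecation message above recommends, assuming a TF 1.x-style session (under TF 2.x one would first call `tf.compat.v1.disable_eager_execution()`):

```python
import tensorflow as tf

sess = tf.compat.v1.Session()
with sess.as_default():
  tensor = tf.range(10)
  print_op = tf.print(tensor)  # replaces tf.Print(tensor, [tensor])
  with tf.control_dependencies([print_op]):
    tripled = tensor * 3
  sess.run(tripled)  # the print executes as a control dependency
```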
# pylint: enable=invalid-name


@ -159,7 +157,7 @@ def print_v2(*inputs, **kwargs):
Example:
Single-input usage:
```python
tf.enable_eager_execution()
tf.compat.v1.enable_eager_execution()
tensor = tf.range(10)
tf.print(tensor, output_stream=sys.stderr)
```
@ -167,7 +165,7 @@ def print_v2(*inputs, **kwargs):

Multi-input usage:
```python
tf.enable_eager_execution()
tf.compat.v1.enable_eager_execution()
tensor = tf.range(10)
tf.print("tensors:", tensor, {2: tensor * 2}, output_stream=sys.stdout)
```
@ -176,7 +174,7 @@ def print_v2(*inputs, **kwargs):

Usage in a defun:
```python
tf.enable_eager_execution()
tf.compat.v1.enable_eager_execution()

@tf.contrib.eager.defun
def f():
@ -190,7 +188,7 @@ def print_v2(*inputs, **kwargs):

Usage when constructing graphs:
```python
sess = tf.Session()
sess = tf.compat.v1.Session()
with sess.as_default():
tensor = tf.range(10)
print_op = tf.print("tensors:", tensor, {2: tensor * 2},
@ -208,12 +206,13 @@ def print_v2(*inputs, **kwargs):
Args:
*inputs: Positional arguments that are the inputs to print. Inputs in the
printed output will be separated by spaces. Inputs may be python
primitives, tensors, data structures such as dicts and lists that
may contain tensors (with the data structures possibly nested in
arbitrary ways), and printable python objects.
primitives, tensors, data structures such as dicts and lists that may
contain tensors (with the data structures possibly nested in arbitrary
ways), and printable python objects.
output_stream: The output stream, logging level, or file to print to.
Defaults to sys.stderr, but sys.stdout, tf.logging.info,
tf.logging.warning, and tf.logging.error are also supported. To print to
Defaults to sys.stderr, but sys.stdout, tf.compat.v1.logging.info,
tf.compat.v1.logging.warning, and tf.compat.v1.logging.error are also
supported. To print to
a file, pass a string started with "file://" followed by the file path,
e.g., "file:///tmp/foo.out".
summarize: The first and last `summarize` elements within each dimension are
@ -261,17 +260,17 @@ def print_v2(*inputs, **kwargs):
else:
output_stream_string = output_stream_to_constant.get(output_stream)
if not output_stream_string:
raise ValueError(
"Unsupported output stream, logging level, or file." +
str(output_stream) + ". Supported streams are sys.stdout, "
"sys.stderr, tf.logging.info, "
"tf.logging.warning, tf.logging.error. " +
"File needs to be in the form of 'file://<filepath>'.")
raise ValueError("Unsupported output stream, logging level, or file." +
str(output_stream) +
". Supported streams are sys.stdout, "
"sys.stderr, tf.logging.info, "
"tf.logging.warning, tf.logging.error. " +
"File needs to be in the form of 'file://<filepath>'.")

# If we are only printing a single string scalar, there is no need to format
if (len(inputs) == 1 and tensor_util.is_tensor(inputs[0])
and (not isinstance(inputs[0], sparse_tensor.SparseTensor))
and (inputs[0].shape.ndims == 0) and (inputs[0].dtype == dtypes.string)):
if (len(inputs) == 1 and tensor_util.is_tensor(inputs[0]) and
(not isinstance(inputs[0], sparse_tensor.SparseTensor)) and
(inputs[0].shape.ndims == 0) and (inputs[0].dtype == dtypes.string)):
formatted_string = inputs[0]
# Otherwise, we construct an appropriate template for the tensors we are
# printing, and format the template using those tensors.
@ -282,10 +281,9 @@ def print_v2(*inputs, **kwargs):
templates = []
tensors = []
tensor_free_structure = nest.map_structure(
lambda x: "" if tensor_util.is_tensor(x) else x,
inputs)
tensor_free_template = " ".join(pprint.pformat(x)
for x in tensor_free_structure)
lambda x: "" if tensor_util.is_tensor(x) else x, inputs)
tensor_free_template = " ".join(
pprint.pformat(x) for x in tensor_free_structure)
placeholder = _generate_placeholder_string(tensor_free_template)

for input_ in inputs:
@ -300,8 +298,7 @@ def print_v2(*inputs, **kwargs):
tensors.extend([x.indices, x.values, x.dense_shape])
placeholders.append(
"SparseTensor(indices={}, values={}, shape={})".format(
placeholder, placeholder, placeholder)
)
placeholder, placeholder, placeholder))
elif tensor_util.is_tensor(x):
tensors.append(x)
placeholders.append(placeholder)
@ -335,13 +332,16 @@ def print_v2(*inputs, **kwargs):
template = " ".join(templates)
template = template.replace("'" + placeholder + "'", placeholder)
formatted_string = string_ops.string_format(
inputs=tensors, template=template, placeholder=placeholder,
inputs=tensors,
template=template,
placeholder=placeholder,
summarize=summarize,
name=format_name)

return gen_logging_ops.print_v2(formatted_string,
output_stream=output_stream_string,
name=name)
return gen_logging_ops.print_v2(
formatted_string, output_stream=output_stream_string, name=name)


# pylint: enable=g-doc-args

@ -369,7 +369,8 @@ def histogram_summary(tag, values, collections=None, name=None):
This op is deprecated. Please switch to tf.summary.histogram.

For an explanation of why this op was deprecated, and information on how to
migrate, look ['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)
migrate, look
['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)

The generated
[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
@ -379,8 +380,8 @@ def histogram_summary(tag, values, collections=None, name=None):

Args:
tag: A `string` `Tensor`. 0-D. Tag to use for the summary value.
values: A real numeric `Tensor`. Any shape. Values to use to
build the histogram.
values: A real numeric `Tensor`. Any shape. Values to use to build the
histogram.
collections: Optional list of graph collections keys. The new summary op is
added to these collections. Defaults to `[GraphKeys.SUMMARIES]`.
name: A name for the operation (optional).
@ -390,8 +391,7 @@ def histogram_summary(tag, values, collections=None, name=None):
buffer.
"""
with ops.name_scope(name, "HistogramSummary", [tag, values]) as scope:
val = gen_logging_ops.histogram_summary(
tag=tag, values=values, name=scope)
val = gen_logging_ops.histogram_summary(tag=tag, values=values, name=scope)
_Collect(val, collections, [ops.GraphKeys.SUMMARIES])
return val

@ -407,7 +407,8 @@ def image_summary(tag, tensor, max_images=3, collections=None, name=None):
"""Outputs a `Summary` protocol buffer with images.

For an explanation of why this op was deprecated, and information on how to
migrate, look ['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)
migrate, look
['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)

The summary has up to `max_images` summary values containing images. The
images are built from `tensor` which must be 4-D with shape `[batch_size,
@ -437,8 +438,8 @@ def image_summary(tag, tensor, max_images=3, collections=None, name=None):
generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.

Args:
tag: A scalar `Tensor` of type `string`. Used to build the `tag`
of the summary values.
tag: A scalar `Tensor` of type `string`. Used to build the `tag` of the
summary values.
tensor: A 4-D `uint8` or `float32` `Tensor` of shape `[batch_size, height,
width, channels]` where `channels` is 1, 3, or 4.
max_images: Max number of batch elements to generate images for.
@ -473,7 +474,8 @@ def audio_summary(tag,

This op is deprecated. Please switch to tf.summary.audio.
For an explanation of why this op was deprecated, and information on how to
migrate, look ['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)
migrate, look
['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)

The summary has up to `max_outputs` summary values containing audio. The
audio is built from `tensor` which must be 3-D with shape `[batch_size,
@ -489,8 +491,8 @@ def audio_summary(tag,
generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc.

Args:
tag: A scalar `Tensor` of type `string`. Used to build the `tag`
of the summary values.
tag: A scalar `Tensor` of type `string`. Used to build the `tag` of the
summary values.
tensor: A 3-D `float32` `Tensor` of shape `[batch_size, frames, channels]`
or a 2-D `float32` `Tensor` of shape `[batch_size, frames]`.
sample_rate: A Scalar `float32` `Tensor` indicating the sample rate of the
@ -505,8 +507,8 @@ def audio_summary(tag,
buffer.
"""
with ops.name_scope(name, "AudioSummary", [tag, tensor]) as scope:
sample_rate = ops.convert_to_tensor(sample_rate, dtype=dtypes.float32,
name="sample_rate")
sample_rate = ops.convert_to_tensor(
sample_rate, dtype=dtypes.float32, name="sample_rate")
val = gen_logging_ops.audio_summary_v2(
tag=tag,
tensor=tensor,
@ -522,7 +524,8 @@ def merge_summary(inputs, collections=None, name=None):
# pylint: disable=line-too-long
"""Merges summaries.

This op is deprecated. Please switch to tf.summary.merge, which has identical
This op is deprecated. Please switch to tf.compat.v1.summary.merge, which has
identical
behavior.

This op creates a
@ -554,7 +557,8 @@ def merge_summary(inputs, collections=None, name=None):
def merge_all_summaries(key=ops.GraphKeys.SUMMARIES):
"""Merges all summaries collected in the default graph.

This op is deprecated. Please switch to tf.summary.merge_all, which has
This op is deprecated. Please switch to tf.compat.v1.summary.merge_all, which
has
identical behavior.

Args:
@ -610,7 +614,8 @@ def scalar_summary(tags, values, collections=None, name=None):

This op is deprecated. Please switch to tf.summary.scalar.
For an explanation of why this op was deprecated, and information on how to
migrate, look ['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)
migrate, look
['here'](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/deprecated/__init__.py)

The input `tags` and `values` must have the same shape. The generated
summary has a summary value for each tag-value pair in `tags` and `values`.
@ -631,6 +636,7 @@ def scalar_summary(tags, values, collections=None, name=None):
_Collect(val, collections, [ops.GraphKeys.SUMMARIES])
return val


ops.NotDifferentiable("HistogramSummary")
ops.NotDifferentiable("ImageSummary")
ops.NotDifferentiable("AudioSummary")
@ -66,7 +66,8 @@ def initialize_all_tables(name="init_all_tables"):
def tables_initializer(name="init_all_tables"):
"""Returns an Op that initializes all tables of the default graph.

See the [Low Level Intro](https://www.tensorflow.org/guide/low_level_intro#feature_columns)
See the [Low Level
Intro](https://www.tensorflow.org/guide/low_level_intro#feature_columns)
guide, for an example of usage.

Args:
@ -166,8 +167,7 @@ class InitializableLookupTableBase(LookupInterface):
default_value, dtype=self._value_dtype)
self._default_value.get_shape().merge_with(tensor_shape.scalar())
if isinstance(initializer, trackable_base.Trackable):
self._initializer = self._track_trackable(
initializer, "_initializer")
self._initializer = self._track_trackable(initializer, "_initializer")
with ops.init_scope():
self._resource_handle = self._create_resource()
self._init_op = self._initialize()
@ -219,8 +219,9 @@ class InitializableLookupTableBase(LookupInterface):
with ops.name_scope(
name, "%s_Lookup" % self.name,
(self.resource_handle, key_tensor, self._default_value)):
values = gen_lookup_ops.lookup_table_find_v2(
self.resource_handle, key_tensor, self._default_value)
values = gen_lookup_ops.lookup_table_find_v2(self.resource_handle,
key_tensor,
self._default_value)

values.set_shape(key_tensor.get_shape())
if isinstance(keys, sparse_tensor.SparseTensor):
@ -411,8 +412,9 @@ class KeyValueTensorInitializer(TableInitializerBase):
with ops.name_scope(
self._name, values=(table.resource_handle, self._keys, self._values)):
if fwd_compat.forward_compatible(2018, 9, 19):
init_op = gen_lookup_ops.lookup_table_import_v2(
table.resource_handle, self._keys, self._values)
init_op = gen_lookup_ops.lookup_table_import_v2(table.resource_handle,
self._keys,
self._values)
else:
# To maintain forward compatibility, use the old implementation.
init_op = gen_lookup_ops.initialize_table_v2(table.resource_handle,
@ -521,9 +523,9 @@ class TextFileInitializer(TableInitializerBase):
on `delimiter`.

Args:
filename: The filename of the text file to be used for initialization.
The path must be accessible from wherever the graph is initialized
(eg. trainer or eval workers). The filename may be a scalar `Tensor`.
filename: The filename of the text file to be used for initialization. The
path must be accessible from wherever the graph is initialized (eg.
trainer or eval workers). The filename may be a scalar `Tensor`.
key_dtype: The `key` data type.
key_index: the index that represents information of a line to get the
table 'key' values from.
@ -575,8 +577,7 @@ class TextFileInitializer(TableInitializerBase):
self._delimiter = delimiter
self._name = name
self._filename = self._track_trackable(
trackable.TrackableAsset(filename),
"_filename")
trackable.TrackableAsset(filename), "_filename")

super(TextFileInitializer, self).__init__(key_dtype, value_dtype)

@ -649,13 +650,13 @@ class TextFileStringTableInitializer(TextFileInitializer):
on `delimiter`.

Args:
filename: The filename of the text file to be used for initialization.
The path must be accessible from wherever the graph is initialized
(eg. trainer or eval workers). The filename may be a scalar `Tensor`.
filename: The filename of the text file to be used for initialization. The
path must be accessible from wherever the graph is initialized (eg.
trainer or eval workers). The filename may be a scalar `Tensor`.
key_column_index: The column index from the text file to get the keys
from. The default is to use the line number, starting from zero.
value_column_index: The column index from the text file to get the
values from. The default is to use the whole line content.
value_column_index: The column index from the text file to get the values
from. The default is to use the whole line content.
vocab_size: The number of elements in the file, if known.
delimiter: The delimiter to separate fields in a line.
name: Optional name for the op.
@ -701,9 +702,9 @@ class TextFileIdTableInitializer(TextFileInitializer):
on `delimiter`.

Args:
filename: The filename of the text file to be used for initialization.
The path must be accessible from wherever the graph is initialized
(eg. trainer or eval workers). The filename may be a scalar `Tensor`.
filename: The filename of the text file to be used for initialization. The
path must be accessible from wherever the graph is initialized (eg.
trainer or eval workers). The filename may be a scalar `Tensor`.
key_column_index: The column index from the text file to get the `key`
values from. The default is to use the whole line content.
value_column_index: The column index from the text file to get the `value`
@ -832,8 +833,8 @@ class IdTableWithHashBuckets(LookupInterface):
assignation of out-of-vocabulary buckets (optional).
name: A name for the operation (optional).
key_dtype: Data type of keys passed to `lookup`. Defaults to
`table.key_dtype` if `table` is specified, otherwise `tf.string`.
Must be string or integer, and must be castable to `table.key_dtype`.
`table.key_dtype` if `table` is specified, otherwise `tf.string`. Must
be string or integer, and must be castable to `table.key_dtype`.

Raises:
ValueError: when `table` is None and `num_oov_buckets` is not positive.
@ -866,13 +867,13 @@ class IdTableWithHashBuckets(LookupInterface):
self._table = None
name = name or "hash_bucket"
if (not key_dtype.is_integer) and (dtypes.string != key_dtype):
raise TypeError(
"Invalid key_dtype, expected integer or string, got %s." % key_dtype)
raise TypeError("Invalid key_dtype, expected integer or string, got %s." %
key_dtype)
self._num_oov_buckets = num_oov_buckets

if not isinstance(hasher_spec, HasherSpec):
raise TypeError(
"hasher_spec must be of type HasherSpec, got %s" % hasher_spec)
raise TypeError("hasher_spec must be of type HasherSpec, got %s" %
hasher_spec)
self._hasher_spec = hasher_spec
if name:
self._table_name = name.split("/")[-1]
@ -1191,7 +1192,8 @@ def index_table_from_file(vocabulary_file=None,
`[vocabulary size, vocabulary size + num_oov_buckets - 1]`.

The underlying table must be initialized by calling
`session.run(tf.tables_initializer)` or `session.run(table.init)` once.
`session.run(tf.compat.v1.tables_initializer)` or `session.run(table.init)`
once.

To specify multi-column vocabulary files, use key_column_index and
value_column_index and delimiter.
@ -1219,7 +1221,7 @@ def index_table_from_file(vocabulary_file=None,
vocabulary_file="test.txt", num_oov_buckets=1)
ids = table.lookup(features)
...
tf.tables_initializer().run()
tf.compat.v1.tables_initializer().run()

ids.eval()  ==> [0, 1, 3, 2]  # where 3 is the out-of-vocabulary bucket
```
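A fleshed-out sketch of the example above, assuming the contrib-era `tf.contrib.lookup.index_table_from_file` entry point and a hypothetical `test.txt` vocabulary file containing `emerson`, `lake`, and `palmer`:

```python
import tensorflow as tf

features = tf.constant(["emerson", "lake", "and", "palmer"])
table = tf.contrib.lookup.index_table_from_file(
    vocabulary_file="test.txt", num_oov_buckets=1)
ids = table.lookup(features)

with tf.compat.v1.Session() as sess:
  sess.run(tf.compat.v1.tables_initializer())
  print(sess.run(ids))  # [0, 1, 3, 2]; 3 is the out-of-vocabulary bucket
```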
@ -1248,12 +1250,13 @@ def index_table_from_file(vocabulary_file=None,
ValueError: If `num_oov_buckets` is negative or `vocab_size` is not greater
than zero.
"""
if vocabulary_file is None or (
isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
if vocabulary_file is None or (isinstance(vocabulary_file, six.string_types)
and not vocabulary_file):
raise ValueError("vocabulary_file must be specified and must not be empty.")
if num_oov_buckets < 0:
raise ValueError("num_oov_buckets must be greater or equal than 0, got %d."
% num_oov_buckets)
raise ValueError(
"num_oov_buckets must be greater or equal than 0, got %d." %
num_oov_buckets)
if vocab_size is not None and vocab_size < 1:
vocab_file_value = vocabulary_file
if isinstance(vocabulary_file, ops.Tensor):
@ -1305,7 +1308,8 @@ def index_table_from_tensor(vocabulary_list,
`[vocabulary list size, vocabulary list size + num_oov_buckets - 1]`.

The underlying table must be initialized by calling
`session.run(tf.tables_initializer)` or `session.run(table.init)` once.
`session.run(tf.compat.v1.tables_initializer)` or `session.run(table.init)`
once.

Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
the table initializer op, it will throw a `FailedPreconditionError`.
@ -1319,7 +1323,7 @@ def index_table_from_tensor(vocabulary_list,
features = tf.constant(["emerson", "lake", "and", "palmer"])
ids = table.lookup(features)
...
tf.tables_initializer().run()
tf.compat.v1.tables_initializer().run()

ids.eval()  ==> [0, 1, 4, 2]
```
@ -1347,8 +1351,9 @@ def index_table_from_tensor(vocabulary_list,
raise ValueError("vocabulary_list must be specified.")

if num_oov_buckets < 0:
raise ValueError("num_oov_buckets must be greater or equal than 0, got %d."
% num_oov_buckets)
raise ValueError(
"num_oov_buckets must be greater or equal than 0, got %d." %
num_oov_buckets)

if (not dtype.is_integer) and (dtypes.string != dtype.base_dtype):
raise TypeError("Only integer and string keys are supported.")
@ -1356,9 +1361,9 @@ def index_table_from_tensor(vocabulary_list,
with ops.name_scope(name, "string_to_index"):
keys = ops.convert_to_tensor(vocabulary_list)
if keys.dtype.is_integer != dtype.is_integer:
raise ValueError("Expected %s, got %s." %
("integer"
if dtype.is_integer else "non-integer", keys.dtype))
raise ValueError(
"Expected %s, got %s." %
("integer" if dtype.is_integer else "non-integer", keys.dtype))
if (not dtype.is_integer) and (keys.dtype.base_dtype != dtype):
raise ValueError("Expected %s, got %s." % (dtype, keys.dtype))
num_elements = array_ops.size(keys)
@ -1401,7 +1406,8 @@ def index_to_string_table_from_file(vocabulary_file,
(an out-of-vocabulary entry) is assigned the `default_value`

The underlying table must be initialized by calling
`session.run(tf.tables_initializer)` or `session.run(table.init)` once.
`session.run(tf.compat.v1.tables_initializer)` or `session.run(table.init)`
once.

To specify multi-column vocabulary files, use key_column_index and
value_column_index and delimiter.
@ -1429,7 +1435,7 @@ def index_to_string_table_from_file(vocabulary_file,
vocabulary_file="test.txt", default_value="UNKNOWN")
values = table.lookup(indices)
...
tf.tables_initializer().run()
tf.compat.v1.tables_initializer().run()

values.eval() ==> ["lake", "UNKNOWN"]
```
@ -1453,8 +1459,8 @@ def index_to_string_table_from_file(vocabulary_file,
ValueError: when `vocabulary_file` is empty.
ValueError: when `vocab_size` is invalid.
"""
if vocabulary_file is None or (
isinstance(vocabulary_file, six.string_types) and not vocabulary_file):
if vocabulary_file is None or (isinstance(vocabulary_file, six.string_types)
and not vocabulary_file):
raise ValueError("vocabulary_file must be specified and must not be empty.")

if vocab_size is not None and vocab_size < 1:
@ -1487,7 +1493,8 @@ def index_to_string_table_from_tensor(vocabulary_list,
(an out-of-vocabulary entry) is assigned the `default_value`

The underlying table must be initialized by calling
`session.run(tf.tables_initializer)` or `session.run(table.init)` once.
`session.run(tf.compat.v1.tables_initializer)` or `session.run(table.init)`
once.

Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
the table initializer op, it will throw a `FailedPreconditionError`.
@ -1501,7 +1508,7 @@ def index_to_string_table_from_tensor(vocabulary_list,
vocabulary_list, default_value="UNKNOWN")
values = table.lookup(indices)
...
tf.tables_initializer().run()
tf.compat.v1.tables_initializer().run()

values.eval() ==> ["lake", "UNKNOWN"]
```
@ -1761,8 +1768,9 @@ class MutableHashTable(LookupInterface):
# pylint: disable=protected-access
with ops.name_scope(name, "%s_table_restore" % self.name):
with ops.colocate_with(self.op.resource_handle):
return gen_lookup_ops.lookup_table_import_v2(
self.op.resource_handle, restored_tensors[0], restored_tensors[1])
return gen_lookup_ops.lookup_table_import_v2(self.op.resource_handle,
restored_tensors[0],
restored_tensors[1])


@tf_export("lookup.experimental.DenseHashTable")
@ -2052,8 +2060,9 @@ class DenseHashTable(LookupInterface):
# pylint: disable=protected-access
with ops.name_scope(name, "%s_table_restore" % self.name):
with ops.colocate_with(self.op.resource_handle):
return gen_lookup_ops.lookup_table_import_v2(
self.op.resource_handle, restored_tensors[0], restored_tensors[1])
return gen_lookup_ops.lookup_table_import_v2(self.op.resource_handle,
restored_tensors[0],
restored_tensors[1])


ops.NotDifferentiable("LookupTableFind")
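Since the hunk above confirms the `lookup.experimental.DenseHashTable` export, a minimal usage sketch; the argument values are illustrative, and `empty_key`/`deleted_key` must be sentinel keys that never occur in real data:

```python
import tensorflow as tf

table = tf.lookup.experimental.DenseHashTable(
    key_dtype=tf.int64,
    value_dtype=tf.int64,
    default_value=-1,
    empty_key=0,
    deleted_key=-1)
table.insert(tf.constant([1, 2], dtype=tf.int64),
             tf.constant([10, 20], dtype=tf.int64))
print(table.lookup(tf.constant([1, 3], dtype=tf.int64)))  # [10, -1]
```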
@ -48,7 +48,7 @@ For example:

```python
c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
tf.segment_sum(c, tf.constant([0, 0, 1]))
tf.math.segment_sum(c, tf.constant([0, 0, 1]))
# ==> [[0 0 0 0]
#      [5 6 7 8]]
```
@ -60,7 +60,7 @@ tensor can be efficiently allocated.

``` python
c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]])
tf.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2)
tf.math.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2)
# ==> [[ 6,  8, 10, 12],
#       [-1, -2, -3, -4]]
```
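The two examples differ in one important way that the surrounding text explains: `segment_sum` requires sorted segment ids, while `unsorted_segment_sum` instead takes `num_segments` so the output can be allocated up front. A combined sketch:

```python
import tensorflow as tf

c = tf.constant([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]])

# Sorted ids: rows 0 and 1 fall in segment 0, row 2 in segment 1.
print(tf.math.segment_sum(c, tf.constant([0, 0, 1])))
# [[0 0 0 0]
#  [5 6 7 8]]

# Unsorted ids: rows 0 and 2 fall in segment 0, row 1 in segment 1.
print(tf.math.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2))
# [[ 6  8 10 12]
#  [-1 -2 -3 -4]]
```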
@ -126,37 +126,35 @@ def _set_doc(doc):
@deprecation.deprecated_args(None, "Use the `axis` argument instead",
"dimension")
@_set_doc(
gen_math_ops.arg_max.__doc__.replace("dimensions", "axes").replace(
"dimension", "axis"))
gen_math_ops.arg_max.__doc__.replace("dimensions",
"axes").replace("dimension", "axis"))
def argmax(input,
axis=None,
name=None,
dimension=None,
output_type=dtypes.int64):
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "dimension", dimension)
axis = deprecation.deprecated_argument_lookup("axis", axis, "dimension",
dimension)
return argmax_v2(input, axis, output_type, name)


@tf_export("math.argmax", "argmax", v1=[])
def argmax_v2(input,
axis=None,
output_type=dtypes.int64,
name=None):
def argmax_v2(input, axis=None, output_type=dtypes.int64, name=None):
"""Returns the index with the largest value across axes of a tensor.

Note that in case of ties the identity of the return value is not guaranteed.

Args:
input: A `Tensor`. Must be one of the following types: `float32`, `float64`,
`int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`, `quint8`,
`qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`, `uint64`.
`int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`,
`quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`,
`uint64`.
axis: A `Tensor`. Must be one of the following types: `int32`, `int64`.
int32 or int64, must be in the range `-rank(input), rank(input))`.
Describes which axis of the input Tensor to reduce across. For vectors,
use axis = 0.
output_type: An optional `tf.DType` from: `tf.int32, tf.int64`.
Defaults to `tf.int64`.
output_type: An optional `tf.DType` from: `tf.int32, tf.int64`. Defaults to
`tf.int64`.
name: A name for the operation (optional).

Returns:
@ -181,37 +179,35 @@ def argmax_v2(input,
@deprecation.deprecated_args(None, "Use the `axis` argument instead",
"dimension")
@_set_doc(
gen_math_ops.arg_min.__doc__.replace("dimensions", "axes").replace(
"dimension", "axis"))
gen_math_ops.arg_min.__doc__.replace("dimensions",
"axes").replace("dimension", "axis"))
def argmin(input,
axis=None,
name=None,
dimension=None,
output_type=dtypes.int64):
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "dimension", dimension)
axis = deprecation.deprecated_argument_lookup("axis", axis, "dimension",
dimension)
return argmin_v2(input, axis, output_type, name)


@tf_export("math.argmin", "argmin", v1=[])
def argmin_v2(input,
axis=None,
output_type=dtypes.int64,
name=None):
def argmin_v2(input, axis=None, output_type=dtypes.int64, name=None):
"""Returns the index with the smallest value across axes of a tensor.

Note that in case of ties the identity of the return value is not guaranteed.

Args:
input: A `Tensor`. Must be one of the following types: `float32`, `float64`,
`int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`, `quint8`,
`qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`, `uint64`.
`int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`, `qint8`,
`quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`, `uint32`,
`uint64`.
axis: A `Tensor`. Must be one of the following types: `int32`, `int64`.
int32 or int64, must be in the range `-rank(input), rank(input))`.
Describes which axis of the input Tensor to reduce across. For vectors,
use axis = 0.
output_type: An optional `tf.DType` from: `tf.int32, tf.int64`.
Defaults to `tf.int64`.
output_type: An optional `tf.DType` from: `tf.int32, tf.int64`. Defaults to
`tf.int64`.
name: A name for the operation (optional).

Returns:
@ -242,8 +238,8 @@ def argmin_v2(input,
def abs(x, name=None):  # pylint: disable=redefined-builtin
r"""Computes the absolute value of a tensor.

Given a tensor of integer or floating-point values, this operation returns a
tensor of the same type, where each element contains the absolute value of the
Given a tensor of integer or floating-point values, this operation returns a
tensor of the same type, where each element contains the absolute value of the
corresponding element in the input.

Given a tensor `x` of complex numbers, this operation returns a tensor of type
@ -261,7 +257,7 @@ def abs(x, name=None):  # pylint: disable=redefined-builtin
name: A name for the operation (optional).

Returns:
A `Tensor` or `SparseTensor` the same size, type, and sparsity as `x` with
A `Tensor` or `SparseTensor` the same size, type, and sparsity as `x` with
absolute values.
Note, for `complex64` or `complex128` input, the returned `Tensor` will be
of type `float32` or `float64`, respectively.
@ -271,6 +267,8 @@ def abs(x, name=None):  # pylint: disable=redefined-builtin
if x.dtype.is_complex:
return gen_math_ops.complex_abs(x, Tout=x.dtype.real_dtype, name=name)
return gen_math_ops._abs(x, name=name)


# pylint: enable=g-docstring-has-escape


@ -359,7 +357,6 @@ def _sub(x, y, name=None):
_sub.__doc__ = (
gen_math_ops.sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__))


negative = gen_math_ops.neg


@ -410,8 +407,8 @@ def scalar_mul(scalar, x, name=None):
shape = scalar.get_shape()
if shape.ndims == 0:
if isinstance(x, ops.IndexedSlices):
return ops.IndexedSlices(gen_math_ops.mul(scalar, x.values, name),
x.indices, x.dense_shape)
return ops.IndexedSlices(
gen_math_ops.mul(scalar, x.values, name), x.indices, x.dense_shape)
else:
return gen_math_ops.mul(scalar, x, name)
else:
@ -441,9 +438,9 @@ def pow(x, y, name=None):  # pylint: disable=redefined-builtin

Args:
x: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`,
`complex64`, or `complex128`.
`complex64`, or `complex128`.
y: A `Tensor` of type `float16`, `float32`, `float64`, `int32`, `int64`,
`complex64`, or `complex128`.
`complex64`, or `complex128`.
name: A name for the operation (optional).

Returns:
@ -475,8 +472,7 @@ def complex(real, imag, name=None):
```

Args:
real: A `Tensor`. Must be one of the following types: `float32`,
`float64`.
real: A `Tensor`. Must be one of the following types: `float32`, `float64`.
imag: A `Tensor`. Must have the same type as `real`.
name: A name for the operation (optional).

@ -510,7 +506,7 @@ def real(input, name=None):

```python
x = tf.constant([-2.25 + 4.75j, 3.25 + 5.75j])
tf.real(x)  # [-2.25, 3.25]
tf.math.real(x)  # [-2.25, 3.25]
```

If `input` is already real, it is returned unchanged.
@ -544,7 +540,7 @@ def imag(input, name=None):

```python
x = tf.constant([-2.25 + 4.75j, 3.25 + 5.75j])
tf.imag(x)  # [4.75, 5.75]
tf.math.imag(x)  # [4.75, 5.75]
```

Args:
@ -582,7 +578,7 @@ def angle(input, name=None):

```
input = tf.constant([-2.25 + 4.75j, 3.25 + 5.75j], dtype=tf.complex64)
tf.angle(input).numpy()
tf.math.angle(input).numpy()
# ==> array([2.0131705, 1.056345 ], dtype=float32)
```

@ -719,17 +715,16 @@ def saturate_cast(value, dtype, name=None):
value = ops.convert_to_tensor(value, name="value")
dtype = dtypes.as_dtype(dtype).base_dtype
if value.dtype.min < dtype.min:
value = gen_math_ops.maximum(value,
ops.convert_to_tensor(
dtype.min, dtype=value.dtype,
name="min"))
value = gen_math_ops.maximum(
value,
ops.convert_to_tensor(dtype.min, dtype=value.dtype, name="min"))
if value.dtype.max > dtype.max:
value = gen_math_ops.minimum(value,
ops.convert_to_tensor(
dtype.max, dtype=value.dtype,
name="max"))
value = gen_math_ops.minimum(
value,
ops.convert_to_tensor(dtype.max, dtype=value.dtype, name="max"))
return cast(value, dtype, name=name)
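The clamp-then-cast logic above is what distinguishes `saturate_cast` from a plain `tf.cast`; a small illustrative sketch:

```python
import tensorflow as tf

x = tf.constant([-1.0, 10.0, 300.0])
# Values outside uint8's [0, 255] range are pinned to the boundary
# instead of being cast with overflow.
print(tf.saturate_cast(x, tf.uint8))  # [0, 10, 255]
```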
@deprecation.deprecated(date=None, instructions="Use `tf.cast` instead.")
@tf_export(v1=["to_float"])
def to_float(x, name="ToFloat"):
@ -889,8 +884,8 @@ def _OverrideBinaryOperatorHelper(func, op_name, clazz_object=ops.Tensor):
return func(x, y, name=name)
elif not isinstance(y, sparse_tensor.SparseTensor):
try:
y = ops.convert_to_tensor_v2(y, dtype_hint=x.dtype.base_dtype,
name="y")
y = ops.convert_to_tensor_v2(
y, dtype_hint=x.dtype.base_dtype, name="y")
except TypeError:
# If the RHS is not a tensor, it might be a tensor aware object
# that can implement the operator with knowledge of itself
@ -904,13 +899,10 @@ def _OverrideBinaryOperatorHelper(func, op_name, clazz_object=ops.Tensor):
def binary_op_wrapper_sparse(sp_x, y):
with ops.name_scope(None, op_name, [sp_x, y]) as name:
y = ops.convert_to_tensor(y, dtype=sp_x.dtype.base_dtype, name="y")
return sparse_tensor.SparseTensor(sp_x.indices,
func(
sp_x.indices,
sp_x.values,
sp_x.dense_shape,
y,
name=name), sp_x.dense_shape)
return sparse_tensor.SparseTensor(
sp_x.indices,
func(sp_x.indices, sp_x.values, sp_x.dense_shape, y, name=name),
sp_x.dense_shape)

def r_binary_op_wrapper(y, x):
with ops.name_scope(None, op_name, [x, y]) as name:
@ -999,12 +991,15 @@ def _truediv_python3(x, y, name=None):


def _div_python2(x, y, name=None):
"""Divide two values using Python 2 semantics. Used for Tensor.__div__.
"""Divide two values using Python 2 semantics.

Used for Tensor.__div__.

Args:
x: `Tensor` numerator of real numeric type.
y: `Tensor` denominator of real numeric type.
name: A name for the operation (optional).

Returns:
`x / y` returns the quotient of x and y.
"""
@ -1075,6 +1070,7 @@ def div(x, y, name=None):
x: `Tensor` numerator of real numeric type.
y: `Tensor` denominator of real numeric type.
name: A name for the operation (optional).

Returns:
`x / y` returns the quotient of x and y.
"""
@ -1091,6 +1087,7 @@ def div_no_nan(x, y, name=None):
x: A `Tensor`. Must be one of the following types: `float32`, `float64`.
y: A `Tensor` whose dtype is compatible with `x`.
name: A name for the operation (optional).

Returns:
The element-wise value of the x divided by y.
"""
@ -1126,8 +1123,8 @@ def multiply_no_nan(x, y, name=None):
x_dtype = x.dtype.base_dtype
y_dtype = y.dtype.base_dtype
if x_dtype != y_dtype:
raise TypeError(
"x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype))
raise TypeError("x and y must have the same dtype, got %r != %r" %
(x_dtype, y_dtype))
return gen_math_ops.mul_no_nan(x, y, name=name)


@ -1143,7 +1140,8 @@ mod = gen_math_ops.floor_mod
def floordiv(x, y, name=None):
"""Divides `x / y` elementwise, rounding toward the most negative integer.

The same as `tf.div(x,y)` for integers, but uses `tf.floor(tf.div(x,y))` for
The same as `tf.compat.v1.div(x,y)` for integers, but uses
`tf.floor(tf.compat.v1.div(x,y))` for
floating point arguments so that the result is always an integer (though
possibly an integer represented as floating point). This op is generated by
`x // y` floor division in Python 3 and in Python 2.7 with
@ -1260,14 +1258,13 @@ def range(start, limit=None, delta=1, dtype=None, name="range"):  # pylint: disa
```

Args:
start: A 0-D `Tensor` (scalar). Acts as first entry in the range if
`limit` is not None; otherwise, acts as range limit and first entry
start: A 0-D `Tensor` (scalar). Acts as first entry in the range if `limit`
is not None; otherwise, acts as range limit and first entry defaults to 0.
limit: A 0-D `Tensor` (scalar). Upper limit of sequence, exclusive. If None,
defaults to the value of `start` while the first entry of the range
defaults to 0.
limit: A 0-D `Tensor` (scalar). Upper limit of sequence,
exclusive. If None, defaults to the value of `start` while the first
entry of the range defaults to 0.
delta: A 0-D `Tensor` (scalar). Number that increments
`start`. Defaults to 1.
delta: A 0-D `Tensor` (scalar). Number that increments `start`. Defaults to
1.
dtype: The type of the elements of the resulting tensor.
name: A name for the operation. Defaults to "range".

@ -1292,9 +1289,8 @@ def range(start, limit=None, delta=1, dtype=None, name="range"):  # pylint: disa
dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64
]
assert all(arg.dtype in dtype_hierarchy for arg in [start, limit, delta])
inferred_dtype = max(
[arg.dtype for arg in [start, limit, delta]],
key=dtype_hierarchy.index)
inferred_dtype = max([arg.dtype for arg in [start, limit, delta]],
key=dtype_hierarchy.index)

start = cast(start, inferred_dtype)
limit = cast(limit, inferred_dtype)
@ -1336,8 +1332,9 @@ def _may_reduce_to_scalar(keepdims, axis, output):


@tf_export(v1=["math.reduce_sum", "reduce_sum"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_sum_v1(input_tensor,
axis=None,
keepdims=None,
@ -1367,9 +1364,9 @@ def reduce_sum_v1(input_tensor,

Args:
input_tensor: The tensor to reduce. Should have numeric type.
axis: The dimensions to reduce. If `None` (the default),
reduces all dimensions. Must be in the range
`[-rank(input_tensor), rank(input_tensor))`.
axis: The dimensions to reduce. If `None` (the default), reduces all
dimensions. Must be in the range `[-rank(input_tensor),
rank(input_tensor))`.
keepdims: If true, retains reduced dimensions with length 1.
name: A name for the operation (optional).
reduction_indices: The old (deprecated) name for axis.
@ -1383,8 +1380,9 @@ def reduce_sum_v1(input_tensor,
int64 while tensorflow returns the same dtype as the input.
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_sum(input_tensor, axis, keepdims, name)
@ -1480,8 +1478,9 @@ def reduce_euclidean_norm(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.count_nonzero", "count_nonzero"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
@deprecation.deprecated_args(
None, "reduction_indices is deprecated, use axis instead", "axis")
def count_nonzero(input_tensor=None,
@ -1510,11 +1509,11 @@ def count_nonzero(input_tensor=None,

```python
x = tf.constant([[0, 1, 0], [1, 1, 0]])
tf.count_nonzero(x)  # 3
tf.count_nonzero(x, 0)  # [1, 2, 0]
tf.count_nonzero(x, 1)  # [1, 2]
tf.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
tf.count_nonzero(x, [0, 1])  # 3
tf.math.count_nonzero(x)  # 3
tf.math.count_nonzero(x, 0)  # [1, 2, 0]
tf.math.count_nonzero(x, 1)  # [1, 2]
tf.math.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
tf.math.count_nonzero(x, [0, 1])  # 3
```
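A compact sketch of the reduction semantics listed above, including the `dtype` argument from the Args section:

```python
import tensorflow as tf

x = tf.constant([[0, 1, 0], [1, 1, 0]])
print(tf.math.count_nonzero(x))                          # 3, dtype int64
print(tf.math.count_nonzero(x, axis=1, dtype=tf.int32))  # [1, 2], int32
```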
|
||||
|
||||
**NOTE** Strings are compared against zero-length empty string `""`. Any
|
||||
@ -1523,15 +1522,15 @@ def count_nonzero(input_tensor=None,
|
||||
For example:
|
||||
```python
|
||||
x = tf.constant(["", "a", " ", "b", ""])
|
||||
tf.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings.
|
||||
tf.math.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings.
|
||||
```
|
||||
|
||||
Args:
|
||||
input_tensor: The tensor to reduce. Should be of numeric type, `bool`,
|
||||
or `string`.
|
||||
axis: The dimensions to reduce. If `None` (the default),
|
||||
reduces all dimensions. Must be in the range
|
||||
`[-rank(input_tensor), rank(input_tensor))`.
|
||||
input_tensor: The tensor to reduce. Should be of numeric type, `bool`, or
|
||||
`string`.
|
||||
axis: The dimensions to reduce. If `None` (the default), reduces all
|
||||
dimensions. Must be in the range `[-rank(input_tensor),
|
||||
rank(input_tensor))`.
|
||||
keepdims: If true, retains reduced dimensions with length 1.
|
||||
dtype: The output dtype; defaults to `tf.int64`.
|
||||
name: A name for the operation (optional).
|
||||
@ -1544,22 +1543,23 @@ def count_nonzero(input_tensor=None,
|
||||
"""
|
||||
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
|
||||
"keep_dims", keep_dims)
|
||||
input_tensor = deprecation.deprecated_argument_lookup(
|
||||
"input", input, "input_tensor", input_tensor)
|
||||
axis = deprecation.deprecated_argument_lookup(
|
||||
"axis", axis,
|
||||
"reduction_indices", reduction_indices
|
||||
)
|
||||
input_tensor = deprecation.deprecated_argument_lookup("input", input,
|
||||
"input_tensor",
|
||||
input_tensor)
|
||||
axis = deprecation.deprecated_argument_lookup("axis", axis,
|
||||
"reduction_indices",
|
||||
reduction_indices)
|
||||
|
||||
return count_nonzero_v2(input_tensor, axis, keepdims, dtype, name)
|
||||
|
||||
|
||||
@tf_export("math.count_nonzero", v1=[])
|
||||
def count_nonzero_v2(input, # pylint: disable=redefined-builtin
|
||||
axis=None,
|
||||
keepdims=None,
|
||||
dtype=dtypes.int64,
|
||||
name=None):
|
||||
def count_nonzero_v2(
|
||||
input, # pylint: disable=redefined-builtin
|
||||
axis=None,
|
||||
keepdims=None,
|
||||
dtype=dtypes.int64,
|
||||
name=None):
|
||||
"""Computes number of nonzero elements across dimensions of a tensor.
|
||||
|
||||
Reduces `input` along the dimensions given in `axis`.
|
||||
@ -1578,11 +1578,11 @@ def count_nonzero_v2(input, # pylint: disable=redefined-builtin
|
||||
|
||||
```python
|
||||
x = tf.constant([[0, 1, 0], [1, 1, 0]])
|
||||
tf.count_nonzero(x) # 3
|
||||
tf.count_nonzero(x, 0) # [1, 2, 0]
|
||||
tf.count_nonzero(x, 1) # [1, 2]
|
||||
tf.count_nonzero(x, 1, keepdims=True) # [[1], [2]]
|
||||
tf.count_nonzero(x, [0, 1]) # 3
|
||||
tf.math.count_nonzero(x) # 3
|
||||
tf.math.count_nonzero(x, 0) # [1, 2, 0]
|
||||
tf.math.count_nonzero(x, 1) # [1, 2]
|
||||
tf.math.count_nonzero(x, 1, keepdims=True) # [[1], [2]]
|
||||
tf.math.count_nonzero(x, [0, 1]) # 3
|
||||
```
|
||||
|
||||
**NOTE** Strings are compared against zero-length empty string `""`. Any
|
||||
@ -1591,15 +1591,13 @@ def count_nonzero_v2(input, # pylint: disable=redefined-builtin
|
||||
For example:
|
||||
```python
|
||||
x = tf.constant(["", "a", " ", "b", ""])
|
||||
tf.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings.
|
||||
tf.math.count_nonzero(x) # 3, with "a", " ", and "b" as nonzero strings.
|
||||
```
|
||||
|
||||
Args:
|
||||
input: The tensor to reduce. Should be of numeric type, `bool`,
|
||||
or `string`.
|
||||
axis: The dimensions to reduce. If `None` (the default),
|
||||
reduces all dimensions. Must be in the range
|
||||
`[-rank(input), rank(input))`.
|
||||
input: The tensor to reduce. Should be of numeric type, `bool`, or `string`.
|
||||
axis: The dimensions to reduce. If `None` (the default), reduces all
|
||||
dimensions. Must be in the range `[-rank(input), rank(input))`.
|
||||
keepdims: If true, retains reduced dimensions with length 1.
|
||||
dtype: The output dtype; defaults to `tf.int64`.
|
||||
name: A name for the operation (optional).
@ -1650,9 +1648,9 @@ def reduce_mean_v1(input_tensor,

Args:
input_tensor: The tensor to reduce. Should have numeric type.
axis: The dimensions to reduce. If `None` (the default),
reduces all dimensions. Must be in the range
`[-rank(input_tensor), rank(input_tensor))`.
axis: The dimensions to reduce. If `None` (the default), reduces all
dimensions. Must be in the range `[-rank(input_tensor),
rank(input_tensor))`.
keepdims: If true, retains reduced dimensions with length 1.
name: A name for the operation (optional).
reduction_indices: The old (deprecated) name for axis.
@ -1678,8 +1676,9 @@ def reduce_mean_v1(input_tensor,

@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_mean(input_tensor, axis, keepdims, name)
@ -1852,9 +1851,9 @@ def reduce_prod(input_tensor, axis=None, keepdims=False, name=None):

Args:
input_tensor: The tensor to reduce. Should have numeric type.
axis: The dimensions to reduce. If `None` (the default),
reduces all dimensions. Must be in the range
`[-rank(input_tensor), rank(input_tensor))`.
axis: The dimensions to reduce. If `None` (the default), reduces all
dimensions. Must be in the range `[-rank(input_tensor),
rank(input_tensor))`.
keepdims: If true, retains reduced dimensions with length 1.
name: A name for the operation (optional).

@ -1874,8 +1873,9 @@ def reduce_prod(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.reduce_prod", "reduce_prod"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_prod_v1(input_tensor,
axis=None,
keepdims=None,
@ -1909,16 +1909,18 @@ def reduce_prod_v1(input_tensor,
Equivalent to np.prod
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_prod(input_tensor, axis, keepdims, name)


@tf_export(v1=["math.reduce_min", "reduce_min"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_min_v1(input_tensor,
axis=None,
keepdims=None,
@ -1952,8 +1954,9 @@ def reduce_min_v1(input_tensor,
Equivalent to np.min
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_min(input_tensor, axis, keepdims, name)
@ -1996,8 +1999,9 @@ def reduce_min(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.reduce_max", "reduce_max"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_max_v1(input_tensor,
axis=None,
keepdims=None,
@ -2016,9 +2020,9 @@ def reduce_max_v1(input_tensor,

Args:
input_tensor: The tensor to reduce. Should have real numeric type.
axis: The dimensions to reduce. If `None` (the default),
reduces all dimensions. Must be in the range
`[-rank(input_tensor), rank(input_tensor))`.
axis: The dimensions to reduce. If `None` (the default), reduces all
dimensions. Must be in the range `[-rank(input_tensor),
rank(input_tensor))`.
keepdims: If true, retains reduced dimensions with length 1.
name: A name for the operation (optional).
reduction_indices: The old (deprecated) name for axis.
@ -2031,8 +2035,9 @@ def reduce_max_v1(input_tensor,
Equivalent to np.max
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_max(input_tensor, axis, keepdims, name)
@ -2075,8 +2080,9 @@ def reduce_max(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.reduce_all", "reduce_all"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_all_v1(input_tensor,
axis=None,
keepdims=None,
@ -2119,8 +2125,9 @@ def reduce_all_v1(input_tensor,
Equivalent to np.all
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_all(input_tensor, axis, keepdims, name)
@ -2172,8 +2179,9 @@ def reduce_all(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.reduce_any", "reduce_any"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_any_v1(input_tensor,
axis=None,
keepdims=None,
@ -2216,8 +2224,9 @@ def reduce_any_v1(input_tensor,
Equivalent to np.any
@end_compatibility
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_any(input_tensor, axis, keepdims, name)
@ -2269,8 +2278,9 @@ def reduce_any(input_tensor, axis=None, keepdims=False, name=None):


@tf_export(v1=["math.reduce_logsumexp", "reduce_logsumexp"])
@deprecation.deprecated_args(
None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
@deprecation.deprecated_args(None,
"keep_dims is deprecated, use keepdims instead",
"keep_dims")
def reduce_logsumexp_v1(input_tensor,
axis=None,
keepdims=None,
@ -2315,8 +2325,9 @@ def reduce_logsumexp_v1(input_tensor,
Returns:
The reduced tensor.
"""
axis = deprecation.deprecated_argument_lookup(
"axis", axis, "reduction_indices", reduction_indices)
axis = deprecation.deprecated_argument_lookup("axis", axis,
"reduction_indices",
reduction_indices)
keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
"keep_dims", keep_dims)
return reduce_logsumexp(input_tensor, axis, keepdims, name)
@ -2363,10 +2374,7 @@ def reduce_logsumexp(input_tensor, axis=None, keepdims=False, name=None):
keepdims = False if keepdims is None else keepdims
input_tensor = ops.convert_to_tensor(input_tensor)
with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
raw_max = reduce_max(
input_tensor,
axis=axis,
keepdims=True)
raw_max = reduce_max(input_tensor, axis=axis, keepdims=True)
my_max = array_ops.stop_gradient(
array_ops.where(
gen_math_ops.is_finite(raw_max), raw_max,
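The `raw_max` handling above is the standard log-sum-exp stabilization: subtracting the (finite) per-slice maximum before exponentiating prevents overflow without changing the result, since `log(sum(exp(x))) = m + log(sum(exp(x - m)))` for any finite `m`. A NumPy sketch of the same identity:

```python
import numpy as np

def logsumexp(x, axis=None, keepdims=False):
  # Subtract the max so np.exp never sees large positive arguments;
  # replace non-finite maxima with 0, as the guarded where() above does.
  m = np.max(x, axis=axis, keepdims=True)
  m = np.where(np.isfinite(m), m, 0.0)
  out = np.log(np.sum(np.exp(x - m), axis=axis, keepdims=True)) + m
  return out if keepdims else np.squeeze(out, axis=axis)

logsumexp(np.array([1000.0, 1000.0]))  # ~1000.693, with no overflow
```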
@ -2805,10 +2813,10 @@ def add_n(inputs, name=None):

`tf.math.add_n` performs the same operation as `tf.math.accumulate_n`, but it
waits for all of its inputs to be ready before beginning to sum.
This buffering can result in higher memory consumption when inputs are ready
This buffering can result in higher memory consumption when inputs are ready
at different times, since the minimum temporary storage required is
proportional to the input size rather than the output size.


This op does not [broadcast](
https://docs.scipy.org/doc/numpy-1.13.0/user/basics.broadcasting.html)
its inputs. If you need broadcasting, use `tf.math.add` (or the `+` operator)
@ -2861,10 +2869,10 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
Optionally, pass `shape` and `tensor_dtype` for shape and type checking,
otherwise, these are inferred.

`accumulate_n` performs the same operation as `tf.math.add_n`, but
does not wait for all of its inputs to be ready before beginning to sum.
This approach can save memory if inputs are ready at different times, since
minimum temporary storage is proportional to the output size rather than the
`accumulate_n` performs the same operation as `tf.math.add_n`, but
does not wait for all of its inputs to be ready before beginning to sum.
This approach can save memory if inputs are ready at different times, since
minimum temporary storage is proportional to the output size rather than the
inputs' size.

`accumulate_n` is differentiable (but wasn't previous to TensorFlow 1.7).
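A small sketch of the trade-off described above (assumes TensorFlow 2.x; both calls produce identical values):

```python
import tensorflow as tf

a = tf.constant([[1, 2], [3, 4]])
b = tf.constant([[5, 0], [0, 6]])
tf.math.add_n([a, b])         # buffers every input, then sums
tf.math.accumulate_n([a, b])  # may fold inputs in as they become ready
# Both return [[6, 2], [3, 10]]; they differ only in peak memory behavior.
```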
@ -2884,13 +2892,11 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):

Args:
inputs: A list of `Tensor` objects, each with same shape and type.
shape: Expected shape of elements of `inputs` (optional).
Also controls the output shape of this op, which may affect type
inference in other ops.
A value of `None` means "infer the input shape from the shapes in
`inputs`".
tensor_dtype: Expected data type of `inputs` (optional).
A value of `None` means "infer the input dtype from `inputs[0]`".
shape: Expected shape of elements of `inputs` (optional). Also controls the
output shape of this op, which may affect type inference in other ops. A
value of `None` means "infer the input shape from the shapes in `inputs`".
tensor_dtype: Expected data type of `inputs` (optional). A value of `None`
means "infer the input dtype from `inputs[0]`".
name: A name for the operation (optional).

Returns:
@ -2953,8 +2959,8 @@ def sigmoid(x, name=None):
Specifically, `y = 1 / (1 + exp(-x))`.

Args:
x: A Tensor with type `float16`, `float32`, `float64`, `complex64`,
or `complex128`.
x: A Tensor with type `float16`, `float32`, `float64`, `complex64`, or
`complex128`.
name: A name for the operation (optional).

Returns:
@ -3109,8 +3115,8 @@ def cumsum(x, axis=0, exclusive=False, reverse=False, name=None):

Args:
x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
`complex128`, `qint8`, `quint8`, `qint32`, `half`.
`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
`complex128`, `qint8`, `quint8`, `qint32`, `half`.
axis: A `Tensor` of type `int32` (default: 0). Must be in the range
`[-rank(x), rank(x))`.
exclusive: If `True`, perform exclusive cumsum.
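As a reference for the semantics being documented (assumes TensorFlow 2.x):

```python
import tensorflow as tf

x = tf.constant([1, 2, 3])
tf.cumsum(x)                  # [1, 3, 6]
tf.cumsum(x, exclusive=True)  # [0, 1, 3]  each entry excludes its own element
tf.cumsum(x, reverse=True)    # [6, 5, 3]  accumulated from the right
```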
@ -3162,8 +3168,8 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):

Args:
x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
`complex128`, `qint8`, `quint8`, `qint32`, `half`.
`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
`complex128`, `qint8`, `quint8`, `qint32`, `half`.
axis: A `Tensor` of type `int32` (default: 0). Must be in the range
`[-rank(x), rank(x))`.
exclusive: If `True`, perform exclusive cumprod.
@ -3220,8 +3226,8 @@ def conj(x, name=None):
elif x.dtype.is_floating or x.dtype.is_integer:
return x
else:
raise TypeError(
"Expected numeric or variant tensor, got dtype %r" % x.dtype)
raise TypeError("Expected numeric or variant tensor, got dtype %r" %
x.dtype)


def _BroadcastShape(op):
@ -3238,6 +3244,7 @@ def reduced_shape(input_shape, axes):
Args:
input_shape: 1-D Tensor, the shape of the Tensor being reduced.
axes: 1-D Tensor, the reduction axes.

Returns:
A 1-D Tensor, the output shape as if keepdims were set to True.
"""
@ -3310,8 +3317,8 @@ def unsorted_segment_mean(data, segment_ids, num_segments, name=None):
Args:
data: A `Tensor` with floating point or complex dtype.
segment_ids: An integer tensor whose shape is a prefix of `data.shape`.
num_segments: An integer scalar `Tensor`. The number of distinct
segment IDs.
num_segments: An integer scalar `Tensor`. The number of distinct segment
IDs.
name: A name for the operation (optional).

Returns:
@ -3359,8 +3366,8 @@ def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None):
Args:
data: A `Tensor` with floating point or complex dtype.
segment_ids: An integer tensor whose shape is a prefix of `data.shape`.
num_segments: An integer scalar `Tensor`. The number of distinct
segment IDs.
num_segments: An integer scalar `Tensor`. The number of distinct segment
IDs.
name: A name for the operation (optional).

Returns:
@ -3420,15 +3427,15 @@ def sparse_segment_sum(data, indices, segment_ids, name=None,
# [5 6 7 8]]

# Which is equivalent to:
tf.segment_sum(c, tf.constant([0, 0, 1]))
tf.math.segment_sum(c, tf.constant([0, 0, 1]))
```

Args:
data: A `Tensor` with data that will be assembled in the output.
indices: A 1-D `Tensor` with indices into `data`. Has same rank as
`segment_ids`.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
Values should be sorted and can be repeated.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`. Values
should be sorted and can be repeated.
name: A name for the operation (optional).
num_segments: An optional int32 scalar. Indicates the size of the output
`Tensor`.
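For the sorted-`segment_ids` contract described above, a short example (assumes TensorFlow 2.x, where this op is exported as `tf.sparse.segment_sum`):

```python
import tensorflow as tf

c = tf.constant([[1, 2, 3, 4], [-1, -2, -3, -4], [5, 6, 7, 8]])
# Select rows 0 and 1 of `c` and sum them both into output segment 0.
tf.sparse.segment_sum(c, indices=tf.constant([0, 1]),
                      segment_ids=tf.constant([0, 0]))
# -> [[0, 0, 0, 0]]
```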
@ -3483,8 +3490,8 @@ def sparse_segment_mean(data,
data: A `Tensor` with data that will be assembled in the output.
indices: A 1-D `Tensor` with indices into `data`. Has same rank as
`segment_ids`.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
Values should be sorted and can be repeated.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`. Values
should be sorted and can be repeated.
name: A name for the operation (optional).
num_segments: An optional int32 scalar. Indicates the size of the output
`Tensor`.
@ -3558,8 +3565,8 @@ def sparse_segment_sqrt_n(data,
data: A `Tensor` with data that will be assembled in the output.
indices: A 1-D `Tensor` with indices into `data`. Has same rank as
`segment_ids`.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`.
Values should be sorted and can be repeated.
segment_ids: A 1-D `Tensor` with indices into the output `Tensor`. Values
should be sorted and can be repeated.
name: A name for the operation (optional).
num_segments: An optional int32 scalar. Indicates the size of the output
`Tensor`.
@ -3671,7 +3678,7 @@ def tensordot(a, b, axes, name=None):
Args:
a: `Tensor`.
axes: List or `int32` `Tensor` of unique indices specifying valid axes of
`a`.
`a`.
flipped: An optional `bool`. Defaults to `False`. If `True`, the method
assumes that `a` is the second argument in the contraction operation.

@ -3735,12 +3742,12 @@ def tensordot(a, b, axes, name=None):
if axes > a_shape.ndims:
raise ValueError("'axes' must not be larger than the number of "
"dimensions of tensor %s." % a)
return (list(xrange(a_shape.ndims - axes, a_shape.ndims)),
list(xrange(axes)))
return (list(xrange(a_shape.ndims - axes,
a_shape.ndims)), list(xrange(axes)))
else:
rank = array_ops.rank(a)
return (range(rank - axes, rank, dtype=dtypes.int32),
range(axes, dtype=dtypes.int32))
return (range(rank - axes, rank,
dtype=dtypes.int32), range(axes, dtype=dtypes.int32))
elif isinstance(axes, (list, tuple)):
if len(axes) != 2:
raise ValueError("'axes' must be an integer or have length 2.")
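The two branches above implement the scalar form of `axes`; the list form names the contracted axes explicitly. For orientation (assumes TensorFlow 2.x):

```python
import tensorflow as tf

a = tf.ones([2, 3, 4])
b = tf.ones([4, 3, 5])
# Scalar axes=N: contract the last N axes of `a` with the first N of `b`.
tf.tensordot(a, b, axes=1).shape                 # [2, 3, 3, 5]
# Pair-of-lists form: contract a's axes 1,2 against b's axes 1,0.
tf.tensordot(a, b, axes=[[1, 2], [1, 0]]).shape  # [2, 5]
```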

@ -136,7 +136,7 @@ class BatchNormalizationTest(test.TestCase):
self.assertAllClose(mean_ref, mean_val, atol=1e-3)
# This is for Bessel's correction. tf.nn.moments uses n, instead of n-1, as
# the denominator in the formula to calculate variance, while
# tf.nn.fused_batch_norm has Bessel's correction built in.
# tf.compat.v1.nn.fused_batch_norm has Bessel's correction built in.
sample_size = x_val.size / scale_val.size
var_ref = var_ref * sample_size / (max(sample_size - 1.0, 1.0))
self.assertAllClose(var_ref, var_val, atol=1e-3)
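The rescaling applies Bessel's correction, `unbiased = biased * n / (n - 1)`, to reconcile the divide-by-`n` variance from `tf.nn.moments` with the divide-by-`(n - 1)` variance built into `fused_batch_norm`. A NumPy check of the identity:

```python
import numpy as np

xs = np.array([1.0, 2.0, 4.0])
n = xs.size
biased = xs.var()                # divides by n, like tf.nn.moments
unbiased = biased * n / (n - 1)  # Bessel's correction, as in the test above
assert np.isclose(unbiased, xs.var(ddof=1))
```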

@ -522,7 +522,7 @@ def zero_fraction(value, name=None):

```python
z = tf.nn.relu(...)
summ = tf.summary.scalar('sparsity', tf.nn.zero_fraction(z))
summ = tf.compat.v1.summary.scalar('sparsity', tf.nn.zero_fraction(z))
```

Args:
@ -1451,7 +1451,7 @@ def _compute_sampled_logits(weights,
class biases.
labels: A `Tensor` of type `int64` and shape `[batch_size,
num_true]`. The target classes. Note that this format differs from
the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`.
the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
inputs: A `Tensor` of shape `[batch_size, dim]`. The forward
activations of the input network.
num_sampled: An `int`. The number of classes to randomly sample per batch.
@ -1476,7 +1476,7 @@ def _compute_sampled_logits(weights,
out_logits: `Tensor` object with shape
`[batch_size, num_true + num_sampled]`, for passing to either
`nn.sigmoid_cross_entropy_with_logits` (NCE) or
`nn.softmax_cross_entropy_with_logits_v2` (sampled softmax).
`nn.softmax_cross_entropy_with_logits` (sampled softmax).
out_labels: A Tensor object with the same shape as `out_logits`.
"""

@ -1652,7 +1652,7 @@ def nce_loss_v2(weights,
Note: By default this uses a log-uniform (Zipfian) distribution for sampling,
so your labels must be sorted in order of decreasing frequency to achieve
good results. For more details, see
`tf.nn.log_uniform_candidate_sampler`.
`tf.random.log_uniform_candidate_sampler`.

Note: In the case where `num_true` > 1, we assign to each target class
the target probability 1 / `num_true` so that the target probabilities
@ -1756,7 +1756,7 @@ def nce_loss(weights,
Note: By default this uses a log-uniform (Zipfian) distribution for sampling,
so your labels must be sorted in order of decreasing frequency to achieve
good results. For more details, see
`tf.nn.log_uniform_candidate_sampler`.
`tf.random.log_uniform_candidate_sampler`.

Note: In the case where `num_true` > 1, we assign to each target class
the target probability 1 / `num_true` so that the target probabilities
@ -1855,7 +1855,7 @@ def sampled_softmax_loss_v2(weights,
logits = tf.matmul(inputs, tf.transpose(weights))
logits = tf.nn.bias_add(logits, biases)
labels_one_hot = tf.one_hot(labels, n_classes)
loss = tf.nn.softmax_cross_entropy_with_logits_v2(
loss = tf.nn.softmax_cross_entropy_with_logits(
labels=labels_one_hot,
logits=logits)
```
@ -1877,7 +1877,7 @@ def sampled_softmax_loss_v2(weights,
biases: A `Tensor` of shape `[num_classes]`. The class biases.
labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`. The
target classes. Note that this format differs from the `labels` argument
of `nn.softmax_cross_entropy_with_logits_v2`.
of `nn.softmax_cross_entropy_with_logits`.
inputs: A `Tensor` of shape `[batch_size, dim]`. The forward activations of
the input network.
num_sampled: An `int`. The number of classes to randomly sample per batch.
@ -1950,7 +1950,7 @@ def sampled_softmax_loss(weights,
logits = tf.matmul(inputs, tf.transpose(weights))
logits = tf.nn.bias_add(logits, biases)
labels_one_hot = tf.one_hot(labels, n_classes)
loss = tf.nn.softmax_cross_entropy_with_logits_v2(
loss = tf.nn.softmax_cross_entropy_with_logits(
labels=labels_one_hot,
logits=logits)
```
@ -1968,7 +1968,7 @@ def sampled_softmax_loss(weights,
biases: A `Tensor` of shape `[num_classes]`. The class biases.
labels: A `Tensor` of type `int64` and shape `[batch_size,
num_true]`. The target classes. Note that this format differs from
the `labels` argument of `nn.softmax_cross_entropy_with_logits_v2`.
the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
inputs: A `Tensor` of shape `[batch_size, dim]`. The forward
activations of the input network.
num_sampled: An `int`. The number of classes to randomly sample per batch.

@ -4120,7 +4120,7 @@ def dropout(x, keep_prob=None, noise_shape=None, seed=None, name=None,
noise_shape: A 1-D `Tensor` of type `int32`, representing the
shape for randomly generated keep/drop flags.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
name: A name for this operation (optional).
rate: A scalar `Tensor` with the same type as `x`. The probability that each
element of `x` is discarded.
@ -4176,7 +4176,7 @@ def dropout_v2(x, rate, noise_shape=None, seed=None, name=None):
noise_shape: A 1-D `Tensor` of type `int32`, representing the
shape for randomly generated keep/drop flags.
seed: A Python integer. Used to create random seeds. See
`tf.set_random_seed` for behavior.
`tf.compat.v1.set_random_seed` for behavior.
name: A name for this operation (optional).

Returns:

@ -60,7 +60,7 @@ def batch_gather(params, indices, name=None):
```python
>>> params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
>>> indices = tf.ragged.constant([[1, 2, 0], [], [], [0, 0]])
>>> tf.batch_gather(params, indices)
>>> tf.compat.v1.batch_gather(params, indices)
[['b', 'c', 'a'], [], [], ['e', 'e']]
```
"""

@ -147,7 +147,7 @@ def gather_nd(params, indices, batch_dims=0, name=None):

#### Examples:
```python
>>> params = tf.ragged.constant_value(
>>> params = tf.compat.v1.ragged.constant_value(
... [ [ ['000', '001'], ['010' ] ],
... [ ['100' ], ['110', '111', '112'], ['120'] ],
... [ [ ], ['210' ] ] ])

@ -277,7 +277,7 @@ class RaggedTensor(composite_tensor.CompositeTensor):
```

Warning: currently, this needs to cast value_rowids to int64 before
converting, since `tf.bincount` only supports `int32`.
converting, since `tf.math.bincount` only supports `int32`.

Args:
values: A potentially ragged tensor with shape `[nvals, ...]`.

@ -728,7 +728,8 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
* Call __getitem__ with int values in the slice spec wrapped in
`tf.constant()`.
* Call __getitem__ with int values in the slice spec wrapped in
`tf.placeholder()` (so value is not known at graph construction time).
`tf.compat.v1.placeholder()` (so value is not known at graph
construction time).

Args:
rt: The RaggedTensor to test.
@ -1084,8 +1085,8 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
expected_repr = (
'tf.RaggedTensor(values=tf.Tensor([{}], shape=(7,), dtype=string), '
'row_splits=tf.Tensor([{}], shape=(6,), dtype=int64))'.format(
' '.join(repr(x) for x in values), ' '.join(
repr(x) for x in row_splits)))
' '.join(repr(x) for x in values),
' '.join(repr(x) for x in row_splits)))
self.assertEqual(str(rt), expected_str)
self.assertEqual(repr(rt), expected_repr)
else:
@ -1230,6 +1231,5 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
ragged_math_ops.reduce_sum(a)
self.assertLen(a.consumers(), 1)


if __name__ == '__main__':
googletest.main()

@ -77,23 +77,23 @@ def where(condition, x=None, y=None, name=None):
#### Examples:
```python
>>> # Coordinates where condition is true.
>>> condition = tf.ragged.constant_value(
>>> condition = tf.compat.v1.ragged.constant_value(
... [[True, False, True], [False, True]])
>>> ragged.where(condition)
[[0, 0], [0, 2], [1, 1]]

>>> # Elementwise selection between x and y, based on condition.
>>> condition = tf.ragged.constant_value(
>>> condition = tf.compat.v1.ragged.constant_value(
... [[True, False, True], [False, True]])
>>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
>>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
>>> x = tf.compat.v1.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
>>> y = tf.compat.v1.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
>>> ragged.where(condition, x, y)
[['A', 'b', 'C'], ['d', 'E']]

>>> # Row selection between x and y, based on condition.
>>> condition = [True, False]
>>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
>>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
>>> x = tf.compat.v1.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
>>> y = tf.compat.v1.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
>>> ragged.where(condition, x, y)
[['A', 'B', 'C'], ['d', 'e']]
```

@ -63,7 +63,7 @@ def random_normal(shape,
dtype: The type of the output.
seed: A Python integer. Used to create a random seed for the distribution.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: A name for the operation (optional).

@ -112,7 +112,7 @@ def parameterized_truncated_normal(shape,
dtype: The type of the output.
seed: A Python integer. Used to create a random seed for the distribution.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: A name for the operation (optional).

@ -162,7 +162,7 @@ def truncated_normal(shape,
dtype: The type of the output.
seed: A Python integer. Used to create a random seed for the distribution.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: A name for the operation (optional).

@ -217,7 +217,7 @@ def random_uniform(shape,
dtype: The type of the output: `float16`, `float32`, `float64`, `int32`,
or `int64`.
seed: A Python integer. Used to create a random seed for the distribution.
See `tf.set_random_seed`
See `tf.compat.v1.set_random_seed`
for behavior.
name: A name for the operation (optional).

@ -270,7 +270,7 @@ def random_shuffle(value, seed=None, name=None):
value: A Tensor to be shuffled.
seed: A Python integer. Used to create a random seed for the distribution.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: A name for the operation (optional).

@ -299,7 +299,7 @@ def random_crop(value, size, seed=None, name=None):
value: Input tensor to crop.
size: 1-D tensor with size the rank of `value`.
seed: Python integer. Used to create a random seed. See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: A name for this operation (optional).

@ -338,7 +338,7 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
```python
# samples has shape [1, 5], where each value is either 0 or 1 with equal
# probability.
samples = tf.multinomial(tf.log([[10., 10.]]), 5)
samples = tf.random.categorical(tf.math.log([[10., 10.]]), 5)
```

Args:
@ -346,7 +346,7 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None):
`[i, :]` represents the unnormalized log-probabilities for all classes.
num_samples: 0-D. Number of independent samples to draw for each row slice.
seed: A Python integer. Used to create a random seed for the distribution.
See `tf.set_random_seed` for behavior.
See `tf.compat.v1.set_random_seed` for behavior.
name: Optional name for the operation.
output_dtype: integer type to use for the output. Defaults to int64.

@ -366,7 +366,7 @@ def categorical(logits, num_samples, dtype=None, seed=None, name=None):
```python
# samples has shape [1, 5], where each value is either 0 or 1 with equal
# probability.
samples = tf.random.categorical(tf.log([[10., 10.]]), 5)
samples = tf.random.categorical(tf.math.log([[10., 10.]]), 5)
```

Args:
@ -375,7 +375,7 @@ def categorical(logits, num_samples, dtype=None, seed=None, name=None):
num_samples: 0-D. Number of independent samples to draw for each row slice.
dtype: integer type to use for the output. Defaults to int64.
seed: A Python integer. Used to create a random seed for the distribution.
See `tf.set_random_seed` for behavior.
See `tf.compat.v1.set_random_seed` for behavior.
name: Optional name for the operation.

Returns:
@ -386,7 +386,7 @@ def categorical(logits, num_samples, dtype=None, seed=None, name=None):


def multinomial_categorical_impl(logits, num_samples, dtype, seed):
"""Implementation for random.multinomial (v1) and random.categorical (v2)."""
"""Implementation for random.categorical (v1) and random.categorical (v2)."""
logits = ops.convert_to_tensor(logits, name="logits")
seed1, seed2 = random_seed.get_seed(seed)
return gen_random_ops.multinomial(
@ -425,17 +425,17 @@ def random_gamma(shape,
Example:

```python
samples = tf.random_gamma([10], [0.5, 1.5])
samples = tf.random.gamma([10], [0.5, 1.5])
# samples has shape [10, 2], where each slice [:, 0] and [:, 1] represents
# the samples drawn from each distribution

samples = tf.random_gamma([7, 5], [0.5, 1.5])
samples = tf.random.gamma([7, 5], [0.5, 1.5])
# samples has shape [7, 5, 2], where each slice [:, :, 0] and [:, :, 1]
# represents the 7x5 samples drawn from each of the two distributions

alpha = tf.constant([[1.],[3.],[5.]])
beta = tf.constant([[3., 4.]])
samples = tf.random_gamma([30], alpha=alpha, beta=beta)
samples = tf.random.gamma([30], alpha=alpha, beta=beta)
# samples has shape [30, 3, 2], with 30 samples each of 3x2 distributions.

loss = tf.reduce_mean(tf.square(samples))
@ -458,7 +458,7 @@ def random_gamma(shape,
`float64`.
seed: A Python integer. Used to create a random seed for the distributions.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: Optional name for the operation.

@ -490,11 +490,11 @@ def random_poisson(lam, shape, dtype=dtypes.float32, seed=None, name=None):
Example:

```python
samples = tf.random_poisson([0.5, 1.5], [10])
samples = tf.random.poisson([0.5, 1.5], [10])
# samples has shape [10, 2], where each slice [:, 0] and [:, 1] represents
# the samples drawn from each distribution

samples = tf.random_poisson([12.2, 3.3], [7, 5])
samples = tf.random.poisson([12.2, 3.3], [7, 5])
# samples has shape [7, 5, 2], where each slice [:, :, 0] and [:, :, 1]
# represents the 7x5 samples drawn from each of the two distributions
```
@ -509,7 +509,7 @@ def random_poisson(lam, shape, dtype=dtypes.float32, seed=None, name=None):
`int64`.
seed: A Python integer. Used to create a random seed for the distributions.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: Optional name for the operation.

@ -529,11 +529,11 @@ def random_poisson_v2(shape, lam, dtype=dtypes.float32, seed=None, name=None):
Example:

```python
samples = tf.random_poisson([10], [0.5, 1.5])
samples = tf.random.poisson([10], [0.5, 1.5])
# samples has shape [10, 2], where each slice [:, 0] and [:, 1] represents
# the samples drawn from each distribution

samples = tf.random_poisson([7, 5], [12.2, 3.3])
samples = tf.random.poisson([7, 5], [12.2, 3.3])
# samples has shape [7, 5, 2], where each slice [:, :, 0] and [:, :, 1]
# represents the 7x5 samples drawn from each of the two distributions
```
@ -548,7 +548,7 @@ def random_poisson_v2(shape, lam, dtype=dtypes.float32, seed=None, name=None):
`int64`.
seed: A Python integer. Used to create a random seed for the distributions.
See
`tf.set_random_seed`
`tf.compat.v1.set_random_seed`
for behavior.
name: Optional name for the operation.

@ -344,7 +344,7 @@ class ResourceVariable(variables.VariableV1):
with tf.control_dependencies([other_assign]):
# Will print 2.0 because the value was read before other_assign ran. If
# `a` was a tf.Variable instead, 2.0 or 3.0 could be printed.
tf.Print(b, [b]).eval()
tf.compat.v1.Print(b, [b]).eval()
```
"""

@ -1268,7 +1268,7 @@ class ResourceVariable(variables.VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = ref.scatter_nd_sub(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```

@ -1321,7 +1321,7 @@ class ResourceVariable(variables.VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
add = ref.scatter_nd_add(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(add)
```

@ -1374,7 +1374,7 @@ class ResourceVariable(variables.VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = ref.scatter_nd_update(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""RNN helpers for TensorFlow models."""
from __future__ import absolute_import
from __future__ import division
@ -36,7 +35,6 @@ from tensorflow.python.util import deprecation
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import tf_export


# pylint: disable=protected-access
_concat = rnn_cell_impl._concat
# pylint: enable=protected-access
@ -60,12 +58,11 @@ def _transpose_batch_time(x):

x_rank = array_ops.rank(x)
x_t = array_ops.transpose(
x, array_ops.concat(
([1, 0], math_ops.range(2, x_rank)), axis=0))
x, array_ops.concat(([1, 0], math_ops.range(2, x_rank)), axis=0))
x_t.set_shape(
tensor_shape.TensorShape([
x_static_shape.dims[1].value, x_static_shape.dims[0].value
]).concatenate(x_static_shape[2:]))
tensor_shape.TensorShape(
[x_static_shape.dims[1].value,
x_static_shape.dims[0].value]).concatenate(x_static_shape[2:]))
return x_t
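Both formattings above compute the same permutation: swap the leading batch and time axes and leave every trailing axis in place. An eager sketch of the idea (assumes TensorFlow 2.x):

```python
import tensorflow as tf

x = tf.zeros([8, 10, 16])  # [batch, time, depth]
perm = tf.concat(([1, 0], tf.range(2, tf.rank(x))), axis=0)
tf.transpose(x, perm).shape  # [10, 8, 16]
```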


@ -73,9 +70,8 @@ def _best_effort_input_batch_size(flat_input):
"""Get static input batch size if available, with fallback to the dynamic one.

Args:
flat_input: An iterable of time major input Tensors of shape
`[max_time, batch_size, ...]`.
All inputs should have compatible batch sizes.
flat_input: An iterable of time major input Tensors of shape `[max_time,
batch_size, ...]`. All inputs should have compatible batch sizes.

Returns:
The batch size in Python integer if available, or a scalar Tensor otherwise.
@ -88,8 +84,8 @@ def _best_effort_input_batch_size(flat_input):
if shape.rank is None:
continue
if shape.rank < 2:
raise ValueError(
"Expected input tensor %s to have rank at least 2" % input_)
raise ValueError("Expected input tensor %s to have rank at least 2" %
input_)
batch_size = shape.dims[1].value
if batch_size is not None:
return batch_size
@ -156,6 +152,7 @@ def _is_keras_rnn_cell(rnn_cell):
Args:
rnn_cell: An RNN cell instance that either follow the Keras interface or TF
RNN interface.

Returns:
Boolean, whether the cell is an Keras RNN cell.
"""
@ -163,15 +160,21 @@ def _is_keras_rnn_cell(rnn_cell):
# library like Deepmind that didn't inherit tf.nn.rnn_cell.RNNCell.
# Keras cells never had zero_state method, which was from the original
# interface from TF RNN cell.
return (not isinstance(rnn_cell, rnn_cell_impl.RNNCell)
and isinstance(rnn_cell, base_layer.Layer)
and getattr(rnn_cell, "zero_state", None) is None)
return (not isinstance(rnn_cell, rnn_cell_impl.RNNCell) and
isinstance(rnn_cell, base_layer.Layer) and
getattr(rnn_cell, "zero_state", None) is None)


# pylint: disable=unused-argument
def _rnn_step(
time, sequence_length, min_sequence_length, max_sequence_length,
zero_output, state, call_cell, state_size, skip_conditionals=False):
def _rnn_step(time,
sequence_length,
min_sequence_length,
max_sequence_length,
zero_output,
state,
call_cell,
state_size,
skip_conditionals=False):
"""Calculate one step of a dynamic RNN minibatch.

Returns an (output, state) pair conditioned on `sequence_length`.
@ -246,10 +249,12 @@ def _rnn_step(
# a calculated state & output.
flat_new_output = [
_copy_one_through(zero_output, new_output)
for zero_output, new_output in zip(flat_zero_output, flat_new_output)]
for zero_output, new_output in zip(flat_zero_output, flat_new_output)
]
flat_new_state = [
_copy_one_through(state, new_state)
for state, new_state in zip(flat_state, flat_new_state)]
for state, new_state in zip(flat_state, flat_new_state)
]
return flat_new_output + flat_new_state

def _maybe_copy_some_through():
@ -263,7 +268,8 @@ def _rnn_step(
flat_new_output = nest.flatten(new_output)
return control_flow_ops.cond(
# if t < min_seq_len: calculate and return everything
time < min_sequence_length, lambda: flat_new_output + flat_new_state,
time < min_sequence_length,
lambda: flat_new_output + flat_new_state,
# else copy some of it through
lambda: _copy_some_through(flat_new_output, flat_new_state))

@ -284,7 +290,8 @@ def _rnn_step(
empty_update = lambda: flat_zero_output + flat_state
final_output_and_state = control_flow_ops.cond(
# if t >= max_seq_len: copy all state through, output zeros
time >= max_sequence_length, empty_update,
time >= max_sequence_length,
empty_update,
# otherwise calculation is required: copy some or all of it through
_maybe_copy_some_through)

@ -313,10 +320,9 @@ def _reverse_seq(input_seq, lengths):

Args:
input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features)
or nested tuples of tensors.
or nested tuples of tensors.
lengths: A `Tensor` of dimension batch_size, containing lengths for each
sequence in the batch. If "None" is specified, simply reverses
the list.
sequence in the batch. If "None" is specified, simply reverses the list.

Returns:
time-reversed sequence
@ -328,8 +334,7 @@ def _reverse_seq(input_seq, lengths):

flat_results = [[] for _ in range(len(input_seq))]
for sequence in zip(*flat_input_seq):
input_shape = tensor_shape.unknown_shape(
rank=sequence[0].get_shape().rank)
input_shape = tensor_shape.unknown_shape(rank=sequence[0].get_shape().rank)
for input_ in sequence:
input_shape.merge_with(input_.get_shape())
input_.set_shape(input_shape)
@ -345,8 +350,10 @@ def _reverse_seq(input_seq, lengths):
r.set_shape(input_shape)
flat_result.append(r)

results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
for input_, flat_result in zip(input_seq, flat_results)]
results = [
nest.pack_sequence_as(structure=input_, flat_sequence=flat_result)
for input_, flat_result in zip(input_seq, flat_results)
]
return results
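The per-batch reversal that `_reverse_seq` performs is the same contract as `tf.reverse_sequence`: only the first `length` steps of each row are reversed, and padding stays put. For example (assumes TensorFlow 2.x):

```python
import tensorflow as tf

x = tf.constant([[1, 2, 3, 0], [4, 5, 0, 0]])
tf.reverse_sequence(x, seq_lengths=[3, 2], seq_axis=1, batch_axis=0)
# -> [[3, 2, 1, 0], [5, 4, 0, 0]]
```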


@ -354,10 +361,17 @@ def _reverse_seq(input_seq, lengths):
"keras.layers.RNN(cell))`, which is equivalent to "
"this API")
@tf_export(v1=["nn.bidirectional_dynamic_rnn"])
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
initial_state_fw=None, initial_state_bw=None,
dtype=None, parallel_iterations=None,
swap_memory=False, time_major=False, scope=None):
def bidirectional_dynamic_rnn(cell_fw,
cell_bw,
inputs,
sequence_length=None,
initial_state_fw=None,
initial_state_bw=None,
dtype=None,
parallel_iterations=None,
swap_memory=False,
time_major=False,
scope=None):
"""Creates a dynamic version of bidirectional recurrent neural network.

Takes input and builds independent forward and backward RNNs. The input_size
@ -373,38 +387,38 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
inputs: The RNN inputs.
If time_major == False (default), this must be a tensor of shape:
`[batch_size, max_time, ...]`, or a nested tuple of such elements.
If time_major == True, this must be a tensor of shape:
`[max_time, batch_size, ...]`, or a nested tuple of such elements.
If time_major == True, this must be a tensor of shape: `[max_time,
batch_size, ...]`, or a nested tuple of such elements.
sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
containing the actual lengths for each of the sequences in the batch.
If not provided, all batch entries are assumed to be full sequences; and
time reversal is applied from time `0` to `max_time` for each sequence.
initial_state_fw: (optional) An initial state for the forward RNN.
This must be a tensor of appropriate type and shape
`[batch_size, cell_fw.state_size]`.
If `cell_fw.state_size` is a tuple, this should be a tuple of
tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
initial_state_bw: (optional) Same as for `initial_state_fw`, but using
the corresponding properties of `cell_bw`.
containing the actual lengths for each of the sequences in the batch. If
not provided, all batch entries are assumed to be full sequences; and time
reversal is applied from time `0` to `max_time` for each sequence.
initial_state_fw: (optional) An initial state for the forward RNN. This must
be a tensor of appropriate type and shape `[batch_size,
cell_fw.state_size]`. If `cell_fw.state_size` is a tuple, this should be a
tuple of tensors having shapes `[batch_size, s] for s in
cell_fw.state_size`.
initial_state_bw: (optional) Same as for `initial_state_fw`, but using the
corresponding properties of `cell_bw`.
dtype: (optional) The data type for the initial states and expected output.
Required if initial_states are not provided or RNN states have a
heterogeneous dtype.
parallel_iterations: (Default: 32). The number of iterations to run in
parallel. Those operations which do not have any temporal dependency
and can be run in parallel, will be. This parameter trades off
time for space. Values >> 1 use more memory but take less time,
while smaller values use less memory but computations take longer.
parallel. Those operations which do not have any temporal dependency and
can be run in parallel, will be. This parameter trades off time for
space. Values >> 1 use more memory but take less time, while smaller
values use less memory but computations take longer.
swap_memory: Transparently swap the tensors produced in forward inference
but needed for back prop from GPU to CPU. This allows training RNNs
which would typically not fit on a single GPU, with very minimal (or no)
but needed for back prop from GPU to CPU. This allows training RNNs which
would typically not fit on a single GPU, with very minimal (or no)
performance penalty.
time_major: The shape format of the `inputs` and `outputs` Tensors.
If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
Using `time_major = True` is a bit more efficient because it avoids
transposes at the beginning and end of the RNN calculation. However,
most TensorFlow data is batch-major, so by default this function
accepts input and emits output in batch-major form.
time_major: The shape format of the `inputs` and `outputs` Tensors. If true,
these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false,
these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using
`time_major = True` is a bit more efficient because it avoids transposes
at the beginning and end of the RNN calculation. However, most TensorFlow
data is batch-major, so by default this function accepts input and emits
output in batch-major form.
scope: VariableScope for the created subgraph; defaults to
|
||||
|
||||
@ -439,10 +453,15 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
|
||||
# Forward direction
|
||||
with vs.variable_scope("fw") as fw_scope:
|
||||
output_fw, output_state_fw = dynamic_rnn(
|
||||
cell=cell_fw, inputs=inputs, sequence_length=sequence_length,
|
||||
initial_state=initial_state_fw, dtype=dtype,
|
||||
parallel_iterations=parallel_iterations, swap_memory=swap_memory,
|
||||
time_major=time_major, scope=fw_scope)
|
||||
cell=cell_fw,
|
||||
inputs=inputs,
|
||||
sequence_length=sequence_length,
|
||||
initial_state=initial_state_fw,
|
||||
dtype=dtype,
|
||||
parallel_iterations=parallel_iterations,
|
||||
swap_memory=swap_memory,
|
||||
time_major=time_major,
|
||||
scope=fw_scope)
|
||||
|
||||
# Backward direction
|
||||
if not time_major:
|
||||
@ -455,8 +474,10 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
|
||||
def _reverse(input_, seq_lengths, seq_axis, batch_axis):
|
||||
if seq_lengths is not None:
|
||||
return array_ops.reverse_sequence(
|
||||
input=input_, seq_lengths=seq_lengths,
|
||||
seq_axis=seq_axis, batch_axis=batch_axis)
|
||||
input=input_,
|
||||
seq_lengths=seq_lengths,
|
||||
seq_axis=seq_axis,
|
||||
batch_axis=batch_axis)
|
||||
else:
|
||||
return array_ops.reverse(input_, axis=[seq_axis])
|
||||
|
||||
@ -471,14 +492,21 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
|
||||
|
||||
inputs_reverse = nest.map_structure(_map_reverse, inputs)
|
||||
tmp, output_state_bw = dynamic_rnn(
|
||||
cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length,
|
||||
initial_state=initial_state_bw, dtype=dtype,
|
||||
parallel_iterations=parallel_iterations, swap_memory=swap_memory,
|
||||
time_major=time_major, scope=bw_scope)
|
||||
cell=cell_bw,
|
||||
inputs=inputs_reverse,
|
||||
sequence_length=sequence_length,
|
||||
initial_state=initial_state_bw,
|
||||
dtype=dtype,
|
||||
parallel_iterations=parallel_iterations,
|
||||
swap_memory=swap_memory,
|
||||
time_major=time_major,
|
||||
scope=bw_scope)
|
||||
|
||||
output_bw = _reverse(
|
||||
tmp, seq_lengths=sequence_length,
|
||||
seq_axis=time_axis, batch_axis=batch_axis)
|
||||
tmp,
|
||||
seq_lengths=sequence_length,
|
||||
seq_axis=time_axis,
|
||||
batch_axis=batch_axis)
|
||||
|
||||
outputs = (output_fw, output_bw)
|
||||
output_states = (output_state_fw, output_state_bw)
|
||||
@ -490,9 +518,15 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
|
||||
None,
|
||||
"Please use `keras.layers.RNN(cell)`, which is equivalent to this API")
|
||||
@tf_export(v1=["nn.dynamic_rnn"])
|
||||
def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
|
||||
dtype=None, parallel_iterations=None, swap_memory=False,
|
||||
time_major=False, scope=None):
|
||||
def dynamic_rnn(cell,
|
||||
inputs,
|
||||
sequence_length=None,
|
||||
initial_state=None,
|
||||
dtype=None,
|
||||
parallel_iterations=None,
|
||||
swap_memory=False,
|
||||
time_major=False,
|
||||
scope=None):
|
||||
"""Creates a recurrent neural network specified by RNNCell `cell`.
|
||||
|
||||
Performs fully dynamic unrolling of `inputs`.
|
||||
@ -501,7 +535,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
|
||||
|
||||
```python
|
||||
# create a BasicRNNCell
|
||||
rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
|
||||
rnn_cell = tf.compat.v1.nn.rnn_cell.BasicRNNCell(hidden_size)
|
||||
|
||||
# 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size]
|
||||
|
||||
@ -509,22 +543,22 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
|
||||
initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
|
||||
|
||||
# 'state' is a tensor of shape [batch_size, cell_state_size]
|
||||
outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data,
|
||||
outputs, state = tf.compat.v1.nn.dynamic_rnn(rnn_cell, input_data,
|
||||
initial_state=initial_state,
|
||||
dtype=tf.float32)
|
||||
```
|
||||
|
||||
```python
|
||||
# create 2 LSTMCells
|
||||
rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
|
||||
rnn_layers = [tf.compat.v1.nn.rnn_cell.LSTMCell(size) for size in [128, 256]]
|
||||
|
||||
# create a RNN cell composed sequentially of a number of RNNCells
|
||||
multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
|
||||
multi_rnn_cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell(rnn_layers)
|
||||
|
||||
# 'outputs' is a tensor of shape [batch_size, max_time, 256]
|
||||
# 'state' is a N-tuple where N is the number of LSTMCells containing a
|
||||
# tf.contrib.rnn.LSTMStateTuple for each cell
|
||||
outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
|
||||
# tf.nn.rnn_cell.LSTMStateTuple for each cell
|
||||
outputs, state = tf.compat.v1.nn.dynamic_rnn(cell=multi_rnn_cell,
|
||||
inputs=data,
|
||||
dtype=tf.float32)
|
||||
```
|
||||
@ -534,46 +568,43 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
cell: An instance of RNNCell.
inputs: The RNN inputs.
If `time_major == False` (default), this must be a `Tensor` of shape:
`[batch_size, max_time, ...]`, or a nested tuple of such
elements.
If `time_major == True`, this must be a `Tensor` of shape:
`[max_time, batch_size, ...]`, or a nested tuple of such
elements.
This may also be a (possibly nested) tuple of Tensors satisfying
this property. The first two dimensions must match across all the inputs,
but otherwise the ranks and other shape components may differ.
In this case, input to `cell` at each time-step will replicate the
structure of these tuples, except for the time dimension (from which the
time is taken).
The input to `cell` at each time step will be a `Tensor` or (possibly
nested) tuple of Tensors each with dimensions `[batch_size, ...]`.
sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
Used to copy-through state and zero-out outputs when past a batch
element's sequence length. So it's more for performance than correctness.
initial_state: (optional) An initial state for the RNN.
If `cell.state_size` is an integer, this must be
a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
If `cell.state_size` is a tuple, this should be a tuple of
tensors having shapes `[batch_size, s] for s in cell.state_size`.
`[batch_size, max_time, ...]`, or a nested tuple of such elements.
If `time_major == True`, this must be a `Tensor` of shape: `[max_time,
batch_size, ...]`, or a nested tuple of such elements. This may also be
a (possibly nested) tuple of Tensors satisfying this property. The
first two dimensions must match across all the inputs, but otherwise the
ranks and other shape components may differ. In this case, input to
`cell` at each time-step will replicate the structure of these tuples,
except for the time dimension (from which the time is taken). The input
to `cell` at each time step will be a `Tensor` or (possibly nested)
tuple of Tensors each with dimensions `[batch_size, ...]`.
sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. Used
to copy-through state and zero-out outputs when past a batch element's
sequence length. So it's more for performance than correctness.
initial_state: (optional) An initial state for the RNN. If `cell.state_size`
is an integer, this must be a `Tensor` of appropriate type and shape
`[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
should be a tuple of tensors having shapes `[batch_size, s] for s in
cell.state_size`.
dtype: (optional) The data type for the initial state and expected output.
Required if initial_state is not provided or RNN state has a heterogeneous
dtype.
parallel_iterations: (Default: 32). The number of iterations to run in
parallel. Those operations which do not have any temporal dependency
and can be run in parallel, will be. This parameter trades off
time for space. Values >> 1 use more memory but take less time,
while smaller values use less memory but computations take longer.
parallel. Those operations which do not have any temporal dependency and
can be run in parallel, will be. This parameter trades off time for
space. Values >> 1 use more memory but take less time, while smaller
values use less memory but computations take longer.
swap_memory: Transparently swap the tensors produced in forward inference
but needed for back prop from GPU to CPU. This allows training RNNs
which would typically not fit on a single GPU, with very minimal (or no)
but needed for back prop from GPU to CPU. This allows training RNNs which
would typically not fit on a single GPU, with very minimal (or no)
performance penalty.
time_major: The shape format of the `inputs` and `outputs` Tensors.
If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
Using `time_major = True` is a bit more efficient because it avoids
transposes at the beginning and end of the RNN calculation. However,
most TensorFlow data is batch-major, so by default this function
accepts input and emits output in batch-major form.
time_major: The shape format of the `inputs` and `outputs` Tensors. If true,
these `Tensors` must be shaped `[max_time, batch_size, depth]`. If false,
these `Tensors` must be shaped `[batch_size, max_time, depth]`. Using
`time_major = True` is a bit more efficient because it avoids transposes
at the beginning and end of the RNN calculation. However, most TensorFlow
data is batch-major, so by default this function accepts input and emits
output in batch-major form.
scope: VariableScope for the created subgraph; defaults to "rnn".

Returns:
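
The `time_major` contract documented above can be seen end to end in a short, hedged sketch; the placeholder shapes and the 100-unit GRU are assumptions for illustration:

```python
# Feeding a time-major transpose with time_major=True yields time-major
# outputs, which transpose back to the batch-major result.
batch_major = tf.compat.v1.placeholder(tf.float32, [32, 50, 64])  # [batch, time, depth]
time_major = tf.transpose(batch_major, [1, 0, 2])                 # [time, batch, depth]
cell = tf.compat.v1.nn.rnn_cell.GRUCell(100)
outputs_tm, state = tf.compat.v1.nn.dynamic_rnn(
    cell, time_major, dtype=tf.float32, time_major=True)
outputs = tf.transpose(outputs_tm, [1, 0, 2])  # back to [batch, time, depth]
```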
@ -631,7 +662,8 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
"sequence_length must be a vector of length batch_size, "
"but saw shape: %s" % sequence_length.get_shape())
sequence_length = array_ops.identity( # Just to find it in the graph.
sequence_length, name="sequence_length")
sequence_length,
name="sequence_length")

batch_size = _best_effort_input_batch_size(flat_input)

@ -650,9 +682,10 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
x_shape = array_ops.shape(x)
packed_shape = array_ops.stack(shape)
return control_flow_ops.Assert(
math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)),
["Expected shape for Tensor %s is " % x.name,
packed_shape, " but saw shape: ", x_shape])
math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), [
"Expected shape for Tensor %s is " % x.name, packed_shape,
" but saw shape: ", x_shape
])

if not context.executing_eagerly() and sequence_length is not None:
# Perform some shape validation
@ -696,8 +729,8 @@ def _dynamic_rnn_loop(cell,
inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
tuple of such elements.
initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
`cell.state_size` is a tuple, then this should be a tuple of
tensors having shapes `[batch_size, s] for s in cell.state_size`.
`cell.state_size` is a tuple, then this should be a tuple of tensors
having shapes `[batch_size, s] for s in cell.state_size`.
parallel_iterations: Positive Python int.
swap_memory: A Python boolean
sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
@ -736,8 +769,8 @@ def _dynamic_rnn_loop(cell,
time_steps = input_shape[0]
batch_size = _best_effort_input_batch_size(flat_input)

inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
for input_ in flat_input)
inputs_got_shape = tuple(
input_.get_shape().with_rank_at_least(3) for input_ in flat_input)

const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]

@ -762,10 +795,10 @@ def _dynamic_rnn_loop(cell,
return array_ops.zeros(
array_ops.stack(size), _infer_state_dtype(dtype, state))

flat_zero_output = tuple(_create_zero_arrays(output)
for output in flat_output_size)
zero_output = nest.pack_sequence_as(structure=cell.output_size,
flat_sequence=flat_zero_output)
flat_zero_output = tuple(
_create_zero_arrays(output) for output in flat_output_size)
zero_output = nest.pack_sequence_as(
structure=cell.output_size, flat_sequence=flat_zero_output)

if sequence_length is not None:
min_sequence_length = math_ops.reduce_min(sequence_length)
@ -779,19 +812,20 @@ def _dynamic_rnn_loop(cell,
base_name = scope

def _create_ta(name, element_shape, dtype):
return tensor_array_ops.TensorArray(dtype=dtype,
size=time_steps,
element_shape=element_shape,
tensor_array_name=base_name + name)
return tensor_array_ops.TensorArray(
dtype=dtype,
size=time_steps,
element_shape=element_shape,
tensor_array_name=base_name + name)

in_graph_mode = not context.executing_eagerly()
if in_graph_mode:
output_ta = tuple(
_create_ta(
"output_%d" % i,
element_shape=(tensor_shape.TensorShape([const_batch_size])
.concatenate(
_maybe_tensor_shape_from_tensor(out_size))),
element_shape=(
tensor_shape.TensorShape([const_batch_size]).concatenate(
_maybe_tensor_shape_from_tensor(out_size))),
dtype=_infer_state_dtype(dtype, state))
for i, out_size in enumerate(flat_output_size))
input_ta = tuple(
@ -800,8 +834,8 @@ def _dynamic_rnn_loop(cell,
element_shape=flat_input_i.shape[1:],
dtype=flat_input_i.dtype)
for i, flat_input_i in enumerate(flat_input))
input_ta = tuple(ta.unstack(input_)
for ta, input_ in zip(input_ta, flat_input))
input_ta = tuple(
ta.unstack(input_) for ta, input_ in zip(input_ta, flat_input))
else:
output_ta = tuple([0 for _ in range(time_steps.numpy())]
for i in range(len(flat_output_size)))
@ -866,8 +900,8 @@ def _dynamic_rnn_loop(cell,
if in_graph_mode:
# Make sure that we run at least 1 step, if necessary, to ensure
# the TensorArrays pick up the dynamic shape.
loop_bound = math_ops.minimum(
time_steps, math_ops.maximum(1, max_sequence_length))
loop_bound = math_ops.minimum(time_steps,
math_ops.maximum(1, max_sequence_length))
else:
# Using max_sequence_length isn't currently supported in the Eager branch.
loop_bound = time_steps
@ -885,8 +919,9 @@ def _dynamic_rnn_loop(cell,
final_outputs = tuple(ta.stack() for ta in output_final_ta)
# Restore some shape information
for output, output_size in zip(final_outputs, flat_output_size):
shape = _concat(
[const_time_steps, const_batch_size], output_size, static=True)
shape = _concat([const_time_steps, const_batch_size],
output_size,
static=True)
output.set_shape(shape)
else:
final_outputs = output_final_ta
@ -901,8 +936,11 @@ def _dynamic_rnn_loop(cell,


@tf_export(v1=["nn.raw_rnn"])
def raw_rnn(cell, loop_fn,
parallel_iterations=None, swap_memory=False, scope=None):
def raw_rnn(cell,
loop_fn,
parallel_iterations=None,
swap_memory=False,
scope=None):
"""Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`.

**NOTE: This method is still in testing, and the API may change.**
@ -948,13 +986,14 @@ def raw_rnn(cell, loop_fn,
A simple implementation of `dynamic_rnn` via `raw_rnn` looks like this:

```python
inputs = tf.placeholder(shape=(max_time, batch_size, input_depth),
inputs = tf.compat.v1.placeholder(shape=(max_time, batch_size, input_depth),
dtype=tf.float32)
sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32)
sequence_length = tf.compat.v1.placeholder(shape=(batch_size,),
dtype=tf.int32)
inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
inputs_ta = inputs_ta.unstack(inputs)

cell = tf.contrib.rnn.LSTMCell(num_units)
cell = tf.compat.v1.nn.rnn_cell.LSTMCell(num_units)

def loop_fn(time, cell_output, cell_state, loop_state):
emit_output = cell_output # == None for time == 0
@ -978,68 +1017,60 @@ def raw_rnn(cell, loop_fn,

Args:
cell: An instance of RNNCell.
loop_fn: A callable that takes inputs
`(time, cell_output, cell_state, loop_state)`
and returns the tuple
`(finished, next_input, next_cell_state, emit_output, next_loop_state)`.
Here `time` is an int32 scalar `Tensor`, `cell_output` is a
`Tensor` or (possibly nested) tuple of tensors as determined by
`cell.output_size`, and `cell_state` is a `Tensor`
or (possibly nested) tuple of tensors, as determined by the `loop_fn`
on its first call (and should match `cell.state_size`).
loop_fn: A callable that takes inputs `(time, cell_output, cell_state,
loop_state)` and returns the tuple `(finished, next_input,
next_cell_state, emit_output, next_loop_state)`. Here `time` is an int32
scalar `Tensor`, `cell_output` is a `Tensor` or (possibly nested) tuple of
tensors as determined by `cell.output_size`, and `cell_state` is a
`Tensor` or (possibly nested) tuple of tensors, as determined by the
`loop_fn` on its first call (and should match `cell.state_size`).
The outputs are: `finished`, a boolean `Tensor` of
shape `[batch_size]`, `next_input`: the next input to feed to `cell`,
`next_cell_state`: the next state to feed to `cell`,
and `emit_output`: the output to store for this iteration.

Note that `emit_output` should be a `Tensor` or (possibly nested)
tuple of tensors which is aggregated in the `emit_ta` inside the
`while_loop`. For the first call to `loop_fn`, the `emit_output`
corresponds to the `emit_structure` which is then used to determine the
size of the `zero_tensor` for the `emit_ta` (defaults to
`cell.output_size`). For the subsequent calls to the `loop_fn`, the
`emit_output` corresponds to the actual output tensor
that is to be aggregated in the `emit_ta`. The parameter `cell_state`
and output `next_cell_state` may be either a single or (possibly nested)
tuple of tensors. The parameter `loop_state` and
output `next_loop_state` may be either a single or (possibly nested) tuple
of `Tensor` and `TensorArray` objects. This last parameter
may be ignored by `loop_fn` and the return value may be `None`. If it
is not `None`, then the `loop_state` will be propagated through the RNN
loop, for use purely by `loop_fn` to keep track of its own state.
The `next_loop_state` parameter returned may be `None`.

The first call to `loop_fn` will be `time = 0`, `cell_output = None`,
`cell_state = None`, and `loop_state = None`. For this call:
The `next_cell_state` value should be the value with which to initialize
the cell's state. It may be a final state from a previous RNN or it
may be the output of `cell.zero_state()`. It should be a
(possibly nested) tuple structure of tensors.
If `cell.state_size` is an integer, this must be
a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
appropriate type and shape `[batch_size] + cell.state_size`.
If `cell.state_size` is a (possibly nested) tuple of ints or
`TensorShape`, this will be a tuple having the corresponding shapes.
The `emit_output` value may be either `None` or a (possibly nested)
tuple structure of tensors, e.g.,
`(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`.
If this first `emit_output` return value is `None`,
then the `emit_ta` result of `raw_rnn` will have the same structure and
dtypes as `cell.output_size`. Otherwise `emit_ta` will have the same
structure, shapes (prepended with a `batch_size` dimension), and dtypes
as `emit_output`. The actual values returned for `emit_output` at this
initializing call are ignored. Note, this emit structure must be
consistent across all time steps.

and `emit_output`: the output to store for this iteration. Note that
`emit_output` should be a `Tensor` or (possibly nested) tuple of tensors
which is aggregated in the `emit_ta` inside the `while_loop`. For the
first call to `loop_fn`, the `emit_output` corresponds to the
`emit_structure` which is then used to determine the size of the
`zero_tensor` for the `emit_ta` (defaults to `cell.output_size`). For
the subsequent calls to the `loop_fn`, the `emit_output` corresponds to
the actual output tensor that is to be aggregated in the `emit_ta`. The
parameter `cell_state` and output `next_cell_state` may be either a
single or (possibly nested) tuple of tensors. The parameter
`loop_state` and output `next_loop_state` may be either a single or
(possibly nested) tuple of `Tensor` and `TensorArray` objects. This
last parameter may be ignored by `loop_fn` and the return value may be
`None`. If it is not `None`, then the `loop_state` will be propagated
through the RNN loop, for use purely by `loop_fn` to keep track of its
own state. The `next_loop_state` parameter returned may be `None`. The
first call to `loop_fn` will be `time = 0`, `cell_output = None`,
`cell_state = None`, and `loop_state = None`. For this call: The
`next_cell_state` value should be the value with which to initialize the
cell's state. It may be a final state from a previous RNN or it may be
the output of `cell.zero_state()`. It should be a (possibly nested)
tuple structure of tensors. If `cell.state_size` is an integer, this
must be a `Tensor` of appropriate type and shape `[batch_size,
cell.state_size]`. If `cell.state_size` is a `TensorShape`, this must be
a `Tensor` of appropriate type and shape `[batch_size] +
cell.state_size`. If `cell.state_size` is a (possibly nested) tuple of
ints or `TensorShape`, this will be a tuple having the corresponding
shapes. The `emit_output` value may be either `None` or a (possibly
nested) tuple structure of tensors, e.g., `(tf.zeros(shape_0,
dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`. If this first
`emit_output` return value is `None`, then the `emit_ta` result of
`raw_rnn` will have the same structure and dtypes as `cell.output_size`.
Otherwise `emit_ta` will have the same structure, shapes (prepended with
a `batch_size` dimension), and dtypes as `emit_output`. The actual
values returned for `emit_output` at this initializing call are ignored.
Note, this emit structure must be consistent across all time steps.
parallel_iterations: (Default: 32). The number of iterations to run in
parallel. Those operations which do not have any temporal dependency
and can be run in parallel, will be. This parameter trades off
time for space. Values >> 1 use more memory but take less time,
while smaller values use less memory but computations take longer.
parallel. Those operations which do not have any temporal dependency and
can be run in parallel, will be. This parameter trades off time for
space. Values >> 1 use more memory but take less time, while smaller
values use less memory but computations take longer.
swap_memory: Transparently swap the tensors produced in forward inference
but needed for back prop from GPU to CPU. This allows training RNNs
which would typically not fit on a single GPU, with very minimal (or no)
but needed for back prop from GPU to CPU. This allows training RNNs which
would typically not fit on a single GPU, with very minimal (or no)
performance penalty.
scope: VariableScope for the created subgraph; defaults to "rnn".
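
The docstring's `loop_fn` example is shown only in part above; a complete version of the same sketch, with concrete constants assumed here for self-containment, looks like this:

```python
import tensorflow.compat.v1 as tf

max_time, batch_size, input_depth, num_units = 10, 4, 8, 16
inputs = tf.placeholder(tf.float32, (max_time, batch_size, input_depth))
sequence_length = tf.placeholder(tf.int32, (batch_size,))
inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
inputs_ta = inputs_ta.unstack(inputs)
cell = tf.nn.rnn_cell.LSTMCell(num_units)

def loop_fn(time, cell_output, cell_state, loop_state):
  emit_output = cell_output  # None for time == 0
  if cell_output is None:    # time == 0: set up the initial state
    next_cell_state = cell.zero_state(batch_size, tf.float32)
  else:
    next_cell_state = cell_state
  elements_finished = (time >= sequence_length)
  finished = tf.reduce_all(elements_finished)
  next_input = tf.cond(
      finished,
      lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
      lambda: inputs_ta.read(time))
  return (elements_finished, next_input, next_cell_state,
          emit_output, None)  # no loop_state used

outputs_ta, final_state, _ = tf.nn.raw_rnn(cell, loop_fn)
outputs = outputs_ta.stack()  # [max_time, batch_size, num_units]
```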
@ -1086,14 +1117,15 @@ def raw_rnn(cell, loop_fn,
varscope.set_caching_device(lambda op: op.device)

time = constant_op.constant(0, dtype=dtypes.int32)
(elements_finished, next_input, initial_state, emit_structure,
init_loop_state) = loop_fn(
(elements_finished, next_input,
initial_state, emit_structure, init_loop_state) = loop_fn(
time, None, None, None) # time, cell_output, cell_state, loop_state
flat_input = nest.flatten(next_input)

# Need a surrogate loop state for the while_loop if none is available.
loop_state = (init_loop_state if init_loop_state is not None
else constant_op.constant(0, dtype=dtypes.int32))
loop_state = (
init_loop_state if init_loop_state is not None else
constant_op.constant(0, dtype=dtypes.int32))

input_shape = [input_.get_shape() for input_ in flat_input]
static_batch_size = tensor_shape.dimension_at_index(input_shape[0], 0)
@ -1112,13 +1144,14 @@ def raw_rnn(cell, loop_fn,
state = initial_state
flat_state = nest.flatten(state)
flat_state = [ops.convert_to_tensor(s) for s in flat_state]
state = nest.pack_sequence_as(structure=state,
flat_sequence=flat_state)
state = nest.pack_sequence_as(structure=state, flat_sequence=flat_state)

if emit_structure is not None:
flat_emit_structure = nest.flatten(emit_structure)
flat_emit_size = [emit.shape if emit.shape.is_fully_defined() else
array_ops.shape(emit) for emit in flat_emit_structure]
flat_emit_size = [
emit.shape if emit.shape.is_fully_defined() else array_ops.shape(emit)
for emit in flat_emit_structure
]
flat_emit_dtypes = [emit.dtype for emit in flat_emit_structure]
else:
emit_structure = cell.output_size
@ -1129,26 +1162,28 @@ def raw_rnn(cell, loop_fn,
tensor_array_ops.TensorArray(
dtype=dtype_i,
dynamic_size=True,
element_shape=(tensor_shape.TensorShape([const_batch_size])
.concatenate(
_maybe_tensor_shape_from_tensor(size_i))),
element_shape=(tensor_shape.TensorShape([
const_batch_size
]).concatenate(_maybe_tensor_shape_from_tensor(size_i))),
size=0,
name="rnn_output_%d" % i)
for i, (dtype_i, size_i)
in enumerate(zip(flat_emit_dtypes, flat_emit_size))]
emit_ta = nest.pack_sequence_as(structure=emit_structure,
flat_sequence=flat_emit_ta)
for i, (dtype_i,
size_i) in enumerate(zip(flat_emit_dtypes, flat_emit_size))
]
emit_ta = nest.pack_sequence_as(
structure=emit_structure, flat_sequence=flat_emit_ta)
flat_zero_emit = [
array_ops.zeros(_concat(batch_size, size_i), dtype_i)
for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)]
zero_emit = nest.pack_sequence_as(structure=emit_structure,
flat_sequence=flat_zero_emit)
for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)
]
zero_emit = nest.pack_sequence_as(
structure=emit_structure, flat_sequence=flat_zero_emit)

def condition(unused_time, elements_finished, *_):
return math_ops.logical_not(math_ops.reduce_all(elements_finished))

def body(time, elements_finished, current_input,
emit_ta, state, loop_state):
def body(time, elements_finished, current_input, emit_ta, state,
loop_state):
"""Internal while loop body for raw_rnn.

Args:
@ -1169,8 +1204,8 @@ def raw_rnn(cell, loop_fn,

next_time = time + 1
(next_finished, next_input, next_state, emit_output,
next_loop_state) = loop_fn(
next_time, next_output, cell_state, loop_state)
next_loop_state) = loop_fn(next_time, next_output, cell_state,
loop_state)

nest.assert_same_structure(state, next_state)
nest.assert_same_structure(current_input, next_input)
@ -1182,6 +1217,7 @@ def raw_rnn(cell, loop_fn,

def _copy_some_through(current, candidate):
"""Copy some tensors through via array_ops.where."""

def copy_fn(cur_i, cand_i):
# TensorArray and scalar get passed through.
if isinstance(cur_i, tensor_array_ops.TensorArray):
@ -1191,23 +1227,26 @@ def raw_rnn(cell, loop_fn,
# Otherwise propagate the old or the new value.
with ops.colocate_with(cand_i):
return array_ops.where(elements_finished, cur_i, cand_i)

return nest.map_structure(copy_fn, current, candidate)

emit_output = _copy_some_through(zero_emit, emit_output)
next_state = _copy_some_through(state, next_state)

emit_ta = nest.map_structure(
lambda ta, emit: ta.write(time, emit), emit_ta, emit_output)
emit_ta = nest.map_structure(lambda ta, emit: ta.write(time, emit),
emit_ta, emit_output)

elements_finished = math_ops.logical_or(elements_finished, next_finished)

return (next_time, elements_finished, next_input,
emit_ta, next_state, loop_state)
return (next_time, elements_finished, next_input, emit_ta, next_state,
loop_state)

returned = control_flow_ops.while_loop(
condition, body, loop_vars=[
time, elements_finished, next_input,
emit_ta, state, loop_state],
condition,
body,
loop_vars=[
time, elements_finished, next_input, emit_ta, state, loop_state
],
parallel_iterations=parallel_iterations,
swap_memory=swap_memory)

@ -1219,9 +1258,9 @@ def raw_rnn(cell, loop_fn,
return (emit_ta, final_state, final_loop_state)


@deprecation.deprecated(
None, "Please use `keras.layers.RNN(cell, unroll=True)`, "
"which is equivalent to this API")
@deprecation.deprecated(None,
"Please use `keras.layers.RNN(cell, unroll=True)`, "
"which is equivalent to this API")
@tf_export(v1=["nn.static_rnn"])
def static_rnn(cell,
inputs,
@ -1261,18 +1300,18 @@ def static_rnn(cell,

Args:
cell: An instance of RNNCell.
inputs: A length T list of inputs, each a `Tensor` of shape
`[batch_size, input_size]`, or a nested tuple of such elements.
initial_state: (optional) An initial state for the RNN.
If `cell.state_size` is an integer, this must be
a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
If `cell.state_size` is a tuple, this should be a tuple of
tensors having shapes `[batch_size, s] for s in cell.state_size`.
inputs: A length T list of inputs, each a `Tensor` of shape `[batch_size,
input_size]`, or a nested tuple of such elements.
initial_state: (optional) An initial state for the RNN. If `cell.state_size`
is an integer, this must be a `Tensor` of appropriate type and shape
`[batch_size, cell.state_size]`. If `cell.state_size` is a tuple, this
should be a tuple of tensors having shapes `[batch_size, s] for s in
cell.state_size`.
dtype: (optional) The data type for the initial state and expected output.
Required if initial_state is not provided or RNN state has a heterogeneous
dtype.
sequence_length: Specifies the length of each sequence in inputs.
An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
sequence_length: Specifies the length of each sequence in inputs. An int32
or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`.
scope: VariableScope for the created subgraph; defaults to "rnn".

Returns:
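
The `static_rnn` deprecation above names `keras.layers.RNN(cell, unroll=True)` as the replacement; a hedged sketch of that replacement follows, with the cell type and shapes chosen as assumptions for illustration:

```python
import tensorflow as tf

cell = tf.keras.layers.SimpleRNNCell(32)
layer = tf.keras.layers.RNN(cell, unroll=True, return_sequences=True,
                            return_state=True)
x = tf.zeros([4, 10, 8])         # [batch_size, T, input_size]
outputs, final_state = layer(x)  # outputs: [4, 10, 32]; final_state: [4, 32]
```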
@ -1356,9 +1395,10 @@ def static_rnn(cell,
size = _concat(batch_size, output_size)
output = array_ops.zeros(
array_ops.stack(size), _infer_state_dtype(dtype, state))
shape = _concat(tensor_shape.dimension_value(fixed_batch_size),
output_size,
static=True)
shape = _concat(
tensor_shape.dimension_value(fixed_batch_size),
output_size,
static=True)
output.set_shape(tensor_shape.TensorShape(shape))
return output

@ -1403,9 +1443,9 @@ def static_rnn(cell,
return (outputs, state)


@deprecation.deprecated(
None, "Please use `keras.layers.RNN(cell, stateful=True)`, "
"which is equivalent to this API")
@deprecation.deprecated(None,
"Please use `keras.layers.RNN(cell, stateful=True)`, "
"which is equivalent to this API")
@tf_export(v1=["nn.static_state_saving_rnn"])
def static_state_saving_rnn(cell,
inputs,
@ -1417,16 +1457,15 @@ def static_state_saving_rnn(cell,

Args:
cell: An instance of `RNNCell`.
inputs: A length T list of inputs, each a `Tensor` of shape
`[batch_size, input_size]`.
inputs: A length T list of inputs, each a `Tensor` of shape `[batch_size,
input_size]`.
state_saver: A state saver object with methods `state` and `save_state`.
state_name: Python string or tuple of strings. The name to use with the
state_saver. If the cell returns tuples of states (i.e.,
`cell.state_size` is a tuple) then `state_name` should be a tuple of
strings having the same length as `cell.state_size`. Otherwise it should
be a single string.
sequence_length: (optional) An int32/int64 vector size [batch_size].
See the documentation for rnn() for more details about sequence_length.
state_saver. If the cell returns tuples of states (i.e., `cell.state_size`
is a tuple) then `state_name` should be a tuple of strings having the same
length as `cell.state_size`. Otherwise it should be a single string.
sequence_length: (optional) An int32/int64 vector size [batch_size]. See the
documentation for rnn() for more details about sequence_length.
scope: VariableScope for the created subgraph; defaults to "rnn".

Returns:
@ -1445,8 +1484,8 @@ def static_state_saving_rnn(cell,

if state_is_tuple != state_name_tuple:
raise ValueError("state_name should be the same type as cell.state_size. "
"state_name: %s, cell.state_size: %s" % (str(state_name),
str(state_size)))
"state_name: %s, cell.state_size: %s" %
(str(state_name), str(state_size)))

if state_is_tuple:
state_name_flat = nest.flatten(state_name)
@ -1524,17 +1563,17 @@ def static_bidirectional_rnn(cell_fw,
Args:
cell_fw: An instance of RNNCell, to be used for forward direction.
cell_bw: An instance of RNNCell, to be used for backward direction.
inputs: A length T list of inputs, each a tensor of shape
[batch_size, input_size], or a nested tuple of such elements.
initial_state_fw: (optional) An initial state for the forward RNN.
This must be a tensor of appropriate type and shape
`[batch_size, cell_fw.state_size]`.
If `cell_fw.state_size` is a tuple, this should be a tuple of
tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
initial_state_bw: (optional) Same as for `initial_state_fw`, but using
the corresponding properties of `cell_bw`.
dtype: (optional) The data type for the initial state. Required if
either of the initial states are not provided.
inputs: A length T list of inputs, each a tensor of shape [batch_size,
input_size], or a nested tuple of such elements.
initial_state_fw: (optional) An initial state for the forward RNN. This must
be a tensor of appropriate type and shape `[batch_size,
cell_fw.state_size]`. If `cell_fw.state_size` is a tuple, this should be a
tuple of tensors having shapes `[batch_size, s] for s in
cell_fw.state_size`.
initial_state_bw: (optional) Same as for `initial_state_fw`, but using the
corresponding properties of `cell_bw`.
dtype: (optional) The data type for the initial state. Required if either
of the initial states are not provided.
sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
containing the actual lengths for each of the sequences.
scope: VariableScope for the created subgraph; defaults to

@ -56,7 +56,6 @@ from tensorflow.python.util import nest
from tensorflow.python.util.deprecation import deprecated
from tensorflow.python.util.tf_export import tf_export


_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"

@ -81,8 +80,8 @@ def assert_like_rnncell(cell_name, cell):
ASSERT_LIKE_RNNCELL_ERROR_REGEXP.

Args:
cell_name: A string to give a meaningful error referencing to the name
of the function argument.
cell_name: A string to give a meaningful error referencing to the name of
the function argument.
cell: The object which should behave like an RNNCell.

Raises:
@ -95,8 +94,7 @@ def assert_like_rnncell(cell_name, cell):
callable(cell),
]
errors = [
"'output_size' property is missing",
"'state_size' property is missing",
"'output_size' property is missing", "'state_size' property is missing",
"either 'zero_state' or 'get_initial_state' method is required",
"is not callable"
]
@ -141,8 +139,9 @@ def _concat(prefix, suffix, static=False):
else:
p = tensor_shape.as_shape(prefix)
p_static = p.as_list() if p.ndims is not None else None
p = (constant_op.constant(p.as_list(), dtype=dtypes.int32)
if p.is_fully_defined() else None)
p = (
constant_op.constant(p.as_list(), dtype=dtypes.int32)
if p.is_fully_defined() else None)
if isinstance(suffix, ops.Tensor):
s = suffix
s_static = tensor_util.constant_value(suffix)
@ -154,22 +153,24 @@ def _concat(prefix, suffix, static=False):
else:
s = tensor_shape.as_shape(suffix)
s_static = s.as_list() if s.ndims is not None else None
s = (constant_op.constant(s.as_list(), dtype=dtypes.int32)
if s.is_fully_defined() else None)
s = (
constant_op.constant(s.as_list(), dtype=dtypes.int32)
if s.is_fully_defined() else None)

if static:
shape = tensor_shape.as_shape(p_static).concatenate(s_static)
shape = shape.as_list() if shape.ndims is not None else None
else:
if p is None or s is None:
raise ValueError("Provided a prefix or suffix of None: %s and %s"
% (prefix, suffix))
raise ValueError("Provided a prefix or suffix of None: %s and %s" %
(prefix, suffix))
shape = array_ops.concat((p, s), 0)
return shape


def _zero_state_tensors(state_size, batch_size, dtype):
"""Create tensors of zeros based on state_size, batch_size, and dtype."""

def get_state_shape(s):
"""Combine s with batch_size to get a proper tensor shape."""
c = _concat(batch_size, s)
@ -178,6 +179,7 @@ def _zero_state_tensors(state_size, batch_size, dtype):
c_static = _concat(batch_size, s, static=True)
size.set_shape(c_static)
return size

return nest.map_structure(get_state_shape, state_size)
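
A hedged sketch of how `_zero_state_tensors` surfaces through the public API: `RNNCell.zero_state` uses this helper to build zeros matching `cell.state_size` (the 16-unit cell and batch size are assumptions):

```python
cell = tf.compat.v1.nn.rnn_cell.LSTMCell(16)
state = cell.zero_state(batch_size=4, dtype=tf.float32)
# LSTMCell's state_size is an LSTMStateTuple, so `state.c` and `state.h`
# are both zero tensors of shape [4, 16].
```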
@ -220,8 +222,8 @@ class RNNCell(base_layer.Layer):
inputs: `2-D` tensor with shape `[batch_size, input_size]`.
state: if `self.state_size` is an integer, this should be a `2-D Tensor`
with shape `[batch_size, self.state_size]`. Otherwise, if
`self.state_size` is a tuple of integers, this should be a tuple
with shapes `[batch_size, s] for s in self.state_size`.
`self.state_size` is a tuple of integers, this should be a tuple with
shapes `[batch_size, s] for s in self.state_size`.
scope: VariableScope for the created subgraph; defaults to class name.

Returns:
@ -232,15 +234,15 @@ class RNNCell(base_layer.Layer):
the arity and shapes of `state`.
"""
if scope is not None:
with vs.variable_scope(scope,
custom_getter=self._rnn_get_variable) as scope:
with vs.variable_scope(
scope, custom_getter=self._rnn_get_variable) as scope:
return super(RNNCell, self).__call__(inputs, state, scope=scope)
else:
scope_attrname = "rnncell_scope"
scope = getattr(self, scope_attrname, None)
if scope is None:
scope = vs.variable_scope(vs.get_variable_scope(),
custom_getter=self._rnn_get_variable)
scope = vs.variable_scope(
vs.get_variable_scope(), custom_getter=self._rnn_get_variable)
setattr(self, scope_attrname, scope)
with scope:
return super(RNNCell, self).__call__(inputs, state)
@ -331,8 +333,7 @@ class RNNCell(base_layer.Layer):
if is_eager and _hasattr(self, "_last_zero_state"):
(last_state_size, last_batch_size, last_dtype,
last_output) = getattr(self, "_last_zero_state")
if (last_batch_size == batch_size and
last_dtype == dtype and
if (last_batch_size == batch_size and last_dtype == dtype and
last_state_size == state_size):
return last_output
with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
@ -346,14 +347,15 @@ class LayerRNNCell(RNNCell):
"""Subclass of RNNCells that act like proper `tf.Layer` objects.

For backwards compatibility purposes, most `RNNCell` instances allow their
`call` methods to instantiate variables via `tf.get_variable`. The underlying
`call` methods to instantiate variables via `tf.compat.v1.get_variable`. The
underlying
variable scope thus keeps track of any variables, and returning cached
versions. This is atypical of `tf.layer` objects, which separate this
part of layer building into a `build` method that is only called once.

Here we provide a subclass for `RNNCell` objects that act exactly as
`Layer` objects do. They must provide a `build` method and their
`call` methods do not access Variables `tf.get_variable`.
`call` methods do not access Variables `tf.compat.v1.get_variable`.
"""

def __call__(self, inputs, state, scope=None, *args, **kwargs):
@ -363,8 +365,8 @@ class LayerRNNCell(RNNCell):
inputs: `2-D` tensor with shape `[batch_size, input_size]`.
state: if `self.state_size` is an integer, this should be a `2-D Tensor`
with shape `[batch_size, self.state_size]`. Otherwise, if
`self.state_size` is a tuple of integers, this should be a tuple
with shapes `[batch_size, s] for s in self.state_size`.
`self.state_size` is a tuple of integers, this should be a tuple with
shapes `[batch_size, s] for s in self.state_size`.
scope: optional cell scope.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
@ -379,8 +381,8 @@ class LayerRNNCell(RNNCell):
# Bypass RNNCell's variable capturing semantics for LayerRNNCell.
# Instead, it is up to subclasses to provide a proper build
# method. See the class docstring for more details.
return base_layer.Layer.__call__(self, inputs, state, scope=scope,
*args, **kwargs)
return base_layer.Layer.__call__(
self, inputs, state, scope=scope, *args, **kwargs)


@tf_export(v1=["nn.rnn_cell.BasicRNNCell"])
@ -394,20 +396,19 @@ class BasicRNNCell(LayerRNNCell):
num_units: int, The number of units in the RNN cell.
activation: Nonlinearity to use. Default: `tanh`. It could also be string
that is within Keras activation function names.
reuse: (optional) Python boolean describing whether to reuse variables
in an existing scope. If not `True`, and the existing scope already has
the given variables, an error is raised.
name: String, the name of the layer. Layers with the same name will
share weights, but to avoid mistakes we require reuse=True in such
cases.
dtype: Default dtype of the layer (default of `None` means use the type
of the first input). Required when `build` is called before `call`.
reuse: (optional) Python boolean describing whether to reuse variables in an
existing scope. If not `True`, and the existing scope already has the
given variables, an error is raised.
name: String, the name of the layer. Layers with the same name will share
weights, but to avoid mistakes we require reuse=True in such cases.
dtype: Default dtype of the layer (default of `None` means use the type of
the first input). Required when `build` is called before `call`.
**kwargs: Dict, keyword named properties for common layer attributes, like
`trainable` etc when constructing the cell from configs of get_config().
"""

@deprecated(None, "This class is equivalent as tf.keras.layers.SimpleRNNCell,"
" and will be replaced by that in Tensorflow 2.0.")
" and will be replaced by that in Tensorflow 2.0.")
def __init__(self,
num_units,
activation=None,
@ -419,9 +420,10 @@ class BasicRNNCell(LayerRNNCell):
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if context.executing_eagerly() and context.num_gpus() > 0:
logging.warn("%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnRNNTanh for better "
"performance on GPU.", self)
logging.warn(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnRNNTanh for better "
"performance on GPU.", self)

# Inputs must be 2-dimensional.
self.input_spec = input_spec.InputSpec(ndim=2)
@ -443,8 +445,8 @@ class BasicRNNCell(LayerRNNCell):
@tf_utils.shape_type_conversion
def build(self, inputs_shape):
if inputs_shape[-1] is None:
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
% str(inputs_shape))
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
str(inputs_shape))
_check_supported_dtypes(self.dtype)

input_depth = inputs_shape[-1]
@ -479,7 +481,9 @@ class BasicRNNCell(LayerRNNCell):

@tf_export(v1=["nn.rnn_cell.GRUCell"])
class GRUCell(LayerRNNCell):
"""Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
"""Gated Recurrent Unit cell (cf.

http://arxiv.org/abs/1406.1078).

Note that this cell is not optimized for performance. Please use
`tf.contrib.cudnn_rnn.CudnnGRU` for better performance on GPU, or
@ -488,23 +492,22 @@ class GRUCell(LayerRNNCell):
Args:
num_units: int, The number of units in the GRU cell.
activation: Nonlinearity to use. Default: `tanh`.
reuse: (optional) Python boolean describing whether to reuse variables
in an existing scope. If not `True`, and the existing scope already has
the given variables, an error is raised.
reuse: (optional) Python boolean describing whether to reuse variables in an
existing scope. If not `True`, and the existing scope already has the
given variables, an error is raised.
kernel_initializer: (optional) The initializer to use for the weight and
projection matrices.
projection matrices.
bias_initializer: (optional) The initializer to use for the bias.
name: String, the name of the layer. Layers with the same name will
share weights, but to avoid mistakes we require reuse=True in such
cases.
dtype: Default dtype of the layer (default of `None` means use the type
of the first input). Required when `build` is called before `call`.
name: String, the name of the layer. Layers with the same name will share
weights, but to avoid mistakes we require reuse=True in such cases.
dtype: Default dtype of the layer (default of `None` means use the type of
the first input). Required when `build` is called before `call`.
**kwargs: Dict, keyword named properties for common layer attributes, like
`trainable` etc when constructing the cell from configs of get_config().
"""
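
A hedged usage sketch for the cell documented above, with the unit count and placeholder shapes chosen as assumptions:

```python
cell = tf.compat.v1.nn.rnn_cell.GRUCell(num_units=32)
x = tf.compat.v1.placeholder(tf.float32, [4, 10, 8])  # [batch, time, features]
outputs, state = tf.compat.v1.nn.dynamic_rnn(cell, x, dtype=tf.float32)
# outputs: [4, 10, 32]; state: [4, 32] (a GRU keeps a single state tensor).
```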
@deprecated(None, "This class is equivalent as tf.keras.layers.GRUCell,"
" and will be replaced by that in Tensorflow 2.0.")
" and will be replaced by that in Tensorflow 2.0.")
def __init__(self,
num_units,
activation=None,
@ -519,9 +522,10 @@ class GRUCell(LayerRNNCell):
_check_supported_dtypes(self.dtype)

if context.executing_eagerly() and context.num_gpus() > 0:
logging.warn("%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnGRU for better "
"performance on GPU.", self)
logging.warn(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnGRU for better "
"performance on GPU.", self)
# Inputs must be 2-dimensional.
self.input_spec = input_spec.InputSpec(ndim=2)

@ -544,8 +548,8 @@ class GRUCell(LayerRNNCell):
@tf_utils.shape_type_conversion
def build(self, inputs_shape):
if inputs_shape[-1] is None:
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
% str(inputs_shape))
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
str(inputs_shape))
_check_supported_dtypes(self.dtype)
input_depth = inputs_shape[-1]
self._gate_kernel = self.add_variable(
@ -555,10 +559,9 @@ class GRUCell(LayerRNNCell):
self._gate_bias = self.add_variable(
"gates/%s" % _BIAS_VARIABLE_NAME,
shape=[2 * self._num_units],
initializer=(
self._bias_initializer
if self._bias_initializer is not None
else init_ops.constant_initializer(1.0, dtype=self.dtype)))
initializer=(self._bias_initializer
if self._bias_initializer is not None else
init_ops.constant_initializer(1.0, dtype=self.dtype)))
self._candidate_kernel = self.add_variable(
"candidate/%s" % _WEIGHTS_VARIABLE_NAME,
shape=[input_depth + self._num_units, self._num_units],
@ -566,10 +569,9 @@ class GRUCell(LayerRNNCell):
self._candidate_bias = self.add_variable(
"candidate/%s" % _BIAS_VARIABLE_NAME,
shape=[self._num_units],
initializer=(
self._bias_initializer
if self._bias_initializer is not None
else init_ops.zeros_initializer(dtype=self.dtype)))
initializer=(self._bias_initializer
if self._bias_initializer is not None else
init_ops.zeros_initializer(dtype=self.dtype)))

self.built = True

@ -631,7 +633,7 @@ class LSTMStateTuple(_LSTMStateTuple):

@tf_export(v1=["nn.rnn_cell.BasicLSTMCell"])
class BasicLSTMCell(LayerRNNCell):
"""DEPRECATED: Please use `tf.nn.rnn_cell.LSTMCell` instead.
"""DEPRECATED: Please use `tf.compat.v1.nn.rnn_cell.LSTMCell` instead.

Basic LSTM recurrent network cell.

@ -643,7 +645,7 @@ class BasicLSTMCell(LayerRNNCell):
It does not allow cell clipping, a projection layer, and does not
use peep-hole connections: it is the basic baseline.

For advanced models, please use the full `tf.nn.rnn_cell.LSTMCell`
For advanced models, please use the full `tf.compat.v1.nn.rnn_cell.LSTMCell`
that follows.

Note that this cell is not optimized for performance. Please use
@ -653,7 +655,7 @@ class BasicLSTMCell(LayerRNNCell):
"""
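
A hedged single-step usage sketch for the basic cell described above, showing the `LSTMStateTuple` state it returns by default (unit count and batch size are assumptions):

```python
cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(num_units=64)
inputs = tf.zeros([8, 32])  # [batch_size, input_size]
state = cell.zero_state(batch_size=8, dtype=tf.float32)
output, new_state = cell(inputs, state)
# new_state is an LSTMStateTuple; new_state.c and new_state.h are [8, 64].
```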
@deprecated(None, "This class is equivalent as tf.keras.layers.LSTMCell,"
" and will be replaced by that in Tensorflow 2.0.")
" and will be replaced by that in Tensorflow 2.0.")
def __init__(self,
num_units,
forget_bias=1.0,
@ -667,38 +669,37 @@ class BasicLSTMCell(LayerRNNCell):

Args:
num_units: int, The number of units in the LSTM cell.
forget_bias: float, The bias added to forget gates (see above).
Must set to `0.0` manually when restoring from CudnnLSTM-trained
checkpoints.
state_is_tuple: If True, accepted and returned states are 2-tuples of
the `c_state` and `m_state`. If False, they are concatenated
along the column axis. The latter behavior will soon be deprecated.
forget_bias: float, The bias added to forget gates (see above). Must set
to `0.0` manually when restoring from CudnnLSTM-trained checkpoints.
state_is_tuple: If True, accepted and returned states are 2-tuples of the
`c_state` and `m_state`. If False, they are concatenated along the
column axis. The latter behavior will soon be deprecated.
activation: Activation function of the inner states. Default: `tanh`. It
could also be string that is within Keras activation function names.
reuse: (optional) Python boolean describing whether to reuse variables
in an existing scope. If not `True`, and the existing scope already has
reuse: (optional) Python boolean describing whether to reuse variables in
an existing scope. If not `True`, and the existing scope already has
the given variables, an error is raised.
name: String, the name of the layer. Layers with the same name will
share weights, but to avoid mistakes we require reuse=True in such
cases.
dtype: Default dtype of the layer (default of `None` means use the type
of the first input). Required when `build` is called before `call`.
name: String, the name of the layer. Layers with the same name will share
weights, but to avoid mistakes we require reuse=True in such cases.
dtype: Default dtype of the layer (default of `None` means use the type of
the first input). Required when `build` is called before `call`.
**kwargs: Dict, keyword named properties for common layer attributes, like
`trainable` etc when constructing the cell from configs of get_config().

When restoring from CudnnLSTM-trained checkpoints, must use
`CudnnCompatibleLSTMCell` instead.
When restoring from CudnnLSTM-trained checkpoints, must use
`CudnnCompatibleLSTMCell` instead.
"""
super(BasicLSTMCell, self).__init__(
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if not state_is_tuple:
logging.warn("%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
logging.warn(
"%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
if context.executing_eagerly() and context.num_gpus() > 0:
logging.warn("%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)
logging.warn(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)

# Inputs must be 2-dimensional.
self.input_spec = input_spec.InputSpec(ndim=2)
@ -723,8 +724,8 @@ class BasicLSTMCell(LayerRNNCell):
@tf_utils.shape_type_conversion
def build(self, inputs_shape):
if inputs_shape[-1] is None:
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
% str(inputs_shape))
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
str(inputs_shape))
_check_supported_dtypes(self.dtype)
input_depth = inputs_shape[-1]
h_depth = self._num_units
@ -743,10 +744,9 @@ class BasicLSTMCell(LayerRNNCell):

Args:
inputs: `2-D` tensor with shape `[batch_size, input_size]`.
state: An `LSTMStateTuple` of state tensors, each shaped
`[batch_size, num_units]`, if `state_is_tuple` has been set to
`True`. Otherwise, a `Tensor` shaped
`[batch_size, 2 * num_units]`.
state: An `LSTMStateTuple` of state tensors, each shaped `[batch_size,
num_units]`, if `state_is_tuple` has been set to `True`. Otherwise, a
`Tensor` shaped `[batch_size, 2 * num_units]`.

Returns:
A pair containing the new hidden state, and the new state (either a
@ -776,8 +776,9 @@ class BasicLSTMCell(LayerRNNCell):
# performance improvement. So using those at the cost of readability.
add = math_ops.add
multiply = math_ops.multiply
new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
multiply(sigmoid(i), self._activation(j)))
new_c = add(
multiply(c, sigmoid(add(f, forget_bias_tensor))),
multiply(sigmoid(i), self._activation(j)))
new_h = multiply(self._activation(new_c), sigmoid(o))

if self._state_is_tuple:
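
For reference, the gate arithmetic in the hunk above is the standard LSTM update; a small NumPy restatement, assuming the default `tanh` activation and arbitrary gate values:

```python
import numpy as np

def sigma(x):
  return 1.0 / (1.0 + np.exp(-x))

c = np.array([0.5])              # previous cell state
i = j = f = o = np.array([0.1])  # input, candidate, forget, output pre-activations
forget_bias = 1.0
new_c = c * sigma(f + forget_bias) + sigma(i) * np.tanh(j)
new_h = np.tanh(new_c) * sigma(o)
```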
@ -827,13 +828,23 @@ class LSTMCell(LayerRNNCell):
"""

@deprecated(None, "This class is equivalent as tf.keras.layers.LSTMCell,"
" and will be replaced by that in Tensorflow 2.0.")
def __init__(self, num_units,
use_peepholes=False, cell_clip=None,
initializer=None, num_proj=None, proj_clip=None,
num_unit_shards=None, num_proj_shards=None,
forget_bias=1.0, state_is_tuple=True,
activation=None, reuse=None, name=None, dtype=None, **kwargs):
" and will be replaced by that in Tensorflow 2.0.")
def __init__(self,
num_units,
use_peepholes=False,
cell_clip=None,
initializer=None,
num_proj=None,
proj_clip=None,
num_unit_shards=None,
num_proj_shards=None,
forget_bias=1.0,
state_is_tuple=True,
activation=None,
reuse=None,
name=None,
dtype=None,
**kwargs):
"""Initialize the parameters for an LSTM cell.

Args:
@ -848,48 +859,48 @@ class LSTMCell(LayerRNNCell):
proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is
provided, then the projected values are clipped elementwise to within
`[-proj_clip, proj_clip]`.
num_unit_shards: Deprecated, will be removed by Jan. 2017.
Use a variable_scope partitioner instead.
num_proj_shards: Deprecated, will be removed by Jan. 2017.
Use a variable_scope partitioner instead.
forget_bias: Biases of the forget gate are initialized by default to 1
in order to reduce the scale of forgetting at the beginning of
the training. Must set it manually to `0.0` when restoring from
CudnnLSTM trained checkpoints.
state_is_tuple: If True, accepted and returned states are 2-tuples of
the `c_state` and `m_state`. If False, they are concatenated
along the column axis. This latter behavior will soon be deprecated.
num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
variable_scope partitioner instead.
num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
variable_scope partitioner instead.
forget_bias: Biases of the forget gate are initialized by default to 1 in
order to reduce the scale of forgetting at the beginning of the
training. Must set it manually to `0.0` when restoring from CudnnLSTM
trained checkpoints.
state_is_tuple: If True, accepted and returned states are 2-tuples of the
`c_state` and `m_state`. If False, they are concatenated along the
column axis. This latter behavior will soon be deprecated.
activation: Activation function of the inner states. Default: `tanh`. It
could also be string that is within Keras activation function names.
reuse: (optional) Python boolean describing whether to reuse variables
in an existing scope. If not `True`, and the existing scope already has
reuse: (optional) Python boolean describing whether to reuse variables in
an existing scope. If not `True`, and the existing scope already has
the given variables, an error is raised.
name: String, the name of the layer. Layers with the same name will
share weights, but to avoid mistakes we require reuse=True in such
cases.
dtype: Default dtype of the layer (default of `None` means use the type
of the first input). Required when `build` is called before `call`.
name: String, the name of the layer. Layers with the same name will share
weights, but to avoid mistakes we require reuse=True in such cases.
dtype: Default dtype of the layer (default of `None` means use the type of
the first input). Required when `build` is called before `call`.
**kwargs: Dict, keyword named properties for common layer attributes, like
`trainable` etc when constructing the cell from configs of get_config().

When restoring from CudnnLSTM-trained checkpoints, use
`CudnnCompatibleLSTMCell` instead.
When restoring from CudnnLSTM-trained checkpoints, use
`CudnnCompatibleLSTMCell` instead.
"""
super(LSTMCell, self).__init__(
_reuse=reuse, name=name, dtype=dtype, **kwargs)
_check_supported_dtypes(self.dtype)
if not state_is_tuple:
logging.warn("%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
logging.warn(
"%s: Using a concatenated state is slower and will soon be "
"deprecated. Use state_is_tuple=True.", self)
if num_unit_shards is not None or num_proj_shards is not None:
logging.warn(
"%s: The num_unit_shards and proj_unit_shards parameters are "
"deprecated and will be removed in Jan 2017. "
"Use a variable scope with a partitioner instead.", self)
if context.executing_eagerly() and context.num_gpus() > 0:
logging.warn("%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)
logging.warn(
"%s: Note that this cell is not optimized for performance. "
"Please use tf.contrib.cudnn_rnn.CudnnLSTM for better "
"performance on GPU.", self)

# Inputs must be 2-dimensional.
self.input_spec = input_spec.InputSpec(ndim=2)
@ -911,13 +922,13 @@ class LSTMCell(LayerRNNCell):

if num_proj:
self._state_size = (
LSTMStateTuple(num_units, num_proj)
if state_is_tuple else num_units + num_proj)
LSTMStateTuple(num_units, num_proj) if state_is_tuple else num_units +
num_proj)
self._output_size = num_proj
else:
self._state_size = (
LSTMStateTuple(num_units, num_units)
if state_is_tuple else 2 * num_units)
LSTMStateTuple(num_units, num_units) if state_is_tuple else 2 *
num_units)
self._output_size = num_units

@property
@ -931,15 +942,14 @@ class LSTMCell(LayerRNNCell):
@tf_utils.shape_type_conversion
def build(self, inputs_shape):
if inputs_shape[-1] is None:
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
% str(inputs_shape))
raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s" %
str(inputs_shape))
_check_supported_dtypes(self.dtype)
input_depth = inputs_shape[-1]
h_depth = self._num_units if self._num_proj is None else self._num_proj
maybe_partitioner = (
partitioned_variables.fixed_size_partitioner(self._num_unit_shards)
if self._num_unit_shards is not None
else None)
if self._num_unit_shards is not None else None)
self._kernel = self.add_variable(
_WEIGHTS_VARIABLE_NAME,
shape=[input_depth + h_depth, 4 * self._num_units],
@ -954,18 +964,17 @@ class LSTMCell(LayerRNNCell):
shape=[4 * self._num_units],
initializer=initializer)
if self._use_peepholes:
self._w_f_diag = self.add_variable("w_f_diag", shape=[self._num_units],
initializer=self._initializer)
self._w_i_diag = self.add_variable("w_i_diag", shape=[self._num_units],
initializer=self._initializer)
self._w_o_diag = self.add_variable("w_o_diag", shape=[self._num_units],
initializer=self._initializer)
self._w_f_diag = self.add_variable(
"w_f_diag", shape=[self._num_units], initializer=self._initializer)
self._w_i_diag = self.add_variable(
"w_i_diag", shape=[self._num_units], initializer=self._initializer)
self._w_o_diag = self.add_variable(
"w_o_diag", shape=[self._num_units], initializer=self._initializer)

if self._num_proj is not None:
maybe_proj_partitioner = (
partitioned_variables.fixed_size_partitioner(self._num_proj_shards)
if self._num_proj_shards is not None
else None)
if self._num_proj_shards is not None else None)
self._proj_kernel = self.add_variable(
"projection/%s" % _WEIGHTS_VARIABLE_NAME,
shape=[self._num_units, self._num_proj],
@ -979,10 +988,9 @@ class LSTMCell(LayerRNNCell):

Args:
inputs: input Tensor, must be 2-D, `[batch, input_size]`.
state: if `state_is_tuple` is False, this must be a state Tensor,
`2-D, [batch, state_size]`. If `state_is_tuple` is True, this must be a
tuple of state Tensors, both `2-D`, with column sizes `c_state` and
`m_state`.
state: if `state_is_tuple` is False, this must be a state Tensor, `2-D,
[batch, state_size]`. If `state_is_tuple` is True, this must be a tuple
of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`.

Returns:
A tuple containing:
@ -1023,11 +1031,13 @@ class LSTMCell(LayerRNNCell):
value=lstm_matrix, num_or_size_splits=4, axis=1)
# Diagonal connections
if self._use_peepholes:
c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
c = (
sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev +
sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
|
||||
else:
|
||||
c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
|
||||
self._activation(j))
|
||||
c = (
|
||||
sigmoid(f + self._forget_bias) * c_prev +
|
||||
sigmoid(i) * self._activation(j))
|
||||
|
||||
if self._cell_clip is not None:
|
||||
# pylint: disable=invalid-unary-operand-type
|
||||
@ -1046,8 +1056,9 @@ class LSTMCell(LayerRNNCell):
|
||||
m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
|
||||
# pylint: enable=invalid-unary-operand-type
|
||||
|
||||
new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
|
||||
array_ops.concat([c, m], 1))
|
||||
new_state = (
|
||||
LSTMStateTuple(c, m)
|
||||
if self._state_is_tuple else array_ops.concat([c, m], 1))
|
||||
return m, new_state
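For reference, a sketch of the update these lines compute. The `c` equation is read directly off the code above; the `m` (output) line is the standard peephole form and is an assumption, since this hunk does not show the output-gate code:

$$
\begin{aligned}
c_t &= \sigma(f + b_f + w_f \odot c_{t-1}) \odot c_{t-1}
      + \sigma(i + w_i \odot c_{t-1}) \odot \phi(j) \\
m_t &= \sigma(o + w_o \odot c_t) \odot \phi(c_t)
\end{aligned}
$$

where $\phi$ is the cell activation (`tanh` by default), $b_f$ is the forget bias, and $w_f, w_i, w_o$ are the peephole weights (all omitted when `use_peepholes=False`); `new_state` is then `LSTMStateTuple(c, m)` or their concatenation.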
  def get_config(self):
@ -1071,12 +1082,14 @@ class LSTMCell(LayerRNNCell):

def _enumerated_map_structure_up_to(shallow_structure, map_fn, *args, **kwargs):
  ix = [0]

  def enumerated_fn(*inner_args, **inner_kwargs):
    r = map_fn(ix[0], *inner_args, **inner_kwargs)
    ix[0] += 1
    return r
  return nest.map_structure_up_to(shallow_structure,
                                  enumerated_fn, *args, **kwargs)

  return nest.map_structure_up_to(shallow_structure, enumerated_fn, *args,
                                  **kwargs)


def _default_dropout_state_filter_visitor(substate):
@ -1190,9 +1203,15 @@ class _RNNCellWrapperV2(keras_layer.AbstractRNNCell):
class DropoutWrapperBase(object):
  """Operator adding dropout to inputs and outputs of the given cell."""

  def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
               state_keep_prob=1.0, variational_recurrent=False,
               input_size=None, dtype=None, seed=None,
  def __init__(self,
               cell,
               input_keep_prob=1.0,
               output_keep_prob=1.0,
               state_keep_prob=1.0,
               variational_recurrent=False,
               input_size=None,
               dtype=None,
               seed=None,
               dropout_state_filter_visitor=None):
    """Create a cell with added input, state, and/or output dropout.

@ -1217,40 +1236,34 @@ class DropoutWrapperBase(object):
        probability; if it is constant and 1, no output dropout will be added.
      state_keep_prob: unit Tensor or float between 0 and 1, output keep
        probability; if it is constant and 1, no output dropout will be added.
        State dropout is performed on the outgoing states of the cell.
        **Note** the state components to which dropout is applied when
        `state_keep_prob` is in `(0, 1)` are also determined by
        the argument `dropout_state_filter_visitor` (e.g. by default dropout
        is never applied to the `c` component of an `LSTMStateTuple`).
      variational_recurrent: Python bool. If `True`, then the same
        dropout pattern is applied across all time steps per run call.
        If this parameter is set, `input_size` **must** be provided.
        State dropout is performed on the outgoing states of the cell. **Note**
        the state components to which dropout is applied when `state_keep_prob`
        is in `(0, 1)` are also determined by the argument
        `dropout_state_filter_visitor` (e.g. by default dropout is never applied
        to the `c` component of an `LSTMStateTuple`).
      variational_recurrent: Python bool. If `True`, then the same dropout
        pattern is applied across all time steps per run call. If this parameter
        is set, `input_size` **must** be provided.
      input_size: (optional) (possibly nested tuple of) `TensorShape` objects
        containing the depth(s) of the input tensors expected to be passed in to
        the `DropoutWrapper`. Required and used **iff**
        `variational_recurrent = True` and `input_keep_prob < 1`.
        the `DropoutWrapper`. Required and used **iff** `variational_recurrent
        = True` and `input_keep_prob < 1`.
      dtype: (optional) The `dtype` of the input, state, and output tensors.
        Required and used **iff** `variational_recurrent = True`.
      seed: (optional) integer, the randomness seed.
      dropout_state_filter_visitor: (optional), default: (see below). Function
        that takes any hierarchical level of the state and returns
        a scalar or depth=1 structure of Python booleans describing
        which terms in the state should be dropped out. In addition, if the
        function returns `True`, dropout is applied across this sublevel. If
        the function returns `False`, dropout is not applied across this entire
        sublevel.
        that takes any hierarchical level of the state and returns a scalar or
        depth=1 structure of Python booleans describing which terms in the state
        should be dropped out. In addition, if the function returns `True`,
        dropout is applied across this sublevel. If the function returns
        `False`, dropout is not applied across this entire sublevel.
        Default behavior: perform dropout on all terms except the memory (`c`)
        state of `LSTMCellState` objects, and don't try to apply dropout to
        `TensorArray` objects:
        ```
        state of `LSTMCellState` objects, and don't try to apply dropout to
        `TensorArray` objects: ```
        def dropout_state_filter_visitor(s):
          if isinstance(s, LSTMCellState):
            # Never perform dropout on the c state.
            return LSTMCellState(c=False, h=True)
          elif isinstance(s, TensorArray):
            return False
          return True
        ```
        if isinstance(s, LSTMCellState): # Never perform dropout on the c
          state. return LSTMCellState(c=False, h=True)
        elif isinstance(s, TensorArray): return False return True ```

    Raises:
      TypeError: if `cell` is not an `RNNCell`, or `keep_state_fn` is provided
@ -1260,24 +1273,26 @@ class DropoutWrapperBase(object):
    super(DropoutWrapperBase, self).__init__(cell)
    assert_like_rnncell("cell", cell)

    if (dropout_state_filter_visitor is not None
        and not callable(dropout_state_filter_visitor)):
    if (dropout_state_filter_visitor is not None and
        not callable(dropout_state_filter_visitor)):
      raise TypeError("dropout_state_filter_visitor must be callable")
    self._dropout_state_filter = (
        dropout_state_filter_visitor or _default_dropout_state_filter_visitor)
    with ops.name_scope("DropoutWrapperInit"):

      def tensor_and_const_value(v):
        tensor_value = ops.convert_to_tensor(v)
        const_value = tensor_util.constant_value(tensor_value)
        return (tensor_value, const_value)

      for prob, attr in [(input_keep_prob, "input_keep_prob"),
                         (state_keep_prob, "state_keep_prob"),
                         (output_keep_prob, "output_keep_prob")]:
        tensor_prob, const_prob = tensor_and_const_value(prob)
        if const_prob is not None:
          if const_prob < 0 or const_prob > 1:
            raise ValueError("Parameter %s must be between 0 and 1: %d"
                             % (attr, const_prob))
            raise ValueError("Parameter %s must be between 0 and 1: %d" %
                             (attr, const_prob))
          setattr(self, "_%s" % attr, float(const_prob))
        else:
          setattr(self, "_%s" % attr, tensor_prob)
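A minimal usage sketch of the wrapper configured by this constructor, assuming the TF1 `tf.compat.v1.nn.rnn_cell` endpoints (the patch itself only touches docstrings):

```python
# Hedged sketch: variational recurrent dropout around an LSTM cell.
import tensorflow.compat.v1 as tf

cell = tf.nn.rnn_cell.DropoutWrapper(
    tf.nn.rnn_cell.LSTMCell(64),
    input_keep_prob=0.8,
    state_keep_prob=0.8,              # `c` is skipped by the default filter
    variational_recurrent=True,       # one mask reused across all time steps
    input_size=tf.TensorShape([32]),  # required iff variational + input dropout
    dtype=tf.float32)
```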
@ -1299,8 +1314,7 @@ class DropoutWrapperBase(object):
      # Prepend a 1 for the batch dimension; for recurrent
      # variational dropout we use the same dropout mask for all
      # batch elements.
      return array_ops.concat(
          ([1], tensor_shape.TensorShape(s).as_list()), 0)
      return array_ops.concat(([1], tensor_shape.TensorShape(s).as_list()), 0)

    def batch_noise(s, inner_seed):
      shape = convert_to_batch_shape(s)
@ -1360,7 +1374,11 @@ class DropoutWrapperBase(object):
      ret.set_shape(value.get_shape())
      return ret

  def _dropout(self, values, salt_prefix, recurrent_noise, keep_prob,
  def _dropout(self,
               values,
               salt_prefix,
               recurrent_noise,
               keep_prob,
               shallow_filtered_substructure=None):
    """Decides whether to perform standard dropout or recurrent dropout."""

@ -1370,21 +1388,25 @@ class DropoutWrapperBase(object):
      shallow_filtered_substructure = values

    if not self._variational_recurrent:

      def dropout(i, do_dropout, v):
        if not isinstance(do_dropout, bool) or do_dropout:
          return nn_ops.dropout_v2(
              v, rate=1. - keep_prob, seed=self._gen_seed(salt_prefix, i))
        else:
          return v

      return _enumerated_map_structure_up_to(
          shallow_filtered_substructure, dropout,
          *[shallow_filtered_substructure, values])
    else:

      def dropout(i, do_dropout, v, n):
        if not isinstance(do_dropout, bool) or do_dropout:
          return self._variational_recurrent_dropout_value(i, v, n, keep_prob)
        else:
          return v

      return _enumerated_map_structure_up_to(
          shallow_filtered_substructure, dropout,
          *[shallow_filtered_substructure, values, recurrent_noise])
@ -1405,12 +1427,12 @@ class DropoutWrapperBase(object):
    - Output: A tensor with cell's output.
    - New state: A tensor or tuple of tensors with new wrapped cell's state.
    """

    def _should_dropout(p):
      return (not isinstance(p, float)) or p < 1

    if _should_dropout(self._input_keep_prob):
      inputs = self._dropout(inputs, "input",
                             self._recurrent_input_noise,
      inputs = self._dropout(inputs, "input", self._recurrent_input_noise,
                             self._input_keep_prob)
    output, new_state = cell_call_fn(inputs, state, **kwargs)
    if _should_dropout(self._state_keep_prob):
@ -1418,13 +1440,11 @@ class DropoutWrapperBase(object):
      # which ones to keep.
      shallow_filtered_substructure = nest.get_traverse_shallow_structure(
          self._dropout_state_filter, new_state)
      new_state = self._dropout(new_state, "state",
                                self._recurrent_state_noise,
      new_state = self._dropout(new_state, "state", self._recurrent_state_noise,
                                self._state_keep_prob,
                                shallow_filtered_substructure)
    if _should_dropout(self._output_keep_prob):
      output = self._dropout(output, "output",
                             self._recurrent_output_noise,
      output = self._dropout(output, "output", self._recurrent_output_noise,
                             self._output_keep_prob)
    return output, new_state

@ -1460,7 +1480,7 @@ class ResidualWrapperBase(object):
      residual_fn: (Optional) The function to map raw cell inputs and raw cell
        outputs to the actual cell outputs of the residual network.
        Defaults to calling nest.map_structure on (lambda i, o: i + o), inputs
        and outputs.
        and outputs.
    """
    super(ResidualWrapperBase, self).__init__(cell)
    self._residual_fn = residual_fn
@ -1495,13 +1515,16 @@ class ResidualWrapperBase(object):
      ValueError: If cell inputs and outputs have different structure (value).
    """
    outputs, new_state = cell_call_fn(inputs, state, **kwargs)

    # Ensure shapes match
    def assert_shape_match(inp, out):
      inp.get_shape().assert_is_compatible_with(out.get_shape())

    def default_residual_fn(inputs, outputs):
      nest.assert_same_structure(inputs, outputs)
      nest.map_structure(assert_shape_match, inputs, outputs)
      return nest.map_structure(lambda inp, out: inp + out, inputs, outputs)

    res_outputs = (self._residual_fn or default_residual_fn)(inputs, outputs)
    return (res_outputs, new_state)
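A short sketch of the residual behavior implemented above (`tf.compat.v1` endpoints assumed); `default_residual_fn` applies when no `residual_fn` is given:

```python
# Hedged sketch: output = input + cell_output, shapes permitting; or supply
# a custom residual_fn to change how the skip connection is combined.
import tensorflow.compat.v1 as tf

cell = tf.nn.rnn_cell.ResidualWrapper(tf.nn.rnn_cell.GRUCell(32))
scaled = tf.nn.rnn_cell.ResidualWrapper(
    tf.nn.rnn_cell.GRUCell(32),
    residual_fn=lambda inp, out: 0.5 * inp + out)  # illustrative alternative
```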
@ -1593,17 +1616,16 @@ class MultiRNNCell(RNNCell):
  """

  @deprecated(None, "This class is equivalent as "
              "tf.keras.layers.StackedRNNCells, and will be replaced by "
              "that in Tensorflow 2.0.")
              "tf.keras.layers.StackedRNNCells, and will be replaced by "
              "that in Tensorflow 2.0.")
  def __init__(self, cells, state_is_tuple=True):
    """Create a RNN cell composed sequentially of a number of RNNCells.

    Args:
      cells: list of RNNCells that will be composed in this order.
      state_is_tuple: If True, accepted and returned states are n-tuples, where
        `n = len(cells)`. If False, the states are all
        concatenated along the column axis. This latter behavior will soon be
        deprecated.
        `n = len(cells)`. If False, the states are all concatenated along the
        column axis. This latter behavior will soon be deprecated.

    Raises:
      ValueError: if cells is empty (not allowed), or at least one of the cells
@ -1613,13 +1635,12 @@ class MultiRNNCell(RNNCell):
    if not cells:
      raise ValueError("Must specify at least one cell for MultiRNNCell.")
    if not nest.is_sequence(cells):
      raise TypeError(
          "cells must be a list or tuple, but saw: %s." % cells)
      raise TypeError("cells must be a list or tuple, but saw: %s." % cells)

    if len(set([id(cell) for cell in cells])) < len(cells):
      logging.log_first_n(logging.WARN,
                          "At least two cells provided to MultiRNNCell "
                          "are the same object and will share weights.", 1)
      logging.log_first_n(
          logging.WARN, "At least two cells provided to MultiRNNCell "
          "are the same object and will share weights.", 1)

    self._cells = cells
    for cell_number, cell in enumerate(self._cells):
@ -1632,8 +1653,8 @@ class MultiRNNCell(RNNCell):
    if not state_is_tuple:
      if any(nest.is_sequence(c.state_size) for c in self._cells):
        raise ValueError("Some cells return tuples of states, but the flag "
                         "state_is_tuple is not set. State sizes are: %s"
                         % str([c.state_size for c in self._cells]))
                         "state_is_tuple is not set. State sizes are: %s" %
                         str([c.state_size for c in self._cells]))

  @property
  def state_size(self):
@ -1699,8 +1720,9 @@ class MultiRNNCell(RNNCell):
        cur_inp, new_state = cell(cur_inp, cur_state)
        new_states.append(new_state)

    new_states = (tuple(new_states) if self._state_is_tuple else
                  array_ops.concat(new_states, 1))
    new_states = (
        tuple(new_states) if self._state_is_tuple else array_ops.concat(
            new_states, 1))

    return cur_inp, new_states
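A usage sketch for the stacking above; per the deprecation notice in the decorator, `tf.keras.layers.StackedRNNCells` is the forward-looking replacement. The `tf.compat.v1` paths are assumptions:

```python
# Hedged sketch. Note the list comprehension: reusing one cell object would
# trigger the shared-weights warning logged in __init__ above.
import tensorflow.compat.v1 as tf

stacked = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(64) for _ in range(2)], state_is_tuple=True)
# With state_is_tuple=True, the state is a 2-tuple of LSTMStateTuples.
```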

@ -56,7 +56,7 @@ class EagerFunc(object):
    Args:
      func: The function to wrap.
      Tout: A list of datatypes for the output; an empty list if the output is
        None.
        None.
      is_grad_func: Whether this EagerFunc is the gradient of another
        EagerPyFunc.
    """
@ -229,6 +229,7 @@ class FuncRegistry(object):
    self._unique_id += 1
    return "pyfunc_%d" % uid


# Global registry for py functions.
_py_funcs = FuncRegistry()

@ -326,15 +327,15 @@ def eager_py_func(func, inp, Tout, name=None):
    if tf.abs(x) <= m:
      return x**2
    else:
      return m**2 * (1 - 2 * tf.log(m) + tf.log(x**2))
      return m**2 * (1 - 2 * tf.math.log(m) + tf.math.log(x**2))

  x = tf.placeholder(tf.float32)
  m = tf.placeholder(tf.float32)
  x = tf.compat.v1.placeholder(tf.float32)
  m = tf.compat.v1.placeholder(tf.float32)

  y = tf.py_function(func=log_huber, inp=[x, m], Tout=tf.float32)
  dy_dx = tf.gradients(y, x)[0]

  with tf.Session() as sess:
  with tf.compat.v1.Session() as sess:
    # The session executes `log_huber` eagerly. Given the feed values below,
    # it will take the first branch, so `y` evaluates to 1.0 and
    # `dy_dx` evaluates to 2.0.
@ -350,15 +351,16 @@ def eager_py_func(func, inp, Tout, name=None):
  For more information on eager execution, see the
  [Eager guide](https://tensorflow.org/guide/eager).

  `tf.py_function` is similar in spirit to `tf.py_func`, but unlike
  `tf.py_function` is similar in spirit to `tf.compat.v1.py_func`, but unlike
  the latter, the former lets you use TensorFlow operations in the wrapped
  Python function. In particular, while `tf.py_func` only runs on CPUs and
  Python function. In particular, while `tf.compat.v1.py_func` only runs on CPUs
  and
  wraps functions that take NumPy arrays as inputs and return NumPy arrays as
  outputs, `tf.py_function` can be placed on GPUs and wraps functions
  that take Tensors as inputs, execute TensorFlow operations in their bodies,
  and return Tensors as outputs.

  Like `tf.py_func`, `tf.py_function` has the following limitations
  Like `tf.compat.v1.py_func`, `tf.py_function` has the following limitations
  with respect to serialization and distribution:

  * The body of the function (i.e. `func`) will not be serialized in a
@ -367,17 +369,16 @@ def eager_py_func(func, inp, Tout, name=None):

  * The operation must run in the same address space as the Python program
    that calls `tf.py_function()`. If you are using distributed
    TensorFlow, you must run a `tf.train.Server` in the same process as the
    TensorFlow, you must run a `tf.distribute.Server` in the same process as the
    program that calls `tf.py_function()` and you must pin the created
    operation to a device in that server (e.g. using `with tf.device():`).


  Args:
    func: A Python function which accepts a list of `Tensor` objects
      having element types that match the corresponding `tf.Tensor` objects
      in `inp` and returns a list of `Tensor` objects (or a single
      `Tensor`, or `None`) having element types that match the
      corresponding values in `Tout`.
    func: A Python function which accepts a list of `Tensor` objects having
      element types that match the corresponding `tf.Tensor` objects in `inp`
      and returns a list of `Tensor` objects (or a single `Tensor`, or `None`)
      having element types that match the corresponding values in `Tout`.
    inp: A list of `Tensor` objects.
    Tout: A list or tuple of tensorflow data types or a single tensorflow data
      type if there is only one, indicating what `func` returns; an empty list
@ -404,43 +405,44 @@ def py_func_common(func, inp, Tout, stateful=True, name=None):
  def my_func(x):
    # x will be a numpy array with the contents of the placeholder below
    return np.sinh(x)
  input = tf.placeholder(tf.float32)
  y = tf.py_func(my_func, [input], tf.float32)
  input = tf.compat.v1.placeholder(tf.float32)
  y = tf.compat.v1.py_func(my_func, [input], tf.float32)
  ```

  **N.B.** The `tf.py_func()` operation has the following known limitations:
  **N.B.** The `tf.compat.v1.py_func()` operation has the following known
  limitations:

  * The body of the function (i.e. `func`) will not be serialized in a
    `GraphDef`. Therefore, you should not use this function if you need to
    serialize your model and restore it in a different environment.

  * The operation must run in the same address space as the Python program
    that calls `tf.py_func()`. If you are using distributed TensorFlow, you
    must run a `tf.train.Server` in the same process as the program that calls
    `tf.py_func()` and you must pin the created operation to a device in that
    that calls `tf.compat.v1.py_func()`. If you are using distributed
    TensorFlow, you
    must run a `tf.distribute.Server` in the same process as the program that
    calls
    `tf.compat.v1.py_func()` and you must pin the created operation to a device
    in that
    server (e.g. using `with tf.device():`).

  Args:
    func: A Python function, which accepts `ndarray` objects as arguments and
      returns a list of `ndarray` objects (or a single `ndarray`). This function
      must accept as many arguments as there are tensors in `inp`, and these
      argument types will match the corresponding `tf.Tensor` objects
      in `inp`. The returns `ndarray`s must match the number and types defined
      `Tout`.
      argument types will match the corresponding `tf.Tensor` objects in `inp`.
      The returns `ndarray`s must match the number and types defined `Tout`.
      Important Note: Input and output numpy `ndarray`s of `func` are not
      guaranteed to be copies. In some cases their underlying memory will be
      shared with the corresponding TensorFlow tensors.
      In-place modification or storing `func` input or return values in
      python datastructures without explicit (np.)copy
      can have non-deterministic consequences.
      guaranteed to be copies. In some cases their underlying memory will be
      shared with the corresponding TensorFlow tensors. In-place modification
      or storing `func` input or return values in python datastructures
      without explicit (np.)copy can have non-deterministic consequences.
    inp: A list of `Tensor` objects.
    Tout: A list or tuple of tensorflow data types or a single tensorflow data
      type if there is only one, indicating what `func` returns.
    stateful: (Boolean.) If True, the function should be considered stateful.
      If a function is stateless, when given the same input it will return the
      same output and have no observable side effects. Optimizations such as
      common subexpression elimination are only performed on stateless
      operations.
    stateful: (Boolean.) If True, the function should be considered stateful. If
      a function is stateless, when given the same input it will return the same
      output and have no observable side effects. Optimizations such as common
      subexpression elimination are only performed on stateless operations.
    name: A name for the operation (optional).

  Returns:
@ -489,6 +491,5 @@ def numpy_function(func, inp, Tout, name=None):
numpy_function.__doc__ = py_func_common.__doc__.replace("py_func",
                                                        "numpy_function")


ops.NotDifferentiable("PyFunc")
ops.NotDifferentiable("PyFuncStateless")
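A minimal eager-mode sketch of the `tf.py_function` behavior contrasted in the docstrings above, using only endpoints named in this file:

```python
# Hedged sketch: unlike tf.compat.v1.py_func, the wrapped body may call
# TensorFlow ops, so gradients can flow through it.
import tensorflow as tf

def log_plus_one(x):
  return tf.math.log(x + 1.0)  # a TF op inside the Python body

x = tf.constant([0.0, 1.0, 2.0])
y = tf.py_function(func=log_plus_one, inp=[x], Tout=tf.float32)
```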

@ -162,10 +162,10 @@ def get_session_handle(data, name=None):

  ```python
  c = tf.multiply(a, b)
  h = tf.get_session_handle(c)
  h = tf.compat.v1.get_session_handle(c)
  h = sess.run(h)

  p, a = tf.get_session_tensor(h.handle, tf.float32)
  p, a = tf.compat.v1.get_session_tensor(h.handle, tf.float32)
  b = tf.multiply(a, 10)
  c = sess.run(b, feed_dict={p: h.handle})
  ```
@ -203,10 +203,10 @@ def get_session_tensor(handle, dtype, name=None):

  ```python
  c = tf.multiply(a, b)
  h = tf.get_session_handle(c)
  h = tf.compat.v1.get_session_handle(c)
  h = sess.run(h)

  p, a = tf.get_session_tensor(h.handle, tf.float32)
  p, a = tf.compat.v1.get_session_tensor(h.handle, tf.float32)
  b = tf.multiply(a, 10)
  c = sess.run(b, feed_dict={p: h.handle})
  ```

@ -169,8 +169,8 @@ def set_intersection(a, b, validate_indices=True):
  ])
  b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])

  # `tf.sets.set_intersection` is applied to each aligned pair of sets.
  tf.sets.set_intersection(a, b)
  # `tf.sets.intersection` is applied to each aligned pair of sets.
  tf.sets.intersection(a, b)

  # The result will be equivalent to either of:
  #
@ -202,7 +202,7 @@ def set_intersection(a, b, validate_indices=True):


@tf_export(
    "sets.difference", v1=["sets.difference", "sets.set_difference"])
    "sets.difference", v1=["sets.difference", "sets.set_difference"])
def set_difference(a, b, aminusb=True, validate_indices=True):
  """Compute set difference of elements in last dimension of `a` and `b`.

@ -241,7 +241,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True):
  b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])

  # `set_difference` is applied to each aligned pair of sets.
  tf.sets.set_difference(a, b)
  tf.sets.difference(a, b)

  # The result will be equivalent to either of:
  #
@ -274,7 +274,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True):


@tf_export(
    "sets.union", v1=["sets.union", "sets.set_union"])
    "sets.union", v1=["sets.union", "sets.set_union"])
def set_union(a, b, validate_indices=True):
  """Compute set union of elements in last dimension of `a` and `b`.

@ -312,7 +312,7 @@ def set_union(a, b, validate_indices=True):
  b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])

  # `set_union` is applied to each aligned pair of sets.
  tf.sets.set_union(a, b)
  tf.sets.union(a, b)

  # The result will be a equivalent to either of:
  #
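A compact sketch of the three renamed set endpoints on dense inputs; each returns a `SparseTensor` of the per-row results over the last dimension:

```python
# Hedged sketch of the v2 names this change documents.
import tensorflow as tf

a = tf.constant([[1, 2, 3]])
b = tf.constant([[2, 3, 4]])
inter = tf.sets.intersection(a, b)  # values {2, 3}
diff = tf.sets.difference(a, b)     # values {1}
union = tf.sets.union(a, b)         # values {1, 2, 3, 4}
```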

@ -54,11 +54,11 @@ def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disabl
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Types I, II and III are supported.
  Type I is implemented using a length `2N` padded `tf.spectral.rfft`.
  Type II is implemented using a length `2N` padded `tf.spectral.rfft`, as
  Type I is implemented using a length `2N` padded `tf.signal.rfft`.
  Type II is implemented using a length `2N` padded `tf.signal.rfft`, as
  described here: [Type 2 DCT using 2N FFT padded (Makhoul)](https://dsp.stackexchange.com/a/10606).
  Type III is a fairly straightforward inverse of Type II
  (i.e. using a length `2N` padded `tf.spectral.irfft`).
  (i.e. using a length `2N` padded `tf.signal.irfft`).

  @compatibility(scipy)
  Equivalent to [scipy.fftpack.dct](https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html)
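A NumPy sketch (illustrative, not the TF kernel) of the Makhoul construction the docstring cites: a length-`N` DCT-II obtained from a length-`2N` zero-padded real FFT:

```python
import numpy as np

def dct2_via_rfft(x):
  n = len(x)
  # Real FFT of the signal padded with n zeros; keep the first n bins.
  bins = np.fft.rfft(np.concatenate([x, np.zeros(n)]))[:n]
  # Rotate bin k by exp(-j*pi*k/(2n)) and take twice the real part.
  return 2.0 * np.real(bins * np.exp(-1j * np.pi * np.arange(n) / (2 * n)))
```

This matches `scipy.fftpack.dct(x, type=2, norm=None)` up to floating-point error.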

@ -47,7 +47,7 @@ def mfccs_from_log_mel_spectrograms(log_mel_spectrograms, name=None):
  ```python
  sample_rate = 16000.0
  # A Tensor of [batch_size, num_samples] mono PCM samples in the range [-1, 1].
  pcm = tf.placeholder(tf.float32, [None, None])
  pcm = tf.compat.v1.placeholder(tf.float32, [None, None])

  # A 1024-point STFT with frames of 64 ms and 75% overlap.
  stfts = tf.signal.stft(pcm, frame_length=1024, frame_step=256,
@ -66,7 +66,7 @@ def mfccs_from_log_mel_spectrograms(log_mel_spectrograms, name=None):
      linear_to_mel_weight_matrix.shape[-1:]))

  # Compute a stabilized log to get log-magnitude mel-scale spectrograms.
  log_mel_spectrograms = tf.log(mel_spectrograms + 1e-6)
  log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)

  # Compute MFCCs from log_mel_spectrograms and take the first 13.
  mfccs = tf.signal.mfccs_from_log_mel_spectrograms(

@ -69,7 +69,7 @@ def frame(signal, frame_length, frame_step, pad_end=False, pad_value=0, axis=-1,
  For example:

  ```python
  pcm = tf.placeholder(tf.float32, [None, 9152])
  pcm = tf.compat.v1.placeholder(tf.float32, [None, 9152])
  frames = tf.signal.frame(pcm, 512, 180)
  magspec = tf.abs(tf.signal.rfft(frames, [512]))
  image = tf.expand_dims(magspec, 3)

@ -171,7 +171,7 @@ def inverse_stft(stfts,
  ```python
  frame_length = 400
  frame_step = 160
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  waveform = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(waveform, frame_length, frame_step)
  inverse_stft = tf.signal.inverse_stft(
      stft, frame_length, frame_step,
@ -185,7 +185,7 @@ def inverse_stft(stfts,
  frame_length = 400
  frame_step = 160
  window_fn = functools.partial(window_ops.hamming_window, periodic=True),
  waveform = tf.placeholder(dtype=tf.float32, shape=[1000])
  waveform = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1000])
  stft = tf.signal.stft(
      waveform, frame_length, frame_step, window_fn=window_fn)
  inverse_stft = tf.signal.inverse_stft(

@ -2384,7 +2384,7 @@ def sparse_softmax(sp_input, name=None):
  values = np.asarray([[[0., np.e], [1., 0.]], [[np.e, 0.], [np.e, np.e]]])
  indices = np.vstack(np.where(values)).astype(np.int64).T

  result = tf.sparse_softmax(tf.SparseTensor(indices, values, shape))
  result = tf.sparse.softmax(tf.SparseTensor(indices, values, shape))
  # ...returning a 3-D SparseTensor, equivalent to:
  # [? 1.] [1 ?]
  # [1. ? ] and [.5 .5]
@ -2416,7 +2416,7 @@ def sparse_maximum(sp_a, sp_b, name=None):
  ```python
  sp_zero = sparse_tensor.SparseTensor([[0]], [0], [7])
  sp_one = sparse_tensor.SparseTensor([[1]], [1], [7])
  res = tf.sparse_maximum(sp_zero, sp_one).eval()
  res = tf.sparse.maximum(sp_zero, sp_one).eval()
  # "res" should be equal to SparseTensor([[0], [1]], [0, 1], [7]).
  ```

@ -2454,7 +2454,7 @@ def sparse_minimum(sp_a, sp_b, name=None):
  ```python
  sp_zero = sparse_tensor.SparseTensor([[0]], [0], [7])
  sp_one = sparse_tensor.SparseTensor([[1]], [1], [7])
  res = tf.sparse_minimum(sp_zero, sp_one).eval()
  res = tf.sparse.minimum(sp_zero, sp_one).eval()
  # "res" should be equal to SparseTensor([[0], [1]], [0, 0], [7]).
  ```


@ -332,8 +332,8 @@ def scatter_nd_update(ref, indices, updates, use_locking=True, name=None):
  ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
  indices = tf.constant([[4], [3], [1] ,[7]])
  updates = tf.constant([9, 10, 11, 12])
  update = tf.scatter_nd_update(ref, indices, updates)
  with tf.Session() as sess:
  update = tf.compat.v1.scatter_nd_update(ref, indices, updates)
  with tf.compat.v1.Session() as sess:
    print sess.run(update)
  ```

@ -446,8 +446,8 @@ def scatter_nd_add(ref, indices, updates, use_locking=False, name=None):
  ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
  indices = tf.constant([[4], [3], [1], [7]])
  updates = tf.constant([9, 10, 11, 12])
  add = tf.scatter_nd_add(ref, indices, updates)
  with tf.Session() as sess:
  add = tf.compat.v1.scatter_nd_add(ref, indices, updates)
  with tf.compat.v1.Session() as sess:
    print sess.run(add)
  ```

@ -563,8 +563,8 @@ def scatter_nd_sub(ref, indices, updates, use_locking=False, name=None):
  ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
  indices = tf.constant([[4], [3], [1] ,[7]])
  updates = tf.constant([9, 10, 11, 12])
  op = tf.scatter_nd_sub(ref, indices, updates)
  with tf.Session() as sess:
  op = tf.compat.v1.scatter_nd_sub(ref, indices, updates)
  with tf.compat.v1.Session() as sess:
    print sess.run(op)
  ```

@ -819,7 +819,7 @@ def scatter_min(ref, indices, updates, use_locking=False, name=None):
@deprecation.deprecated(
    "2018-11-29", "Use the batch_scatter_update method of Variable instead.")
def batch_scatter_update(ref, indices, updates, use_locking=True, name=None):
  """Generalization of `tf.scatter_update` to axis different than 0.
  """Generalization of `tf.compat.v1.scatter_update` to axis different than 0.

  Analogous to `batch_gather`. This assumes that `ref`, `indices` and `updates`
  have a series of leading dimensions that are the same for all of them, and the
@ -841,18 +841,19 @@ def batch_scatter_update(ref, indices, updates, use_locking=True, name=None):
  `var[i_1, ..., i_n, indices[i_1, ..., i_n, j]] = updates[i_1, ..., i_n, j]`

  When indices is a 1D tensor, this operation is equivalent to
  `tf.scatter_update`.
  `tf.compat.v1.scatter_update`.

  To avoid this operation there would be 2 alternatives:
  1) Reshaping the variable by merging the first `ndims` dimensions. However,
     this is not possible because `tf.reshape` returns a Tensor, which we
     cannot use `tf.scatter_update` on.
     cannot use `tf.compat.v1.scatter_update` on.
  2) Looping over the first `ndims` of the variable and using
     `tf.scatter_update` on the subtensors that result of slicing the first
     `tf.compat.v1.scatter_update` on the subtensors that result of slicing the
     first
     dimension. This is a valid option for `ndims = 1`, but less efficient than
     this implementation.

  See also `tf.scatter_update` and `tf.scatter_nd_update`.
  See also `tf.compat.v1.scatter_update` and `tf.compat.v1.scatter_nd_update`.

  Args:
    ref: `Variable` to scatter onto.
@ -887,7 +888,7 @@ def batch_scatter_update(ref, indices, updates, use_locking=True, name=None):
  # coordinates we created with the original indices.

  # For example if indices.shape = [2, 3, 4], we should generate the following
  # indices for tf.scatter_nd_update:
  # indices for tf.compat.v1.scatter_nd_update:
  # nd_indices[:, :, 0] = [[0, 0, 0], [1, 1, 1]]
  # nd_indices[:, :, 1] = [[0, 1, 2], [0, 1, 2]]
  # nd_indices[:, :, 2] = indices
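A NumPy sketch (illustrative only) of the coordinate grid those comments describe, here for `indices` with leading shape `[2, 3]`:

```python
import numpy as np

indices = np.array([[2, 0, 1],
                    [1, 2, 0]])
rows, cols = np.meshgrid(np.arange(2), np.arange(3), indexing="ij")
nd_indices = np.stack([rows, cols, indices], axis=-1)
# nd_indices[:, :, 0] == [[0, 0, 0], [1, 1, 1]]
# nd_indices[:, :, 1] == [[0, 1, 2], [0, 1, 2]]
# nd_indices[:, :, 2] == indices
```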

@ -43,7 +43,7 @@ def stateless_random_uniform(shape,
                             name=None):
  """Outputs deterministic pseudorandom values from a uniform distribution.

  This is a stateless version of `tf.random_uniform`: if run twice with the
  This is a stateless version of `tf.random.uniform`: if run twice with the
  same seeds, it will produce the same pseudorandom numbers. The output is
  consistent across multiple runs on the same hardware (and between CPU
  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
@ -110,7 +110,7 @@ def stateless_random_normal(shape,
                            name=None):
  """Outputs deterministic pseudorandom values from a normal distribution.

  This is a stateless version of `tf.random_normal`: if run twice with the
  This is a stateless version of `tf.random.normal`: if run twice with the
  same seeds, it will produce the same pseudorandom numbers. The output is
  consistent across multiple runs on the same hardware (and between CPU
  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
@ -147,7 +147,8 @@ def stateless_truncated_normal(shape,
                               name=None):
  """Outputs deterministic pseudorandom values, truncated normally distributed.

  This is a stateless version of `tf.truncated_normal`: if run twice with the
  This is a stateless version of `tf.random.truncated_normal`: if run twice with
  the
  same seeds, it will produce the same pseudorandom numbers. The output is
  consistent across multiple runs on the same hardware (and between CPU
  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
@ -190,7 +191,7 @@ def stateless_multinomial(logits,
                          name=None):
  """Draws deterministic pseudorandom samples from a multinomial distribution.

  This is a stateless version of `tf.multinomial`: if run twice with the
  This is a stateless version of `tf.random.categorical`: if run twice with the
  same seeds, it will produce the same pseudorandom numbers. The output is
  consistent across multiple runs on the same hardware (and between CPU
  and GPU), but may change between versions of TensorFlow or on non-CPU/GPU
@ -201,8 +202,8 @@ def stateless_multinomial(logits,
  ```python
  # samples has shape [1, 5], where each value is either 0 or 1 with equal
  # probability.
  samples = tf.random.stateless_multinomial(
      tf.log([[10., 10.]]), 5, seed=[7, 17])
  samples = tf.random.stateless_categorical(
      tf.math.log([[10., 10.]]), 5, seed=[7, 17])
  ```
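A quick sketch of the stateless contract these docstrings describe: the same shape and seed always reproduce the same values, with no graph-level random state involved:

```python
import tensorflow as tf

a = tf.random.stateless_uniform([2, 3], seed=[7, 17])
b = tf.random.stateless_uniform([2, 3], seed=[7, 17])
# a == b elementwise; changing either seed entry changes the stream.
```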

  Args:
@ -241,7 +242,7 @@ def stateless_categorical(logits,
  # samples has shape [1, 5], where each value is either 0 or 1 with equal
  # probability.
  samples = tf.random.stateless_categorical(
      tf.log([[10., 10.]]), 5, seed=[7, 17])
      tf.math.log([[10., 10.]]), 5, seed=[7, 17])
  ```

  Args:

@ -123,7 +123,7 @@ def string_format(template, inputs, placeholder="{}", summarize=3, name=None):
  Example:
    Formatting a single-tensor template:
    ```python
    sess = tf.Session()
    sess = tf.compat.v1.Session()
    with sess.as_default():
      tensor = tf.range(10)
      formatted = tf.strings.format("tensor: {}, suffix", tensor)
@ -135,7 +135,7 @@ def string_format(template, inputs, placeholder="{}", summarize=3, name=None):

    Formatting a multi-tensor template:
    ```python
    sess = tf.Session()
    sess = tf.compat.v1.Session()
    with sess.as_default():
      tensor_one = tf.reshape(tf.range(100), [10, 10])
      tensor_two = tf.range(10)
@ -459,7 +459,7 @@ def string_to_hash_bucket(input, num_buckets, name=None):

  Note that the hash function may change from time to time.
  This functionality will be deprecated and it's recommended to use
  `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`.
  `tf.strings.to_hash_bucket_fast()` or `tf.strings.to_hash_bucket_strong()`.

  Args:
    input: A `Tensor` of type `string`.
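A hedged sketch of the recommended replacements named above (the `key` argument of the strong variant is a pair of integers forming the hash key):

```python
import tensorflow as tf

fast = tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow"], num_buckets=10)
strong = tf.strings.to_hash_bucket_strong(["Hello"], num_buckets=10, key=[1, 2])
```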
|
||||
|
@ -304,11 +304,11 @@ def initialize(
|
||||
which can happen before or after this function is called.
|
||||
|
||||
Args:
|
||||
graph: A `tf.Graph` or `tf.GraphDef` to output to the writer.
|
||||
graph: A `tf.Graph` or `tf.compat.v1.GraphDef` to output to the writer.
|
||||
This function will not write the default graph by default. When
|
||||
writing to an event log file, the associated step will be zero.
|
||||
session: So this method can call `tf.Session.run`. This defaults
|
||||
to `tf.get_default_session`.
|
||||
to `tf.compat.v1.get_default_session`.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the current thread has no default
|
||||
@ -756,7 +756,7 @@ def scalar(name, tensor, family=None, step=None):
|
||||
`int8`, `uint16`, `half`, `uint32`, `uint64`.
|
||||
family: Optional, the summary's family.
|
||||
step: The `int64` monotonic step variable, which defaults
|
||||
to `tf.train.get_global_step`.
|
||||
to `tf.compat.v1.train.get_global_step`.
|
||||
|
||||
Returns:
|
||||
The created `tf.Operation` or a `tf.no_op` if summary writing has
|
||||
@ -836,14 +836,14 @@ def graph(param, step=None, name=None):
|
||||
TensorBoard.
|
||||
|
||||
When not using eager execution mode, the user should consider passing
|
||||
the `graph` parameter to `tf.contrib.summary.initialize` instead of
|
||||
the `graph` parameter to `tf.compat.v1.summary.initialize` instead of
|
||||
calling this function. Otherwise special care needs to be taken when
|
||||
using the graph to record the graph.
|
||||
|
||||
Args:
|
||||
param: A `tf.Tensor` containing a serialized graph proto. When
|
||||
eager execution is enabled, this function will automatically
|
||||
coerce `tf.Graph`, `tf.GraphDef`, and string types.
|
||||
coerce `tf.Graph`, `tf.compat.v1.GraphDef`, and string types.
|
||||
step: The global step variable. This doesn't have useful semantics
|
||||
for graph summaries, but is used anyway, due to the structure of
|
||||
event log files. This defaults to the global step.
|
||||
@ -875,7 +875,7 @@ _graph = graph # for functions with a graph parameter
|
||||
|
||||
|
||||
def import_event(tensor, name=None):
|
||||
"""Writes a `tf.Event` binary proto.
|
||||
"""Writes a `tf.compat.v1.Event` binary proto.
|
||||
|
||||
This can be used to import existing event logs into a new summary writer sink.
|
||||
Please note that this is lower level than the other summary functions and
|
||||
@ -883,7 +883,7 @@ def import_event(tensor, name=None):
|
||||
|
||||
Args:
|
||||
tensor: A `tf.Tensor` of type `string` containing a serialized
|
||||
`tf.Event` proto.
|
||||
`tf.compat.v1.Event` proto.
|
||||
name: A name for the operation (optional).
|
||||
|
||||
Returns:
|
||||
|
@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Provides templates which allow variable sharing."""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
@ -33,13 +32,16 @@ from tensorflow.python.util import tf_decorator
|
||||
from tensorflow.python.util.deprecation import deprecated
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
|
||||
__all__ = ["make_template"]
|
||||
|
||||
|
||||
@tf_export(v1=["make_template"])
|
||||
def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
|
||||
custom_getter_=None, **kwargs):
|
||||
def make_template(name_,
|
||||
func_,
|
||||
create_scope_now_=False,
|
||||
unique_name_=None,
|
||||
custom_getter_=None,
|
||||
**kwargs):
|
||||
"""Given an arbitrary function, wrap it so that it does variable sharing.
|
||||
|
||||
This wraps `func_` in a Template and partially evaluates it. Templates are
|
||||
@ -48,12 +50,14 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
|
||||
have the following properties:
|
||||
|
||||
* The function should create all trainable variables and any variables that
|
||||
should be reused by calling `tf.get_variable`. If a trainable variable is
|
||||
should be reused by calling `tf.compat.v1.get_variable`. If a trainable
|
||||
variable is
|
||||
created using `tf.Variable`, then a ValueError will be thrown. Variables
|
||||
that are intended to be locals can be created by specifying
|
||||
`tf.Variable(..., trainable=false)`.
|
||||
* The function may use variable scopes and other templates internally to
|
||||
create and reuse variables, but it shouldn't use `tf.global_variables` to
|
||||
create and reuse variables, but it shouldn't use
|
||||
`tf.compat.v1.global_variables` to
|
||||
capture variables that are defined outside of the scope of the function.
|
||||
* Internal scopes and variable names should not depend on any arguments that
|
||||
are not supplied to `make_template`. In general you will get a ValueError
|
||||
@ -67,12 +71,12 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
|
||||
|
||||
```python
|
||||
def my_op(x, scalar_name):
|
||||
var1 = tf.get_variable(scalar_name,
|
||||
var1 = tf.compat.v1.get_variable(scalar_name,
|
||||
shape=[],
|
||||
initializer=tf.constant_initializer(1))
|
||||
initializer=tf.compat.v1.constant_initializer(1))
|
||||
return x * var1
|
||||
|
||||
scale_by_y = tf.make_template('scale_by_y', my_op, scalar_name='y')
|
||||
scale_by_y = tf.compat.v1.make_template('scale_by_y', my_op, scalar_name='y')
|
||||
|
||||
z = scale_by_y(input1)
|
||||
w = scale_by_y(input2)
|
||||
@ -91,19 +95,21 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
|
||||
|
||||
```python
|
||||
def my_op(x, scalar_name):
|
||||
var1 = tf.get_variable(scalar_name,
|
||||
var1 = tf.compat.v1.get_variable(scalar_name,
|
||||
shape=[],
|
||||
initializer=tf.constant_initializer(1))
|
||||
initializer=tf.compat.v1.constant_initializer(1))
|
||||
return x * var1
|
||||
|
||||
with tf.variable_scope('scope') as vs:
|
||||
scale_by_y = tf.make_template('scale_by_y', my_op, scalar_name='y')
|
||||
with tf.compat.v1.variable_scope('scope') as vs:
|
||||
scale_by_y = tf.compat.v1.make_template('scale_by_y', my_op,
|
||||
scalar_name='y')
|
||||
z = scale_by_y(input1)
|
||||
w = scale_by_y(input2)
|
||||
|
||||
# Creates a template that reuses the variables above.
|
||||
with tf.variable_scope(vs, reuse=True):
|
||||
scale_by_y2 = tf.make_template('scale_by_y', my_op, scalar_name='y')
|
||||
with tf.compat.v1.variable_scope(vs, reuse=True):
|
||||
scale_by_y2 = tf.compat.v1.make_template('scale_by_y', my_op,
|
||||
scalar_name='y')
|
||||
z2 = scale_by_y2(input1)
|
||||
w2 = scale_by_y2(input2)
|
||||
```
|
||||
@ -128,8 +134,8 @@ def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
|
||||
template of the same scope/unique_name already exists and reuse is false,
|
||||
an error is raised. Defaults to None.
|
||||
custom_getter_: Optional custom getter for variables used in `func_`. See
|
||||
the `tf.get_variable` `custom_getter` documentation for
|
||||
more information.
|
||||
the `tf.compat.v1.get_variable` `custom_getter` documentation for more
|
||||
information.
|
||||
**kwargs: Keyword arguments to apply to `func_`.
|
||||
|
||||
Returns:
|
||||
@ -176,16 +182,16 @@ def make_template_internal(name_,
|
||||
template of the same scope/unique_name already exists and reuse is false,
|
||||
an error is raised. Defaults to None. If executing eagerly, must be None.
|
||||
custom_getter_: Optional custom getter for variables used in `func_`. See
|
||||
the `tf.get_variable` `custom_getter` documentation for
|
||||
more information.
|
||||
the `tf.compat.v1.get_variable` `custom_getter` documentation for more
|
||||
information.
|
||||
create_graph_function_: When True, `func_` will be executed as a graph
|
||||
function. This implies that `func_` must satisfy the properties that
|
||||
`function.defun` requires of functions: See the documentation of
|
||||
`function.defun` for details. When executing eagerly, setting this flag to
|
||||
True can improve performance. Regardless of whether eager execution is
|
||||
enabled, enabling this flag gives the caller access to graph-function
|
||||
semantics, i.e., accesses to variables are totally ordered and
|
||||
side-effecting ops are not pruned.
|
||||
`function.defun` for details. When executing eagerly, setting this flag
|
||||
to True can improve performance. Regardless of whether eager execution
|
||||
is enabled, enabling this flag gives the caller access to graph-function
|
||||
semantics, i.e., accesses to variables are totally ordered and
|
||||
side-effecting ops are not pruned.
|
||||
**kwargs: Keyword arguments to apply to `func_`.
|
||||
|
||||
Returns:
|
||||
@ -203,8 +209,8 @@ def make_template_internal(name_,
|
||||
"""
|
||||
|
||||
if kwargs:
|
||||
func_ = tf_decorator.make_decorator(func_, functools.partial(
|
||||
func_, **kwargs))
|
||||
func_ = tf_decorator.make_decorator(func_,
|
||||
functools.partial(func_, **kwargs))
|
||||
if context.executing_eagerly():
|
||||
if unique_name_ is not None:
|
||||
raise ValueError(
|
||||
@ -244,24 +250,29 @@ class Template(trackable.Trackable):
|
||||
call.
|
||||
"""
|
||||
|
||||
def __init__(self, name, func, create_scope_now=False, unique_name=None,
|
||||
custom_getter=None, create_graph_function=False):
|
||||
def __init__(self,
|
||||
name,
|
||||
func,
|
||||
create_scope_now=False,
|
||||
unique_name=None,
|
||||
custom_getter=None,
|
||||
create_graph_function=False):
|
||||
"""Creates a template for the given function.
|
||||
|
||||
Args:
|
||||
name: A name for the scope created by this template. The
|
||||
name will be made unique by appending `_N` to the it (see how
|
||||
`tf.variable_scope` treats the `default_name` for details).
|
||||
name: A name for the scope created by this template. The name will be made
|
||||
unique by appending `_N` to the it (see how
|
||||
`tf.compat.v1.variable_scope` treats the `default_name` for details).
|
||||
func: The function to apply each time.
|
||||
create_scope_now: Whether to create the scope at Template construction
|
||||
time, rather than first call. Defaults to false. Creating the scope at
|
||||
construction time may be more convenient if the template is to passed
|
||||
through much lower level code, and you want to be sure of the scope
|
||||
name without knowing exactly where it will be first called. If set to
|
||||
True, the scope will be created in the constructor, and all subsequent
|
||||
times in `__call__`, leading to a trailing numeral being added to the
|
||||
names of all created Tensors. If set to False, the scope will be created
|
||||
at the first call location.
|
||||
through much lower level code, and you want to be sure of the scope name
|
||||
without knowing exactly where it will be first called. If set to True,
|
||||
the scope will be created in the constructor, and all subsequent times
|
||||
in `__call__`, leading to a trailing numeral being added to the names of
|
||||
all created Tensors. If set to False, the scope will be created at the
|
||||
first call location.
|
||||
unique_name: When used, it overrides `name` and is not made unique. If a
|
||||
template of the same scope/unique_name already exists and reuse is
|
||||
false, an error is raised. Defaults to None.
|
||||
@ -330,10 +341,10 @@ class Template(trackable.Trackable):
|
||||
# so log it.
|
||||
variables = ops.get_collection_ref(ops.GraphKeys.GLOBAL_VARIABLES)
|
||||
if vars_at_start != len(variables):
|
||||
logging.info("New variables created when calling a template after "
|
||||
"the first time, perhaps you used tf.Variable when you "
|
||||
"meant tf.get_variable: %s",
|
||||
variables[vars_at_start:])
|
||||
logging.info(
|
||||
"New variables created when calling a template after "
|
||||
"the first time, perhaps you used tf.Variable when you "
|
||||
"meant tf.get_variable: %s", variables[vars_at_start:])
|
||||
elif self._first_call:
|
||||
self._first_call = False
|
||||
try:
|
||||
@ -356,8 +367,9 @@ class Template(trackable.Trackable):
|
||||
arg0 = ""
|
||||
else:
|
||||
arg0 = args[0]
|
||||
trace = "".join(_skip_common_stack_elements(self._stacktrace,
|
||||
traceback.format_stack()))
|
||||
trace = "".join(
|
||||
_skip_common_stack_elements(self._stacktrace,
|
||||
traceback.format_stack()))
|
||||
arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
|
||||
new_args = [arg0]
|
||||
new_args.extend(args[1:])
|
||||
@ -407,8 +419,7 @@ class Template(trackable.Trackable):
|
||||
|
||||
@property
|
||||
def variables(self):
|
||||
"""Returns the list of global and local variables created by the Template.
|
||||
"""
|
||||
"""Returns the list of global and local variables created by the Template."""
|
||||
return self.global_variables + self.local_variables
|
||||
|
||||
@property
|
||||
@ -462,17 +473,16 @@ class Template(trackable.Trackable):
|
||||
return self.non_trainable_variables
|
||||
|
||||
@property
|
||||
@deprecated(
|
||||
"2017-02-21", "The .var_scope property is deprecated. Please change your "
|
||||
"code to use the .variable_scope property")
|
||||
@deprecated("2017-02-21",
|
||||
"The .var_scope property is deprecated. Please change your "
|
||||
"code to use the .variable_scope property")
|
||||
def var_scope(self):
|
||||
"""Returns the variable scope object created by this Template."""
|
||||
return self._variable_scope
|
||||
|
||||
|
||||
class _EagerTemplateVariableStore(object):
|
||||
"""Wrapper around EagerVariableStore to support nesting EagerTemplates.
|
||||
"""
|
||||
"""Wrapper around EagerVariableStore to support nesting EagerTemplates."""
|
||||
|
||||
def __init__(self, variable_scope_name):
|
||||
self._variable_scope_name = variable_scope_name
|
||||
@ -537,24 +547,28 @@ class EagerTemplate(Template):
|
||||
call.
|
||||
"""
|
||||
|
||||
def __init__(self, name, func, create_scope_now=False, custom_getter=None,
|
||||
def __init__(self,
|
||||
name,
|
||||
func,
|
||||
create_scope_now=False,
|
||||
custom_getter=None,
|
||||
create_graph_function=False):
|
||||
"""Creates a template for the given function.
|
||||
|
||||
Args:
|
||||
name: A name for the scope created by this template. The
|
||||
name will be made unique by appending `_N` to the it (see how
|
||||
`tf.variable_scope` treats the `default_name` for details).
|
||||
name: A name for the scope created by this template. The name will be made
|
||||
unique by appending `_N` to the it (see how
|
||||
`tf.compat.v1.variable_scope` treats the `default_name` for details).
|
||||
func: The function to apply each time.
|
||||
create_scope_now: Whether to create the scope at Template construction
|
||||
time, rather than first call. Defaults to false. Creating the scope at
|
||||
construction time may be more convenient if the template is passed
|
||||
through much lower level code, and you want to be sure of the scope
|
||||
name without knowing exactly where it will be first called. If set to
|
||||
True, the scope will be created in the constructor, and all subsequent
|
||||
times in `__call__`, leading to a trailing numeral being added to the
|
||||
names of all created Tensors. If set to False, the scope will be created
|
||||
at the first call location.
|
||||
through much lower level code, and you want to be sure of the scope name
|
||||
without knowing exactly where it will be first called. If set to True,
|
||||
the scope will be created in the constructor, and all subsequent times
|
||||
in `__call__`, leading to a trailing numeral being added to the names of
|
||||
all created Tensors. If set to False, the scope will be created at the
|
||||
first call location.
|
||||
custom_getter: optional custom getter to pass to `variable_scope()`
|
||||
create_graph_function: When True, `func` will be executed as a graph
|
||||
function. Enabling this flag allows the caller to reap the performance
|
||||
@ -568,8 +582,7 @@ class EagerTemplate(Template):
|
||||
if not context.executing_eagerly():
|
||||
raise RuntimeError(
|
||||
"{} objects can only be used when eager execution is enabled, use "
|
||||
"tf.Template for graph construction".
|
||||
format(type(self)))
|
||||
"tf.Template for graph construction".format(type(self)))
|
||||
super(EagerTemplate, self).__init__(name, func, create_scope_now, None,
|
||||
custom_getter, create_graph_function)
|
||||
if self._variable_scope is not None:
|
||||
@ -601,21 +614,22 @@ class EagerTemplate(Template):
|
||||
# If a variable that we intend to train is created as a side effect
|
||||
# of creating a template, then that is almost certainly an error.
|
||||
if len(trainable_at_start) != len(trainable_variables):
|
||||
raise ValueError("Trainable variable created when calling a template "
|
||||
"after the first time, perhaps you used tf.Variable "
|
||||
"when you meant tf.get_variable: %s" %
|
||||
list(set(trainable_variables) -
|
||||
set(trainable_at_start)))
|
||||
raise ValueError(
|
||||
"Trainable variable created when calling a template "
|
||||
"after the first time, perhaps you used tf.Variable "
|
||||
"when you meant tf.get_variable: %s" %
|
||||
list(set(trainable_variables) - set(trainable_at_start)))
|
||||
|
||||
# Non-trainable tracking variables are a legitimate reason why a new
|
||||
# variable would be created, but it is a relatively advanced use-case,
|
||||
# so log it.
|
||||
variables = self._template_store.variables()
|
||||
if len(vars_at_start) != len(variables):
|
||||
logging.info("New variables created when calling a template after "
|
||||
"the first time, perhaps you used tf.Variable when you "
|
||||
"meant tf.get_variable: %s",
|
||||
list(set(variables) - set(vars_at_start)))
|
||||
logging.info(
|
||||
"New variables created when calling a template after "
|
||||
"the first time, perhaps you used tf.Variable when you "
|
||||
"meant tf.get_variable: %s",
|
||||
list(set(variables) - set(vars_at_start)))
|
||||
else:
|
||||
self._variables_created = True
|
||||
return result
|
||||
@ -627,8 +641,9 @@ class EagerTemplate(Template):
|
||||
arg0 = ""
|
||||
else:
|
||||
arg0 = args[0]
|
||||
trace = "".join(_skip_common_stack_elements(self._stacktrace,
|
||||
traceback.format_stack()))
|
||||
trace = "".join(
|
||||
_skip_common_stack_elements(self._stacktrace,
|
||||
traceback.format_stack()))
|
||||
arg0 = "%s\n\noriginally defined at:\n%s" % (arg0, trace)
|
||||
new_args = [arg0]
|
||||
new_args.extend(args[1:])
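Both checks above enforce the template contract: every call after the first must reuse the first call's variables rather than create new ones. A minimal sketch of the intended usage, assuming the `tf.compat.v1` API (the `scale` function and its names are illustrative, not part of this change):

```python
import tensorflow as tf

def scale(x):
  # get_variable routes through the template's variable store, so the
  # second call finds and reuses "scale/w" instead of creating it again.
  w = tf.compat.v1.get_variable(
      "w", shape=[], initializer=tf.compat.v1.ones_initializer())
  return x * w

scale_tpl = tf.compat.v1.make_template("scale", scale)
a = scale_tpl(tf.constant(2.0))  # First call creates scale/w.
b = scale_tpl(tf.constant(3.0))  # Reuses scale/w; no new trainable variable.
# Had scale() used tf.Variable instead, the second call would create a fresh
# trainable variable and trip the ValueError raised above.
```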

tensorflow/python/ops/variable_scope.py

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""A class to store named variables and a scope operator to manage sharing."""

from __future__ import absolute_import
@ -55,15 +54,14 @@ __all__ = [


class _PartitionInfo(object):
"""Holds partition info used by initializer functions.
"""
"""Holds partition info used by initializer functions."""

def __init__(self, full_shape, var_offset):
"""Constructor.

Args:
full_shape: Tuple or list of `int` indicating the full combined shape
of the partitioned variables.
full_shape: Tuple or list of `int` indicating the full combined shape of
the partitioned variables.
var_offset: Tuple or list of `int` specifying offset of this partition
with respect to the full variable for each dimension.

@ -157,8 +155,9 @@ class _PartitionInfo(object):
if len(shape) != len(self.full_shape):
raise ValueError(
"Expected equal length, but received shape={} of length {} while "
"self.full_shape={} is of length {}.".format(shape, len(
shape), self.full_shape, len(self.full_shape)))
"self.full_shape={} is of length {}.".format(shape, len(shape),
self.full_shape,
len(self.full_shape)))

for i in xrange(len(shape)):
if self.var_offset[i] + shape[i] > self.full_shape[i]:
@ -206,7 +205,6 @@ get_variable() should create the requested variable if it doesn't exist or, if
it does exist, simply return it.
"""


_DEFAULT_USE_RESOURCE = tf2.enabled()


@ -270,8 +268,8 @@ class _VariableStore(object):
variables are initialized with the initializer passed to __init__.

Attributes:
vars: a dictionary with string names (same as passed in GetVar) as keys
and the corresponding TensorFlow Variables as values.
vars: a dictionary with string names (same as passed in GetVar) as keys and
the corresponding TensorFlow Variables as values.
"""

def __init__(self):
@ -304,7 +302,7 @@ class _VariableStore(object):

Set `reuse` to `True` when you only want to reuse existing Variables.
Set `reuse` to `False` when you only want to create new Variables.
Set `reuse` to None (the default) or tf.AUTO_REUSE when you want
Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you want
variables to be created if they don't exist or returned if they do.

If initializer is `None` (the default), the default initializer passed in
@ -324,16 +322,15 @@ class _VariableStore(object):
shape: Shape of the new or existing variable.
dtype: Type of the new or existing variable (defaults to `DT_FLOAT`).
initializer: Initializer for the variable.
regularizer: A (Tensor -> Tensor or None) function; the result of
applying it on a newly created variable will be added to the collection
regularizer: A (Tensor -> Tensor or None) function; the result of applying
it on a newly created variable will be added to the collection
GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation
of variables. When eager execution is enabled this argument is always
reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation of
variables. When eager execution is enabled this argument is always
forced to be False.
trainable: If `True` also add the variable to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
`trainable` defaults to `True` unless `synchronization` is
set to `ON_READ`.
`GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). `trainable`
defaults to `True` unless `synchronization` is set to `ON_READ`.
collections: List of graph collections keys to add the `Variable` to.
Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
caching_device: Optional device string or function describing where the
@ -349,34 +346,32 @@ class _VariableStore(object):
must be known.
use_resource: If False, creates a regular Variable. If True, creates
instead an experimental ResourceVariable which has well-defined
semantics. Defaults to False (will later change to True).
When eager execution is enabled this argument is always forced to be
true.
semantics. Defaults to False (will later change to True). When eager
execution is enabled this argument is always forced to be true.
custom_getter: Callable that takes as a first argument the true getter,
and allows overwriting the internal get_variable method.
The signature of `custom_getter` should match that of this method,
but the most future-proof version will allow for changes:
`def custom_getter(getter, *args, **kwargs)`. Direct access to
all `get_variable` parameters is also allowed:
`def custom_getter(getter, name, *args, **kwargs)`. A simple identity
and allows overwriting the internal get_variable method. The signature
of `custom_getter` should match that of this method,
but the most future-proof version will allow for changes: `def
custom_getter(getter, *args, **kwargs)`. Direct access to
all `get_variable` parameters is also allowed: `def
custom_getter(getter, name, *args, **kwargs)`. A simple identity
custom getter that simply creates variables with modified names is:
```python
def custom_getter(getter, name, *args, **kwargs):
return getter(name + '_suffix', *args, **kwargs)
```
```python
def custom_getter(getter, name, *args, **kwargs): return getter(name +
'_suffix', *args, **kwargs) ```
constraint: An optional projection function to be applied to the variable
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
synchronization: Indicates when a distributed a variable will be
aggregated. Accepted values are constants defined in the class
`tf.VariableSynchronization`. By default the synchronization is set to
`AUTO` and the current `DistributionStrategy` chooses
when to synchronize. If `synchronization` is set to `ON_READ`,
`trainable` must not be set to `True`.
`AUTO` and the current `DistributionStrategy` chooses when to
synchronize. If `synchronization` is set to `ON_READ`, `trainable` must
not be set to `True`.
aggregation: Indicates how a distributed variable will be aggregated.
Accepted values are constants defined in the class
`tf.VariableAggregation`.
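After the automated reflow, the `custom_getter` snippet in the docstring above is collapsed onto a single line. For reference, an unmangled, runnable version of the same identity getter (graph mode assumed; the scope and variable names are illustrative):

```python
import tensorflow as tf

def custom_getter(getter, name, *args, **kwargs):
  # Delegate to the true getter, but create/look up the variable under a
  # modified name. `name` arrives fully scoped, e.g. "scope/v".
  return getter(name + "_suffix", *args, **kwargs)

with tf.compat.v1.variable_scope("scope", custom_getter=custom_getter):
  v = tf.compat.v1.get_variable("v", shape=[1])

print(v.name)  # scope/v_suffix:0
```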
@ -393,8 +388,8 @@ class _VariableStore(object):
EagerVariableStore.
"""
if custom_getter is not None and not callable(custom_getter):
raise ValueError(
"Passed a custom_getter which is not callable: %s" % custom_getter)
raise ValueError("Passed a custom_getter which is not callable: %s" %
custom_getter)

with ops.init_scope():
if context.executing_eagerly():
@ -448,14 +443,14 @@ class _VariableStore(object):
constraint=None,
synchronization=VariableSynchronization.AUTO,
aggregation=VariableAggregation.NONE):
is_scalar = (shape is not None
and isinstance(shape, collections_lib.Sequence)
and not shape)
is_scalar = (
shape is not None and isinstance(shape, collections_lib.Sequence) and
not shape)
# Partitioned variable case
if partitioner is not None and not is_scalar:
if not callable(partitioner):
raise ValueError(
"Partitioner must be callable, but received: %s" % partitioner)
raise ValueError("Partitioner must be callable, but received: %s" %
partitioner)
with ops.name_scope(None):
return self._get_partitioned_variable(
name=name,
@ -596,7 +591,7 @@ class _VariableStore(object):

Set `reuse` to `True` when you only want to reuse existing Variables.
Set `reuse` to `False` when you only want to create new Variables.
Set `reuse` to None (the default) or tf.AUTO_REUSE when you want
Set `reuse` to None (the default) or tf.compat.v1.AUTO_REUSE when you want
variables to be created if they don't exist or returned if they do.

If initializer is `None` (the default), the default initializer passed in
@ -617,14 +612,14 @@ class _VariableStore(object):
and `dtype` of the Variable to be created, and returns a list of
partitions for each axis (currently only one axis can be partitioned).
shape: shape of the new or existing sharded variable.
dtype: type of the new or existing sharded variable
(defaults to `DT_FLOAT`).
dtype: type of the new or existing sharded variable (defaults to
`DT_FLOAT`).
initializer: initializer for the sharded variable.
regularizer: a (Tensor -> Tensor or None) function; the result of
applying it on a newly created variable will be added to the collection
regularizer: a (Tensor -> Tensor or None) function; the result of applying
it on a newly created variable will be added to the collection
GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation
of variables.
reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation of
variables.
trainable: If `True` also add the variable to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
collections: List of graph collections keys to add the Variable to.
@ -644,15 +639,15 @@ class _VariableStore(object):
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
synchronization: Indicates when a distributed a variable will be
aggregated. Accepted values are constants defined in the class
`tf.VariableSynchronization`. By default the synchronization is set to
`AUTO` and the current `DistributionStrategy` chooses
when to synchronize. If `synchronization` is set to `ON_READ`,
`trainable` must not be set to `True`.
`AUTO` and the current `DistributionStrategy` chooses when to
synchronize. If `synchronization` is set to `ON_READ`, `trainable` must
not be set to `True`.
aggregation: Indicates how a distributed variable will be aggregated.
Accepted values are constants defined in the class
`tf.VariableAggregation`.
@ -686,20 +681,17 @@ class _VariableStore(object):
if reuse is False:
raise ValueError(
"Partitioned variable with name %s already exists. Did you mean to "
"set reuse=True or reuse=tf.AUTO_REUSE in VarScope?"
% name)
"set reuse=True or reuse=tf.AUTO_REUSE in VarScope?" % name)

existing_var = self._partitioned_vars[name]
if not shape.is_compatible_with(existing_var.get_shape()):
raise ValueError(
"Trying to reuse partitioned variable %s, but specified shape %s "
"and found shape %s."
% (name, shape, existing_var.get_shape()))
"and found shape %s." % (name, shape, existing_var.get_shape()))
if not dtype.is_compatible_with(existing_var.dtype):
raise ValueError(
"Trying to reuse partitioned variable %s, but specified dtype %s "
"and found dtype %s."
% (name, dtype.name, existing_var.dtype.name))
"and found dtype %s." % (name, dtype.name, existing_var.dtype.name))

# pylint: disable=protected-access
if (partitions is not None and
@ -724,21 +716,18 @@ class _VariableStore(object):
raise ValueError(
"Partitioner returned a different partitioning than what was "
"already found. Partitioner returned %d shards, and shard "
"%s/part_0 was found, but %s/part_%d was not."
% (num_slices, name, name, num_slices - 1))
"%s/part_0 was found, but %s/part_%d was not." %
(num_slices, name, name, num_slices - 1))
if "%s/part_%d" % (name, num_slices) in self._vars:
raise ValueError(
"Partitioner returned a different partitioning than what was "
"already found. Partitioner returned %d shards, and shard "
"%s/part_0 was found, but so was the extra shard %s/part_%d."
% (num_slices, name, name, num_slices))
"%s/part_0 was found, but so was the extra shard %s/part_%d." %
(num_slices, name, name, num_slices))

vs = []
for i, (var_offset, var_shape) in enumerate(_iter_slices(
shape.as_list(),
num_slices,
slice_dim
)):
for i, (var_offset, var_shape) in enumerate(
_iter_slices(shape.as_list(), num_slices, slice_dim)):
partition_info = _PartitionInfo(
full_shape=shape.as_list(), var_offset=var_offset)
var_full_name = "%s/part_%d" % (name, i)
@ -783,16 +772,18 @@ class _VariableStore(object):
aggregation=aggregation)

# pylint: disable=protected-access
var._set_save_slice_info(variables.Variable.SaveSliceInfo(
name, shape.as_list(), var_offset, var_shape))
var._set_save_slice_info(
variables.Variable.SaveSliceInfo(name, shape.as_list(), var_offset,
var_shape))
vs.append(var)
# pylint: enable=protected-access

partitioned_var = variables.PartitionedVariable(name=name,
shape=shape,
dtype=dtype,
variable_list=vs,
partitions=partitions)
partitioned_var = variables.PartitionedVariable(
name=name,
shape=shape,
dtype=dtype,
variable_list=vs,
partitions=partitions)
if not context.executing_eagerly() or self._store_eager_variables:
self._partitioned_vars[name] = partitioned_var
return partitioned_var
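For context, a sketch of how this partitioned path is typically reached from user code, assuming the `tf.compat.v1` API in graph mode; the shard names follow the `%s/part_%d` scheme used above:

```python
import tensorflow as tf

# Split the first axis of an [8, 10] variable into four [2, 10] shards.
partitioner = tf.compat.v1.fixed_size_partitioner(num_shards=4, axis=0)
with tf.compat.v1.variable_scope("embed", partitioner=partitioner):
  weights = tf.compat.v1.get_variable("weights", shape=[8, 10])

# weights is a PartitionedVariable; iterating it yields the shards.
print([p.name for p in weights])
# ['embed/weights/part_0:0', ..., 'embed/weights/part_3:0']
```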
@ -813,7 +804,9 @@ class _VariableStore(object):
constraint=None,
synchronization=VariableSynchronization.AUTO,
aggregation=VariableAggregation.NONE):
"""Get or create a single Variable (e.g. a shard or entire variable).
"""Get or create a single Variable (e.g.

a shard or entire variable).

See the documentation of get_variable above (ignore partitioning components)
for details.
@ -867,19 +860,19 @@ class _VariableStore(object):
# functions to create variables) so we take more than needed in the
# default case.
tb = [x for x in tb if "tensorflow/python" not in x[0]][:5]
raise ValueError("%s Originally defined at:\n\n%s" % (err_msg, "".join(
traceback.format_list(tb))))
raise ValueError("%s Originally defined at:\n\n%s" %
(err_msg, "".join(traceback.format_list(tb))))
found_var = self._vars[name]
if not shape.is_compatible_with(found_var.get_shape()):
raise ValueError("Trying to share variable %s, but specified shape %s"
" and found shape %s." % (name, shape,
found_var.get_shape()))
" and found shape %s." %
(name, shape, found_var.get_shape()))
if not dtype.is_compatible_with(found_var.dtype):
dtype_str = dtype.name
found_type_str = found_var.dtype.name
raise ValueError("Trying to share variable %s, but specified dtype %s"
" and found dtype %s." % (name, dtype_str,
found_type_str))
" and found dtype %s." %
(name, dtype_str, found_type_str))
return found_var

# The code below handles only the case of creating a new variable.
@ -903,7 +896,9 @@ class _VariableStore(object):
initializer = initializer(dtype=dtype)
if shape is not None and shape.is_fully_defined():
init_val = lambda: initializer(  # pylint: disable=g-long-lambda
shape.as_list(), dtype=dtype, partition_info=partition_info)
shape.as_list(),
dtype=dtype,
partition_info=partition_info)
variable_dtype = dtype.base_dtype
elif len(tf_inspect.getargspec(initializer).args) == len(
tf_inspect.getargspec(initializer).defaults or []):
@ -960,8 +955,9 @@ class _VariableStore(object):
else:
v_name = v.name
loss_name = loss.name
logging.vlog(1, "Applied regularizer to %s and added the result %s "
"to REGULARIZATION_LOSSES.", v_name, loss_name)
logging.vlog(
1, "Applied regularizer to %s and added the result %s "
"to REGULARIZATION_LOSSES.", v_name, loss_name)
ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, loss)
return v

@ -987,14 +983,14 @@ class _VariableStore(object):
initializing_from_value = False
# If dtype is DT_INT/DT_UINT, provide a default value `zero`
# If dtype is DT_BOOL, provide a default value `FALSE`
elif (dtype.is_integer or dtype.is_unsigned or dtype.is_bool
or dtype == dtypes.string):
elif (dtype.is_integer or dtype.is_unsigned or dtype.is_bool or
dtype == dtypes.string):
initializer = init_ops.zeros_initializer()
initializing_from_value = False
# NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here?
else:
raise ValueError("An initializer for variable %s of %s is required"
% (name, dtype.base_dtype))
raise ValueError("An initializer for variable %s of %s is required" %
(name, dtype.base_dtype))

return initializer, initializing_from_value

@ -1018,7 +1014,7 @@ class VariableScope(object):
name: name of the current scope, used as prefix in get_variable.
initializer: default initializer passed to get_variable.
regularizer: default regularizer passed to get_variable.
reuse: Boolean, None, or tf.AUTO_REUSE, setting the reuse in
reuse: Boolean, None, or tf.compat.v1.AUTO_REUSE, setting the reuse in
get_variable. When eager execution is enabled this argument is always
forced to be False.
caching_device: string, callable, or None: the caching device passed to
@ -1028,16 +1024,16 @@ class VariableScope(object):
name_scope: The name passed to `tf.name_scope`.
dtype: default type passed to get_variable (defaults to DT_FLOAT).
use_resource: if False, create a normal Variable; if True create an
experimental ResourceVariable with well-defined semantics. Defaults
to False (will later change to True). When eager execution is enabled
this argument is always forced to be True.
experimental ResourceVariable with well-defined semantics. Defaults to
False (will later change to True). When eager execution is enabled this
argument is always forced to be True.
constraint: An optional projection function to be applied to the variable
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
"""

def __init__(self,
@ -1399,7 +1395,7 @@ class EagerVariableStore(object):
container = tfe.EagerVariableStore()
for input in dataset_iterator:
with container.as_default():
x = tf.layers.dense(input, name="l1")
x = tf.compat.v1.layers.dense(input, name="l1")
print(container.variables)  # Should print the variables used in the layer.
```
"""
@ -1456,9 +1452,7 @@ class EagerVariableStore(object):

# Create new variable with same value, name, and "trainable" flag.
new_var = resource_variable_ops.ResourceVariable(
var.read_value(),
name=stripped_var_name,
trainable=var.trainable)
var.read_value(), name=stripped_var_name, trainable=var.trainable)
new_store._store._vars[key] = new_var
return new_store
# pylint: enable=protected-access
@ -1603,8 +1597,7 @@ Raises:
and `dtype` don't match. Reuse is set inside `variable_scope`.
""")
get_variable.__doc__ = get_variable_or_local_docstring % (
"Gets an existing variable with these parameters or create a new one.",
"",
"Gets an existing variable with these parameters or create a new one.", "",
"trainable: If `True` also add the variable to the graph collection\n"
"  `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).\n  ",
"GraphKeys.GLOBAL_VARIABLES")
@ -1655,9 +1648,7 @@ get_local_variable.__doc__ = get_variable_or_local_docstring % (
"Gets an existing *local* variable or creates a new one.",
"Behavior is the same as in `get_variable`, except that variables are\n"
"added to the `LOCAL_VARIABLES` collection and `trainable` is set to\n"
"`False`.\n",
"",
"GraphKeys.LOCAL_VARIABLES")
"`False`.\n", "", "GraphKeys.LOCAL_VARIABLES")


def _get_partitioned_variable(name,
@ -1703,24 +1694,24 @@ def _get_partitioned_variable(name,
shape: Shape of the new or existing variable.
dtype: Type of the new or existing variable (defaults to `DT_FLOAT`).
initializer: Initializer for the variable if one is created.
regularizer: A (Tensor -> Tensor or None) function; the result of
applying it on a newly created variable will be added to the collection
regularizer: A (Tensor -> Tensor or None) function; the result of applying
it on a newly created variable will be added to the collection
GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
trainable: If `True` also add the variable to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
collections: List of graph collections keys to add the Variable to.
Defaults to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
collections: List of graph collections keys to add the Variable to. Defaults
to `[GraphKeys.GLOBAL_VARIABLES]` (see `tf.Variable`).
caching_device: Optional device string or function describing where the
Variable should be cached for reading. Defaults to the Variable's
device. If not `None`, caches on another device. Typical use is to
cache on the device where the Ops using the Variable reside, to
deduplicate copying through `Switch` and other conditional statements.
Variable should be cached for reading. Defaults to the Variable's device.
If not `None`, caches on another device. Typical use is to cache on the
device where the Ops using the Variable reside, to deduplicate copying
through `Switch` and other conditional statements.
partitioner: Optional callable that accepts a fully defined `TensorShape`
and `dtype` of the Variable to be created, and returns a list of
partitions for each axis (currently only one axis can be partitioned).
validate_shape: If False, allows the variable to be initialized with a
value of unknown shape. If True, the default, the shape of initial_value
must be known.
validate_shape: If False, allows the variable to be initialized with a value
of unknown shape. If True, the default, the shape of initial_value must be
known.
use_resource: If False, creates a regular Variable. If True, creates an
experimental ResourceVariable instead which has well-defined semantics.
Defaults to False (will later change to True).
@ -1728,15 +1719,15 @@ def _get_partitioned_variable(name,
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
synchronization: Indicates when a distributed a variable will be
aggregated. Accepted values are constants defined in the class
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
synchronization: Indicates when a distributed a variable will be aggregated.
Accepted values are constants defined in the class
`tf.VariableSynchronization`. By default the synchronization is set to
`AUTO` and the current `DistributionStrategy` chooses
when to synchronize. If `synchronization` is set to `ON_READ`,
`trainable` must not be set to `True`.
`AUTO` and the current `DistributionStrategy` chooses when to synchronize.
If `synchronization` is set to `ON_READ`, `trainable` must not be set to
`True`.
aggregation: Indicates how a distributed variable will be aggregated.
Accepted values are constants defined in the class
`tf.VariableAggregation`.
@ -1802,8 +1793,8 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name

Args:
name_or_scope: `string` or `VariableScope`: the scope to open.
reuse: `True` or None, or tf.AUTO_REUSE; if `None`, we inherit the parent
scope's reuse flag.
reuse: `True` or None, or tf.compat.v1.AUTO_REUSE; if `None`, we inherit
the parent scope's reuse flag.
initializer: default initializer for variables within this scope.
regularizer: default regularizer for variables within this scope.
caching_device: default caching device for variables within this scope.
@ -1818,9 +1809,9 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
"""
self._name_or_scope = name_or_scope
self._reuse = reuse
@ -1865,8 +1856,8 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
variable_scope_object.set_partitioner(self._partitioner)
if self._custom_getter is not None:
variable_scope_object.set_custom_getter(
_maybe_wrap_custom_getter(
self._custom_getter, self._name_or_scope.custom_getter))
_maybe_wrap_custom_getter(self._custom_getter,
self._name_or_scope.custom_getter))
if self._dtype is not None:
variable_scope_object.set_dtype(self._dtype)
if self._use_resource is not None:
@ -1894,10 +1885,10 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
# VariableScope with name extended by the provided one, and inherited
# reuse and initializer (except if the user provided values to set).
self._new_name = (
self._old.name + "/" + self._name_or_scope if self._old.name
else self._name_or_scope)
self._reuse = (self._reuse
or self._old.reuse)  # Re-using is inherited by sub-scopes.
self._old.name + "/" +
self._name_or_scope if self._old.name else self._name_or_scope)
self._reuse = (self._reuse or
self._old.reuse)  # Re-using is inherited by sub-scopes.
if self._old_name_scope is None:
name_scope = self._name_or_scope
else:
@ -1936,8 +1927,8 @@ class _pure_variable_scope(object):  # pylint: disable=invalid-name
return variable_scope_object

def __exit__(self, type_arg, value_arg, traceback_arg):
if (self._var_scope_store.current_scope is not
self._last_variable_scope_object):
if (self._var_scope_store.current_scope is
not self._last_variable_scope_object):
raise RuntimeError("Improper nesting of variable_scope.")
# If jumping out from a non-prolonged scope, restore counts.
if isinstance(self._name_or_scope, VariableScope):
@ -1961,9 +1952,8 @@ def _maybe_wrap_custom_getter(custom_getter, old_getter):
# will call the true_getter, perform any intermediate
# processing, and return the results to the current
# getter, which will also perform additional processing.
return custom_getter(
functools.partial(old_getter, getter),
*args, **kwargs)
return custom_getter(functools.partial(old_getter, getter), *args, **kwargs)

return wrapped_custom_getter
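The wrapper above composes getters so that the most recently installed `custom_getter` runs first and reaches the previously installed one through `functools.partial`. A standalone toy sketch of that composition order (plain Python, no TensorFlow; every name here is illustrative):

```python
import functools

def wrap(custom_getter, old_getter):
  # Mirrors _maybe_wrap_custom_getter: the new getter sees a "true getter"
  # that already routes through the previously installed one.
  if old_getter is None:
    return custom_getter
  def wrapped(getter, *args, **kwargs):
    return custom_getter(functools.partial(old_getter, getter), *args, **kwargs)
  return wrapped

base = lambda name: "var(%s)" % name
add_prefix = lambda getter, name: getter("outer/" + name)
add_suffix = lambda getter, name: getter(name + "_sfx")

combined = wrap(add_suffix, add_prefix)
print(combined(base, "v"))  # var(outer/v_sfx)
```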


@ -2004,24 +1994,24 @@ class variable_scope(object):
Simple example of how to create a new variable:

```python
with tf.variable_scope("foo"):
with tf.variable_scope("bar"):
v = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo"):
with tf.compat.v1.variable_scope("bar"):
v = tf.compat.v1.get_variable("v", [1])
assert v.name == "foo/bar/v:0"
```

Simple example of how to reenter a premade variable scope safely:

```python
with tf.variable_scope("foo") as vs:
with tf.compat.v1.variable_scope("foo") as vs:
pass

# Re-enter the variable scope.
with tf.variable_scope(vs,
with tf.compat.v1.variable_scope(vs,
auxiliary_name_scope=False) as vs1:
# Restore the original name_scope.
with tf.name_scope(vs1.original_name_scope):
v = tf.get_variable("v", [1])
v = tf.compat.v1.get_variable("v", [1])
assert v.name == "foo/v:0"
c = tf.constant([1], name="c")
assert c.name == "foo/c:0"
@ -2031,8 +2021,8 @@ class variable_scope(object):

```python
def foo():
with tf.variable_scope("foo", reuse=tf.AUTO_REUSE):
v = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo", reuse=tf.compat.v1.AUTO_REUSE):
v = tf.compat.v1.get_variable("v", [1])
return v

v1 = foo()  # Creates v.
@ -2043,20 +2033,20 @@ class variable_scope(object):
Basic example of sharing a variable with reuse=True:

```python
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1])
with tf.variable_scope("foo", reuse=True):
v1 = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo"):
v = tf.compat.v1.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo", reuse=True):
v1 = tf.compat.v1.get_variable("v", [1])
assert v1 == v
```

Sharing a variable by capturing a scope and setting reuse:

```python
with tf.variable_scope("foo") as scope:
v = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo") as scope:
v = tf.compat.v1.get_variable("v", [1])
scope.reuse_variables()
v1 = tf.get_variable("v", [1])
v1 = tf.compat.v1.get_variable("v", [1])
assert v1 == v
```

@ -2064,9 +2054,9 @@ class variable_scope(object):
an existing variable in a non-reusing scope.

```python
with tf.variable_scope("foo"):
v = tf.get_variable("v", [1])
v1 = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo"):
v = tf.compat.v1.get_variable("v", [1])
v1 = tf.compat.v1.get_variable("v", [1])
#  Raises ValueError("... v already exists ...").
```

@ -2074,8 +2064,8 @@ class variable_scope(object):
exist in reuse mode.

```python
with tf.variable_scope("foo", reuse=True):
v = tf.get_variable("v", [1])
with tf.compat.v1.variable_scope("foo", reuse=True):
v = tf.compat.v1.get_variable("v", [1])
#  Raises ValueError("... v does not exists ...").
```

@ -2145,14 +2135,14 @@ class variable_scope(object):
caching_device: default caching device for variables within this scope.
partitioner: default partitioner for variables within this scope.
custom_getter: default custom getter for variables within this scope.
reuse: `True`, None, or tf.AUTO_REUSE; if `True`, we go into reuse mode
for this scope as well as all sub-scopes; if tf.AUTO_REUSE, we create
variables if they do not exist, and return them otherwise; if None, we
inherit the parent scope's reuse flag. When eager execution is enabled,
new variables are always created unless an EagerVariableStore or
template is currently active.
dtype: type of variables created in this scope (defaults to the type
in the passed scope, or inherited from parent scope).
reuse: `True`, None, or tf.compat.v1.AUTO_REUSE; if `True`, we go into
reuse mode for this scope as well as all sub-scopes; if
tf.compat.v1.AUTO_REUSE, we create variables if they do not exist, and
return them otherwise; if None, we inherit the parent scope's reuse
flag. When eager execution is enabled, new variables are always created
unless an EagerVariableStore or template is currently active.
dtype: type of variables created in this scope (defaults to the type in
the passed scope, or inherited from parent scope).
use_resource: If False, all variables will be regular Variables. If True,
experimental ResourceVariables with well-defined semantics will be used
instead. Defaults to False (will later change to True). When eager
@ -2161,13 +2151,13 @@ class variable_scope(object):
after being updated by an `Optimizer` (e.g. used to implement norm
constraints or value constraints for layer weights). The function must
take as input the unprojected Tensor representing the value of the
variable and return the Tensor for the projected value
(which must have the same shape). Constraints are not safe to
use when doing asynchronous distributed training.
variable and return the Tensor for the projected value (which must have
the same shape). Constraints are not safe to use when doing asynchronous
distributed training.
auxiliary_name_scope: If `True`, we create an auxiliary name scope with
the scope. If `False`, we don't create it. Note that the argument is
not inherited, and it only takes effect for once when creating. You
should only use it for re-entering a premade variable scope.
the scope. If `False`, we don't create it. Note that the argument is not
inherited, and it only takes effect for once when creating. You should
only use it for re-entering a premade variable scope.

Returns:
A scope that can be captured and reused.
@ -2355,8 +2345,8 @@ class variable_scope(object):
return entered_pure_variable_scope

def __exit__(self, type_arg, value_arg, traceback_arg):
self._cached_pure_variable_scope.__exit__(
type_arg, value_arg, traceback_arg)
self._cached_pure_variable_scope.__exit__(type_arg, value_arg,
traceback_arg)
if self._current_name_scope:
self._current_name_scope.__exit__(type_arg, value_arg, traceback_arg)
if self._in_graph_mode and not self._building_function:
@ -2381,18 +2371,19 @@ def variable_op_scope(values,
"""Deprecated: context manager for defining an op that creates variables."""
logging.warn("tf.variable_op_scope(values, name, default_name) is deprecated,"
" use tf.variable_scope(name, default_name, values)")
with variable_scope(name_or_scope,
default_name=default_name,
values=values,
initializer=initializer,
regularizer=regularizer,
caching_device=caching_device,
partitioner=partitioner,
custom_getter=custom_getter,
reuse=reuse,
dtype=dtype,
use_resource=use_resource,
constraint=constraint) as scope:
with variable_scope(
name_or_scope,
default_name=default_name,
values=values,
initializer=initializer,
regularizer=regularizer,
caching_device=caching_device,
partitioner=partitioner,
custom_getter=custom_getter,
reuse=reuse,
dtype=dtype,
use_resource=use_resource,
constraint=constraint) as scope:
yield scope


@ -2400,10 +2391,10 @@ def _call_partitioner(partitioner, shape, dtype):
"""Call partitioner validating its inputs/output.

Args:
partitioner: a function mapping `Tensor` shape and dtype to a
list of partitions.
partitioner: a function mapping `Tensor` shape and dtype to a list of
partitions.
shape: shape of the `Tensor` to partition, must have at least two
dimensions.
dimensions.
dtype: dtype of the elements in the `Tensor`.

Returns:
@ -2419,20 +2410,18 @@ def _call_partitioner(partitioner, shape, dtype):

slicing = partitioner(shape=shape, dtype=dtype)
if not isinstance(slicing, collections_lib.Sequence):
raise ValueError("Partitioner must return a sequence, but saw: %s"
% slicing)
raise ValueError("Partitioner must return a sequence, but saw: %s" %
slicing)
if len(slicing) != shape.ndims:
raise ValueError(
"Partitioner returned a partition list that does not match the "
"Variable's rank: %s vs. %s" % (slicing, shape))
if any(p < 1 for p in slicing):
raise ValueError(
"Partitioner returned zero partitions for some axes: %s" %
slicing)
raise ValueError("Partitioner returned zero partitions for some axes: %s" %
slicing)
if sum(p > 1 for p in slicing) > 1:
raise ValueError(
"Can only slice a variable along one dimension: "
"shape: %s, partitioning: %s" % (shape, slicing))
raise ValueError("Can only slice a variable along one dimension: "
"shape: %s, partitioning: %s" % (shape, slicing))
return slicing
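Taken together, the checks above require a partitioner to return one entry per axis, every entry at least 1, and at most one entry greater than 1. A minimal partitioner that satisfies them (illustrative only; the signature matches the `partitioner(shape=..., dtype=...)` call above):

```python
def split_axis0_in_4(shape=None, dtype=None):
  # One entry per axis, all >= 1, and only axis 0 actually split.
  return [4] + [1] * (shape.ndims - 1)

# For a [8, 10] variable this yields [4, 1]. Returning [4, 2] instead would
# trip the "Can only slice a variable along one dimension" error above.
```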


@ -2489,20 +2478,34 @@ def default_variable_creator(next_creator=None, **kwargs):
if use_resource:
distribute_strategy = kwargs.get("distribute_strategy", None)
return resource_variable_ops.ResourceVariable(
initial_value=initial_value, trainable=trainable,
collections=collections, validate_shape=validate_shape,
caching_device=caching_device, name=name, dtype=dtype,
constraint=constraint, variable_def=variable_def,
import_scope=import_scope, distribute_strategy=distribute_strategy,
synchronization=synchronization, aggregation=aggregation)
initial_value=initial_value,
trainable=trainable,
collections=collections,
validate_shape=validate_shape,
caching_device=caching_device,
name=name,
dtype=dtype,
constraint=constraint,
variable_def=variable_def,
import_scope=import_scope,
distribute_strategy=distribute_strategy,
synchronization=synchronization,
aggregation=aggregation)
else:
return variables.RefVariable(
initial_value=initial_value, trainable=trainable,
collections=collections, validate_shape=validate_shape,
caching_device=caching_device, name=name, dtype=dtype,
constraint=constraint, variable_def=variable_def,
expected_shape=expected_shape, import_scope=import_scope,
synchronization=synchronization, aggregation=aggregation)
initial_value=initial_value,
trainable=trainable,
collections=collections,
validate_shape=validate_shape,
caching_device=caching_device,
name=name,
dtype=dtype,
constraint=constraint,
variable_def=variable_def,
expected_shape=expected_shape,
import_scope=import_scope,
synchronization=synchronization,
aggregation=aggregation)


def default_variable_creator_v2(next_creator=None, **kwargs):
@ -2522,11 +2525,18 @@ def default_variable_creator_v2(next_creator=None, **kwargs):
aggregation = kwargs.get("aggregation", None)

return resource_variable_ops.ResourceVariable(
initial_value=initial_value, trainable=trainable,
validate_shape=validate_shape, caching_device=caching_device,
name=name, dtype=dtype, constraint=constraint, variable_def=variable_def,
import_scope=import_scope, distribute_strategy=distribute_strategy,
synchronization=synchronization, aggregation=aggregation)
initial_value=initial_value,
trainable=trainable,
validate_shape=validate_shape,
caching_device=caching_device,
name=name,
dtype=dtype,
constraint=constraint,
variable_def=variable_def,
import_scope=import_scope,
distribute_strategy=distribute_strategy,
synchronization=synchronization,
aggregation=aggregation)


variables.default_variable_creator = default_variable_creator
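These module-level assignments make the functions above the bottom of the variable-creator stack. A hedged sketch of how a creator installed with `tf.variable_creator_scope` interposes before falling through to them (`logging_creator` is an illustrative name):

```python
import tensorflow as tf

def logging_creator(next_creator, **kwargs):
  # Observe every variable creation, then fall through to the next creator
  # (ultimately default_variable_creator above).
  print("creating variable:", kwargs.get("name"))
  return next_creator(**kwargs)

with tf.variable_creator_scope(logging_creator):
  v = tf.Variable(1.0, name="v")  # prints: creating variable: v
```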

tensorflow/python/ops/variables.py

@ -306,7 +306,7 @@ class Variable(six.with_metaclass(VariableMetaclass,

```python
# Launch the graph in a session.
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
# Run the variable initializer.
sess.run(w.initializer)
# ...you now can run ops that use the value of 'w'...
@ -318,10 +318,10 @@ class Variable(six.with_metaclass(VariableMetaclass,

```python
# Add an Op to initialize global variables.
init_op = tf.global_variables_initializer()
init_op = tf.compat.v1.global_variables_initializer()

# Launch the graph in a session.
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
# Run the Op that initializes global variables.
sess.run(init_op)
# ...you can now run any Op that uses variable values...
@ -372,8 +372,8 @@ class Variable(six.with_metaclass(VariableMetaclass,
not have these issues:

* Add `use_resource=True` when constructing `tf.Variable`;
* Call `tf.get_variable_scope().set_use_resource(True)` inside a
`tf.variable_scope` before the `tf.get_variable()` call.
* Call `tf.compat.v1.get_variable_scope().set_use_resource(True)` inside a
`tf.compat.v1.variable_scope` before the `tf.compat.v1.get_variable()` call.
"""

def __init__(self,
@ -512,14 +512,14 @@ class Variable(six.with_metaclass(VariableMetaclass,

This convenience method requires a session where the graph
containing this variable has been launched. If no session is
passed, the default session is used. See `tf.Session` for more
passed, the default session is used. See `tf.compat.v1.Session` for more
information on launching a graph and on sessions.

```python
v = tf.Variable([1, 2])
init = tf.global_variables_initializer()
init = tf.compat.v1.global_variables_initializer()

with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
sess.run(init)
# Usage passing the session explicitly.
print(v.eval(sess))
@ -549,7 +549,7 @@ class Variable(six.with_metaclass(VariableMetaclass,

```python
# Initialize 'v' with a random tensor.
v = tf.Variable(tf.truncated_normal([10, 40]))
v = tf.Variable(tf.random.truncated_normal([10, 40]))
# Use `initialized_value` to guarantee that `v` has been
# initialized before its value is used to initialize `w`.
# The random values are picked only once.
@ -767,7 +767,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = v.scatter_nd_sub(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```
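The docstring example above still uses Python 2 `print` and a v1 `Session`. Under eager execution (TF 2.x) the same update runs directly; a sketch, assuming the variable starts at [1, 2, 3, 4, 5, 6, 7, 8] as in the surrounding docstring:

```python
import tensorflow as tf

v = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8])
indices = tf.constant([[4], [3], [1], [7]])
updates = tf.constant([9, 10, 11, 12])
v.scatter_nd_sub(indices, updates)  # Subtracts updates at the given rows.
print(v.numpy())  # [ 1 -9  3 -6 -4  6  7 -4]
```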

@ -818,7 +818,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
add = v.scatter_nd_add(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(add)
```

@ -869,7 +869,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = v.scatter_nd_assign(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```

@ -958,14 +958,14 @@ class Variable(six.with_metaclass(VariableMetaclass,

This convenience method requires a session where the graph
containing this variable has been launched. If no session is
passed, the default session is used. See `tf.Session` for more
passed, the default session is used. See `tf.compat.v1.Session` for more
information on launching a graph and on sessions.

```python
v = tf.Variable([1, 2])
init = tf.global_variables_initializer()
init = tf.compat.v1.global_variables_initializer()

with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
sess.run(init)
# Usage passing the session explicitly.
v.load([2, 3], sess)
@ -1280,7 +1280,7 @@ class VariableV1(Variable):

```python
# Launch the graph in a session.
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
# Run the variable initializer.
sess.run(w.initializer)
# ...you now can run ops that use the value of 'w'...
@ -1292,10 +1292,10 @@ class VariableV1(Variable):

```python
# Add an Op to initialize global variables.
init_op = tf.global_variables_initializer()
init_op = tf.compat.v1.global_variables_initializer()

# Launch the graph in a session.
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
# Run the Op that initializes global variables.
sess.run(init_op)
# ...you can now run any Op that uses variable values...
@ -1345,8 +1345,8 @@ class VariableV1(Variable):
not have these issues:

* Add `use_resource=True` when constructing `tf.Variable`;
* Call `tf.get_variable_scope().set_use_resource(True)` inside a
`tf.variable_scope` before the `tf.get_variable()` call.
* Call `tf.compat.v1.get_variable_scope().set_use_resource(True)` inside a
`tf.compat.v1.variable_scope` before the `tf.compat.v1.get_variable()` call.
"""

def __init__(self,  # pylint: disable=super-init-not-called
@ -1844,14 +1844,14 @@ class RefVariable(VariableV1):

This convenience method requires a session where the graph
containing this variable has been launched. If no session is
passed, the default session is used. See `tf.Session` for more
passed, the default session is used. See `tf.compat.v1.Session` for more
information on launching a graph and on sessions.

```python
v = tf.Variable([1, 2])
init = tf.global_variables_initializer()
init = tf.compat.v1.global_variables_initializer()

with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
sess.run(init)
# Usage passing the session explicitly.
print(v.eval(sess))
@ -2106,7 +2106,7 @@ class RefVariable(VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = ref.scatter_nd_sub(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```

@ -2158,7 +2158,7 @@ class RefVariable(VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
add = ref.scatter_nd_add(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(add)
```

@ -2210,7 +2210,7 @@ class RefVariable(VariableV1):
indices = tf.constant([[4], [3], [1] ,[7]])
updates = tf.constant([9, 10, 11, 12])
op = ref.scatter_nd_update(indices, updates)
with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
print sess.run(op)
```

@ -2822,7 +2822,7 @@ def global_variables(scope=None):
This convenience function returns the contents of that collection.

An alternative to global variables are local variables. See
`tf.local_variables`
`tf.compat.v1.local_variables`

Args:
scope: (Optional.) A string. If supplied, the resulting list is filtered
@ -2840,7 +2840,7 @@ def global_variables(scope=None):
@tf_export(v1=["all_variables"])
@deprecated("2017-03-02", "Please use tf.global_variables instead.")
def all_variables():
"""Use `tf.global_variables` instead."""
"""Use `tf.compat.v1.global_variables` instead."""
return global_variables()


@ -2875,7 +2875,7 @@ def local_variables(scope=None):
This convenience function returns the contents of that collection.

An alternative to local variables are global variables. See
`tf.global_variables`
`tf.compat.v1.global_variables`

Args:
scope: (Optional.) A string. If supplied, the resulting list is filtered
@ -2981,7 +2981,7 @@ def variables_initializer(var_list, name="init"):
@tf_should_use.should_use_result
@deprecated("2017-03-02", "Use `tf.variables_initializer` instead.")
def initialize_variables(var_list, name="init"):
"""See `tf.variables_initializer`."""
"""See `tf.compat.v1.variables_initializer`."""
return variables_initializer(var_list, name=name)


@ -3003,7 +3003,7 @@ def global_variables_initializer():
@tf_should_use.should_use_result
@deprecated("2017-03-02", "Use `tf.global_variables_initializer` instead.")
def initialize_all_variables():
"""See `tf.global_variables_initializer`."""
"""See `tf.compat.v1.global_variables_initializer`."""
return global_variables_initializer()


@ -3025,7 +3025,7 @@ def local_variables_initializer():
@tf_should_use.should_use_result
@deprecated("2017-03-02", "Use `tf.local_variables_initializer` instead.")
def initialize_local_variables():
"""See `tf.local_variables_initializer`."""
"""See `tf.compat.v1.local_variables_initializer`."""
return local_variables_initializer()