From 9d297ebabca9f306c38662110f7b5ac08c42115c Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Fri, 13 Mar 2020 11:37:29 -0700 Subject: [PATCH] Remove all frozen copy of Keras code. PiperOrigin-RevId: 300795615 Change-Id: Ibe8e69cddeb992aaa00a87da4c6543c8804f7b14 --- tensorflow/python/frozen_keras/BUILD | 175 - tensorflow/python/frozen_keras/README.md | 15 - tensorflow/python/frozen_keras/activations.py | 453 -- tensorflow/python/frozen_keras/backend.py | 6094 ----------------- .../python/frozen_keras/backend_config.py | 140 - .../frozen_keras/backend_config_test.py | 55 - .../python/frozen_keras/backend_test.py | 2180 ------ tensorflow/python/frozen_keras/constraints.py | 282 - tensorflow/python/frozen_keras/engine/BUILD | 151 - .../frozen_keras/engine/base_layer_utils.py | 781 --- .../engine/base_layer_utils_test.py | 71 - .../python/frozen_keras/engine/input_spec.py | 233 - .../frozen_keras/engine/input_spec_test.py | 66 - .../frozen_keras/engine/legacy_base_layer.py | 2784 -------- .../engine/legacy_base_layer_test.py | 1274 ---- tensorflow/python/frozen_keras/engine/node.py | 190 - .../python/frozen_keras/initializers.py | 198 - .../python/frozen_keras/regularizers.py | 266 - tensorflow/python/frozen_keras/utils/BUILD | 106 - .../python/frozen_keras/utils/conv_utils.py | 482 -- .../frozen_keras/utils/conv_utils_test.py | 340 - .../frozen_keras/utils/generic_utils.py | 612 -- .../frozen_keras/utils/generic_utils_test.py | 321 - .../python/frozen_keras/utils/layer_utils.py | 403 -- .../python/frozen_keras/utils/tf_utils.py | 524 -- .../frozen_keras/utils/tf_utils_test.py | 162 - tensorflow/python/keras/engine/BUILD | 1 - tensorflow/python/keras/engine/sequential.py | 4 +- tensorflow/tools/pip_package/BUILD | 1 - 29 files changed, 1 insertion(+), 18363 deletions(-) delete mode 100644 tensorflow/python/frozen_keras/BUILD delete mode 100644 tensorflow/python/frozen_keras/README.md delete mode 100644 tensorflow/python/frozen_keras/activations.py delete mode 100644 tensorflow/python/frozen_keras/backend.py delete mode 100644 tensorflow/python/frozen_keras/backend_config.py delete mode 100644 tensorflow/python/frozen_keras/backend_config_test.py delete mode 100644 tensorflow/python/frozen_keras/backend_test.py delete mode 100644 tensorflow/python/frozen_keras/constraints.py delete mode 100644 tensorflow/python/frozen_keras/engine/BUILD delete mode 100644 tensorflow/python/frozen_keras/engine/base_layer_utils.py delete mode 100644 tensorflow/python/frozen_keras/engine/base_layer_utils_test.py delete mode 100644 tensorflow/python/frozen_keras/engine/input_spec.py delete mode 100644 tensorflow/python/frozen_keras/engine/input_spec_test.py delete mode 100644 tensorflow/python/frozen_keras/engine/legacy_base_layer.py delete mode 100644 tensorflow/python/frozen_keras/engine/legacy_base_layer_test.py delete mode 100644 tensorflow/python/frozen_keras/engine/node.py delete mode 100644 tensorflow/python/frozen_keras/initializers.py delete mode 100644 tensorflow/python/frozen_keras/regularizers.py delete mode 100644 tensorflow/python/frozen_keras/utils/BUILD delete mode 100644 tensorflow/python/frozen_keras/utils/conv_utils.py delete mode 100644 tensorflow/python/frozen_keras/utils/conv_utils_test.py delete mode 100644 tensorflow/python/frozen_keras/utils/generic_utils.py delete mode 100644 tensorflow/python/frozen_keras/utils/generic_utils_test.py delete mode 100644 tensorflow/python/frozen_keras/utils/layer_utils.py delete mode 100644 tensorflow/python/frozen_keras/utils/tf_utils.py delete mode 100644 
tensorflow/python/frozen_keras/utils/tf_utils_test.py diff --git a/tensorflow/python/frozen_keras/BUILD b/tensorflow/python/frozen_keras/BUILD deleted file mode 100644 index ca01753ca3e..00000000000 --- a/tensorflow/python/frozen_keras/BUILD +++ /dev/null @@ -1,175 +0,0 @@ -load("//tensorflow:tensorflow.bzl", "tf_py_test") - -package( - default_visibility = ["//tensorflow:__subpackages__"], - licenses = ["notice"], # Apache 2.0 -) - -#TODO(scottzhu): Cleanup all the deps to python/keras - -py_library( - name = "frozen_keras", - deps = [ - ":backend", - ":backend_config", - ":constraint", - ":initializers", - ":regularizers", - "//tensorflow/python/frozen_keras/engine:legacy_base_layer", - ], -) - -py_library( - name = "activations", - srcs = ["activations.py"], - deps = [ - ":backend", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn", - "//tensorflow/python/frozen_keras/utils:generic_utils", - "@six_archive//:six", - ], -) - -py_library( - name = "backend", - srcs = ["backend.py"], - deps = [ - ":backend_config", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:clip_ops", - "//tensorflow/python:composite_tensor", - "//tensorflow/python:config", - "//tensorflow/python:constant_op", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:control_flow_util", - "//tensorflow/python:ctc_ops", - "//tensorflow/python:device", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:func_graph", - "//tensorflow/python:functional_ops", - "//tensorflow/python:gradients", - "//tensorflow/python:image_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:logging_ops", - "//tensorflow/python:map_fn", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn", - "//tensorflow/python:platform", - "//tensorflow/python:random_ops", - "//tensorflow/python:session", - "//tensorflow/python:sparse_ops", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:state_ops", - "//tensorflow/python:tensor_array_grad", - "//tensorflow/python:tensor_array_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:tensor_util", - "//tensorflow/python:tf2", - "//tensorflow/python:training_lib", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/distribute:distribute_coordinator", - "//tensorflow/python/distribute:distribute_coordinator_context", - "//tensorflow/python/distribute:distribute_lib", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:function", - "//tensorflow/python/eager:lift_to_graph", - "//tensorflow/python/ops/ragged:ragged_concat_ops", - "//tensorflow/python/ops/ragged:ragged_tensor", - "//third_party/py/numpy", - ], -) - -py_library( - name = "backend_config", - srcs = ["backend_config.py"], - deps = [], -) - -py_library( - name = "constraint", - srcs = ["constraints.py"], - deps = [ - ":backend", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/frozen_keras/utils:generic_utils", - "@six_archive//:six", - ], -) - -py_library( - name = "initializers", - srcs = ["initializers.py"], - deps = [ - "//tensorflow/python:dtypes", - "//tensorflow/python:init_ops", - "//tensorflow/python:init_ops_v2", - "//tensorflow/python:tf2", - "//tensorflow/python/frozen_keras/utils:generic_utils", - "@six_archive//:six", - ], -) - -py_library( - name = "regularizers", - srcs = 
["regularizers.py"], - deps = [ - ":backend", - "//tensorflow/python:math_ops", - "//tensorflow/python:util", - "//tensorflow/python/frozen_keras/utils:generic_utils", - "@six_archive//:six", - ], -) - -tf_py_test( - name = "backend_test", - size = "medium", - srcs = ["backend_test.py"], - python_version = "PY3", - shard_count = 4, - deps = [ - ":backend", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:config", - "//tensorflow/python:errors", - "//tensorflow/python:extra_py_tests_deps", - "//tensorflow/python:framework_ops", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:nn", - "//tensorflow/python:sparse_tensor", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:def_function", - "//tensorflow/python/frozen_keras/engine:base_layer_utils", - "//tensorflow/python/keras:combinations", - "//tensorflow/python/keras/engine", - "//tensorflow/python/keras/layers:advanced_activations", - "//tensorflow/python/keras/layers:normalization", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - -tf_py_test( - name = "backend_config_test", - size = "medium", - srcs = ["backend_config_test.py"], - python_version = "PY3", - deps = [ - ":backend", - ":backend_config", - "//tensorflow/python:client_testlib", - "//tensorflow/python/keras:combinations", - ], -) diff --git a/tensorflow/python/frozen_keras/README.md b/tensorflow/python/frozen_keras/README.md deleted file mode 100644 index d00189593a5..00000000000 --- a/tensorflow/python/frozen_keras/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# DO NOT USE - -Everything under this package is for internal usage, and only serves a -dependency from legacy TF v1 APIs that relies on Keras. Any active development -should happen in third_party/tensorflow/python/keras instead. - -## Background - -In order to build a more modular Tensorflow and Keras, we decided to split the -Keras code into its own repository. Having TensorFlow depend on -Keras is a red flag as it is a reverse dependency. As some legacy TF V1 APIs -are using Keras classes as base classes, like `Layer`, we decided to keep a copy -of the trimmed Keras code to resolve the reverse dependency. This will also -ensure the stability of the TF V1 API will be not affected by the active -development of the Keras project. diff --git a/tensorflow/python/frozen_keras/activations.py b/tensorflow/python/frozen_keras/activations.py deleted file mode 100644 index 70dfa1a826b..00000000000 --- a/tensorflow/python/frozen_keras/activations.py +++ /dev/null @@ -1,453 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Built-in activation functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six - -from tensorflow.python.frozen_keras import backend as K -from tensorflow.python.frozen_keras.utils.generic_utils import deserialize_keras_object -from tensorflow.python.frozen_keras.utils.generic_utils import serialize_keras_object -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn - -# b/123041942 -# In TF 2.x, if the `tf.nn.softmax` is used as an activation function in Keras -# layers, it gets serialized as 'softmax_v2' instead of 'softmax' as the -# internal method name is returned in serialization. This results in errors in -# model exporting and loading as Keras can't find any activation function with -# the name of `softmax_v2`. - -# This dict maps the activation function name from its v2 version to its -# canonical name. -_TF_ACTIVATIONS_V2 = { - 'softmax_v2': 'softmax', -} - - -def softmax(x, axis=-1): - """Softmax converts a real vector to a vector of categorical probabilities. - - The elements of the output vector are in range (0, 1) and sum to 1. - - Each vector is handled independently. The `axis` argument sets which axis - of the input the function is applied along. - - Softmax is often used as the activation for the last - layer of a classification network because the result could be interpreted as - a probability distribution. - - The softmax of each vector x is calculated by `exp(x)/tf.reduce_sum(exp(x))`. - The input values in are the log-odds of the resulting probability. - - Arguments: - x : Input tensor. - axis: Integer, axis along which the softmax normalization is applied. - - Returns: - Tensor, output of softmax transformation (all values are non-negative - and sum to 1). - - Raises: - ValueError: In case `dim(x) == 1`. - """ - ndim = K.ndim(x) - if ndim == 2: - return nn.softmax(x) - elif ndim > 2: - e = math_ops.exp(x - math_ops.reduce_max(x, axis=axis, keepdims=True)) - s = math_ops.reduce_sum(e, axis=axis, keepdims=True) - return e / s - else: - raise ValueError('Cannot apply softmax to a tensor that is 1D. ' - 'Received input: %s' % (x,)) - - -def elu(x, alpha=1.0): - """Exponential linear unit. - - Arguments: - x: Input tensor. - alpha: A scalar, slope of negative section. - - Returns: - The exponential linear activation: `x` if `x > 0` and - `alpha * (exp(x)-1)` if `x < 0`. - - Reference: - - [Fast and Accurate Deep Network Learning by Exponential - Linear Units (ELUs)](https://arxiv.org/abs/1511.07289) - """ - return K.elu(x, alpha) - - -def selu(x): - """Scaled Exponential Linear Unit (SELU). - - The Scaled Exponential Linear Unit (SELU) activation function is: - `scale * x` if `x > 0` and `scale * alpha * (exp(x) - 1)` if `x < 0` - where `alpha` and `scale` are pre-defined constants - (`alpha = 1.67326324` - and `scale = 1.05070098`). - The SELU activation function multiplies `scale` > 1 with the - `[elu](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/activations/elu)` - (Exponential Linear Unit (ELU)) to ensure a slope larger than one - for positive net inputs. 
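[Editor's aside, not part of the patch] A minimal numeric sketch of the SELU definition above, using plain NumPy instead of the TF op; the constants are copied from this docstring, and `np_selu` is an illustrative name, not part of any API:

    import numpy as np

    ALPHA = 1.67326324  # pre-defined constant, per the docstring above
    SCALE = 1.05070098  # pre-defined constant, per the docstring above

    def np_selu(x):
        # scale * x for x > 0, scale * alpha * (exp(x) - 1) for x < 0
        return SCALE * np.where(x > 0, x, ALPHA * np.expm1(x))

    print(np_selu(np.array([-1.0, 0.0, 1.0])))
    # -> [-1.1113307  0.         1.050701 ]
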
- - The values of `alpha` and `scale` are - chosen so that the mean and variance of the inputs are preserved - between two consecutive layers as long as the weights are initialized - correctly (see [`lecun_normal` initialization] - (https://www.tensorflow.org/api_docs/python/tf/keras/initializers/lecun_normal)) - and the number of inputs is "large enough" - (see references for more information). - - ![]https://cdn-images-1.medium.com/max/1600/1*m0e8lZU_Zrkh4ESfQkY2Pw.png - (Courtesy: Blog on Towards DataScience at - https://towardsdatascience.com/selu-make-fnns-great-again-snn-8d61526802a9) - - Example Usage: - - >>> n_classes = 10 #10-class problem - >>> from tensorflow.python.keras.layers import Dense - >>> model = tf.keras.Sequential() - >>> model.add(Dense(64, kernel_initializer='lecun_normal', - ... activation='selu', input_shape=(28, 28, 1))) - >>> model.add(Dense(32, kernel_initializer='lecun_normal', - ... activation='selu')) - >>> model.add(Dense(16, kernel_initializer='lecun_normal', - ... activation='selu')) - >>> model.add(Dense(n_classes, activation='softmax')) - - Arguments: - x: A tensor or variable to compute the activation function for. - - Returns: - The scaled exponential unit activation: `scale * elu(x, alpha)`. - - # Note - - To be used together with the initialization "[lecun_normal] - (https://www.tensorflow.org/api_docs/python/tf/keras/initializers/lecun_normal)". - - To be used together with the dropout variant "[AlphaDropout] - (https://www.tensorflow.org/api_docs/python/tf/keras/layers/AlphaDropout)". - - References: - [Self-Normalizing Neural Networks (Klambauer et al, 2017)] - (https://arxiv.org/abs/1706.02515) - """ - return nn.selu(x) - - -def softplus(x): - """Softplus activation function. - - Arguments: - x: Input tensor. - - Returns: - The softplus activation: `log(exp(x) + 1)`. - """ - return nn.softplus(x) - - -def softsign(x): - """Softsign activation function. - - Arguments: - x: Input tensor. - - Returns: - The softsign activation: `x / (abs(x) + 1)`. - """ - return nn.softsign(x) - - -def swish(x): - """Swish activation function. - - Arguments: - x: Input tensor. - - Returns: - The swish activation applied to `x`. - """ - return nn.swish(x) - - -def relu(x, alpha=0., max_value=None, threshold=0): - """Applies the rectified linear unit activation function. - - With default values, this returns the standard ReLU activation: - `max(x, 0)`, the element-wise maximum of 0 and the input tensor. - - Modifying default parameters allows you to use non-zero thresholds, - change the max value of the activation, - and to use a non-zero multiple of the input for values below the threshold. - - For example: - - >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32) - >>> tf.keras.activations.relu(foo).numpy() - array([ 0., 0., 0., 5., 10.], dtype=float32) - >>> tf.keras.activations.relu(foo, alpha=0.5).numpy() - array([-5. , -2.5, 0. , 5. , 10. ], dtype=float32) - >>> tf.keras.activations.relu(foo, max_value=5).numpy() - array([0., 0., 0., 5., 5.], dtype=float32) - >>> tf.keras.activations.relu(foo, threshold=5).numpy() - array([-0., -0., 0., 0., 10.], dtype=float32) - - Arguments: - x: Input `tensor` or `variable`. - alpha: A `float` that governs the slope for values lower than the - threshold. - max_value: A `float` that sets the saturation threshold (the largest value - the function will return). - threshold: A `float` giving the threshold value of the activation function - below which values will be damped or set to zero. 
- - Returns: - A `Tensor` representing the input tensor, - transformed by the relu activation function. - Tensor will be of the same shape and dtype of input `x`. - """ - return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) - - -def tanh(x): - """Hyperbolic tangent activation function. - - For example: - - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.tanh(a) - >>> b.numpy() - array([-0.9950547, -0.7615942, 0. , 0.7615942, 0.9950547], - dtype=float32) - - Arguments: - x: Input tensor. - - Returns: - Tensor of same shape and dtype of input `x`, with tanh activation: - `tanh(x) = sinh(x)/cosh(x) = ((exp(x) - exp(-x))/(exp(x) + exp(-x)))`. - """ - return nn.tanh(x) - - -def sigmoid(x): - """Sigmoid activation function. - - Applies the sigmoid activation function. The sigmoid function is defined as - 1 divided by (1 + exp(-x)). It's curve is like an "S" and is like a smoothed - version of the Heaviside (Unit Step Function) function. For small values - (<-5) the sigmoid returns a value close to zero and for larger values (>5) - the result of the function gets close to 1. - - Sigmoid is equivalent to a 2-element Softmax, where the second element is - assumed to be zero. - - For example: - - >>> a = tf.constant([-20, -1.0, 0.0, 1.0, 20], dtype = tf.float32) - >>> b = tf.keras.activations.sigmoid(a) - >>> b.numpy() >= 0.0 - array([ True, True, True, True, True]) - - Arguments: - x: Input tensor. - - Returns: - Tensor with the sigmoid activation: `(1.0 / (1.0 + exp(-x)))`. - Tensor will be of same shape and dtype of input `x`. - """ - return nn.sigmoid(x) - - -def exponential(x): - """Exponential activation function. - - For example: - - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.exponential(a) - >>> b.numpy() - array([ 0.04978707, 0.36787945, 1. , 2.7182817 , 20.085537 ], - dtype=float32) - - Arguments: - x: Input tensor. - - Returns: - Tensor with exponential activation: `exp(x)`. Tensor will be of same - shape and dtype of input `x`. - """ - return math_ops.exp(x) - - -def hard_sigmoid(x): - """Hard sigmoid activation function. - - Faster to compute than sigmoid activation. - - For example: - - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.hard_sigmoid(a) - >>> b.numpy() - array([0. , 0.3, 0.5, 0.7, 1. ], dtype=float32) - - Arguments: - x: Input tensor. - - Returns: - The hard sigmoid activation: - - - `0` if `x < -2.5` - - `1` if `x > 2.5` - - `0.2 * x + 0.5` if `-2.5 <= x <= 2.5`. - """ - return K.hard_sigmoid(x) - - -def linear(x): - """Linear activation function. - - For example: - - >>> a = tf.constant([-3.0,-1.0, 0.0,1.0,3.0], dtype = tf.float32) - >>> b = tf.keras.activations.linear(a) - >>> b.numpy() - array([-3., -1., 0., 1., 3.], dtype=float32) - - Arguments: - x: Input tensor. - - Returns: - the input unmodified. - """ - return x - - -def serialize(activation): - """Returns name attribute (`__name__`) of function. - - Arguments: - activation : Function - - Returns: - String denoting the name attribute of the input function - - For example: - - >>> tf.keras.activations.serialize(tf.keras.activations.tanh) - 'tanh' - >>> tf.keras.activations.serialize(tf.keras.activations.sigmoid) - 'sigmoid' - >>> tf.keras.activations.serialize('abcd') - Traceback (most recent call last): - ... - ValueError: ('Cannot serialize', 'abcd') - - Raises: - ValueError: The input function is not a valid one. 
- """ - if (hasattr(activation, '__name__') and - activation.__name__ in _TF_ACTIVATIONS_V2): - return _TF_ACTIVATIONS_V2[activation.__name__] - return serialize_keras_object(activation) - - -def deserialize(name, custom_objects=None): - """Returns activation function denoted by input string. - - Arguments: - x : String - - Returns: - TensorFlow Activation function denoted by input string. - - For example: - - >>> tf.keras.activations.deserialize('linear') - - >>> tf.keras.activations.deserialize('sigmoid') - - >>> tf.keras.activations.deserialize('abcd') - Traceback (most recent call last): - ... - ValueError: Unknown activation function:abcd - - Args: - name: The name of the activation function. - custom_objects: A {name:value} dictionary for activations not build into - keras. - - Raises: - ValueError: `Unknown activation function` if the input string does not - denote any defined Tensorflow activation function. - """ - return deserialize_keras_object( - name, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='activation function') - - -def get(identifier): - """Returns function. - - Arguments: - identifier: Function or string - - Returns: - Activation function denoted by input: - - `Linear activation function` if input is `None`. - - Function corresponding to the input string or input function. - - For example: - - >>> tf.keras.activations.get('softmax') - - >>> tf.keras.activations.get(tf.keras.activations.softmax) - - >>> tf.keras.activations.get(None) - - >>> tf.keras.activations.get(abs) - - >>> tf.keras.activations.get('abcd') - Traceback (most recent call last): - ... - ValueError: Unknown activation function:abcd - - Raises: - ValueError: Input is an unknown function or string, i.e., the input does - not denote any defined function. - """ - if identifier is None: - return linear - if isinstance(identifier, six.string_types): - identifier = str(identifier) - return deserialize(identifier) - elif callable(identifier): - return identifier - elif isinstance(identifier, dict): - return deserialize_keras_object( - identifier, printable_module_name='activation') - else: - raise TypeError( - 'Could not interpret activation function identifier: {}'.format( - repr(identifier))) diff --git a/tensorflow/python/frozen_keras/backend.py b/tensorflow/python/frozen_keras/backend.py deleted file mode 100644 index 1b87ba0cbc6..00000000000 --- a/tensorflow/python/frozen_keras/backend.py +++ /dev/null @@ -1,6094 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -# pylint: disable=protected-access -# pylint: disable=redefined-outer-name -# pylint: disable=redefined-builtin -"""Keras backend API.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import itertools -import json -import os -import sys -import threading -import weakref - -import numpy as np - -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python import tf2 -from tensorflow.python.client import session as session_module -from tensorflow.python.distribute import distribute_coordinator as dc -from tensorflow.python.distribute import distribute_coordinator_context as dc_context -from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.eager import context -from tensorflow.python.eager import function as eager_function -from tensorflow.python.eager import lift_to_graph -from tensorflow.python.framework import composite_tensor -from tensorflow.python.framework import config -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import device as tfdev -from tensorflow.python.framework import dtypes as dtypes_module -from tensorflow.python.framework import func_graph -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_util -from tensorflow.python.frozen_keras import backend_config -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import clip_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import control_flow_util -from tensorflow.python.ops import ctc_ops as ctc -from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gradients as gradients_module -from tensorflow.python.ops import image_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import logging_ops -from tensorflow.python.ops import map_fn as map_fn_lib -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import sparse_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-import -from tensorflow.python.ops import tensor_array_ops -from tensorflow.python.ops import variables as variables_module -from tensorflow.python.ops.ragged import ragged_concat_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import moving_averages -from tensorflow.python.util import nest -from tensorflow.python.util import object_identity -from tensorflow.python.util import tf_contextlib -from tensorflow.python.util import tf_inspect -from tensorflow.python.util.deprecation import deprecated - -py_all = all -py_sum = sum -py_any = any - -# INTERNAL UTILS - -# The internal graph maintained by Keras and used by the symbolic Keras APIs -# while executing eagerly (such as the functional API for model-building). -_GRAPH = None - -# A graph which is used for constructing functions in eager mode. -_CURRENT_SCRATCH_GRAPH = None - -# This is a thread local object that will hold the default internal TF session -# used by Keras. 
It can be set manually via `set_session(sess)`. -_SESSION = threading.local() - - -# TODO(scottzhu): Remove all unused functions. -# _DUMMY_EAGER_GRAPH.key is used as a key in _GRAPH_LEARNING_PHASES. -# We keep a separate reference to it to make sure it does not get removed from -# _GRAPH_LEARNING_PHASES. -# _DummyEagerGraph inherits from threading.local to make its `key` attribute -# thread local. This is needed to make set_learning_phase affect only the -# current thread during eager execution (see b/123096885 for more details). -class _DummyEagerGraph(threading.local): - """_DummyEagerGraph provides a thread local `key` attribute. - - We can't use threading.local directly, i.e. without subclassing, because - gevent monkey patches threading.local and its version does not support - weak references. - """ - - class _WeakReferencableClass(object): - """This dummy class is needed for two reasons. - - - We need something that supports weak references. Basic types like string - and ints don't. - - We need something whose hash and equality are based on object identity - to make sure they are treated as different keys to _GRAPH_LEARNING_PHASES. - - An empty Python class satisfies both of these requirements. - """ - pass - - def __init__(self): - # Constructors for classes subclassing threading.local run once - # per thread accessing something in the class. Thus, each thread will - # get a different key. - super(_DummyEagerGraph, self).__init__() - self.key = _DummyEagerGraph._WeakReferencableClass() - - -_DUMMY_EAGER_GRAPH = _DummyEagerGraph() - -# This boolean flag can be set to True to leave variable initialization -# up to the user. -# Change its value via `manual_variable_initialization(value)`. -_MANUAL_VAR_INIT = False - -# This list holds the available devices. -# It is populated when `_get_available_gpus()` is called for the first time. -# We assume our devices don't change henceforth. -_LOCAL_DEVICES = None - -# The below functions are kept accessible from backend for compatibility. -epsilon = backend_config.epsilon -floatx = backend_config.floatx -image_data_format = backend_config.image_data_format -set_epsilon = backend_config.set_epsilon -set_floatx = backend_config.set_floatx -set_image_data_format = backend_config.set_image_data_format - - -def backend(): - """Publicly accessible method for determining the current backend. - - Only exists for API compatibility with multi-backend Keras. - - Returns: - The string "tensorflow". - """ - return 'tensorflow' - - -def cast_to_floatx(x): - """Cast a Numpy array to the default Keras float type. - - Arguments: - x: Numpy array or TensorFlow tensor. - - Returns: - The same array (Numpy array if `x` was a Numpy array, or TensorFlow tensor - if `x` was a tensor), cast to its new type. - - Example: - - >>> tf.keras.backend.floatx() - 'float32' - >>> arr = np.array([1.0, 2.0], dtype='float64') - >>> arr.dtype - dtype('float64') - >>> new_arr = cast_to_floatx(arr) - >>> new_arr - array([1., 2.], dtype=float32) - >>> new_arr.dtype - dtype('float32') - - """ - if isinstance(x, (ops.Tensor, - variables_module.Variable, - sparse_tensor.SparseTensor)): - return math_ops.cast(x, dtype=floatx()) - return np.asarray(x, dtype=floatx()) - - -# A global dictionary mapping graph objects to an index of counters used -# for various layer/optimizer names in each graph. -# Allows to give unique autogenerated names to layers, in a graph-specific way. 
-PER_GRAPH_OBJECT_NAME_UIDS = weakref.WeakKeyDictionary() - - -def get_uid(prefix=''): - """Associates a string prefix with an integer counter in a TensorFlow graph. - - Arguments: - prefix: String prefix to index. - - Returns: - Unique integer ID. - - Example: - - >>> get_uid('dense') - 1 - >>> get_uid('dense') - 2 - - """ - graph = get_graph() - if graph not in PER_GRAPH_OBJECT_NAME_UIDS: - PER_GRAPH_OBJECT_NAME_UIDS[graph] = collections.defaultdict(int) - layer_name_uids = PER_GRAPH_OBJECT_NAME_UIDS[graph] - layer_name_uids[prefix] += 1 - return layer_name_uids[prefix] - - -def reset_uids(): - """Resets graph identifiers. - """ - - PER_GRAPH_OBJECT_NAME_UIDS.clear() - - -def clear_session(): - """Resets all state generated by Keras. - - Keras manages a global state, which it uses to implement the Functional - model-building API and to uniquify autogenerated layer names. - - If you are creating many models in a loop, this global state will consume - an increasing amount of memory over time, and you may want to clear it. - Calling `clear_session()` releases the global state: this helps avoid clutter - from old models and layers, especially when memory is limited. - - Example 1: calling `clear_session()` when creating models in a loop - - ```python - for _ in range(100): - # Without `clear_session()`, each iteration of this loop will - # slightly increase the size of the global state managed by Keras - model = tf.keras.Sequential([tf.keras.layers.Dense(10) for _ in range(10)]) - - for _ in range(100): - # With `clear_session()` called at the beginning, - # Keras starts with a blank state at each iteration - # and memory consumption is constant over time. - tf.keras.backend.clear_session() - model = tf.keras.Sequential([tf.keras.layers.Dense(10) for _ in range(10)]) - ``` - - Example 2: resetting the layer name generation counter - - >>> import tensorflow as tf - >>> layers = [tf.keras.layers.Dense(10) for _ in range(10)] - >>> new_layer = tf.keras.layers.Dense(10) - >>> print(new_layer.name) - dense_10 - >>> tf.keras.backend.set_learning_phase(1) - >>> print(tf.keras.backend.learning_phase()) - 1 - >>> tf.keras.backend.clear_session() - >>> new_layer = tf.keras.layers.Dense(10) - >>> print(new_layer.name) - dense - """ - global _SESSION - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - global _GRAPH_VARIABLES # pylint: disable=global-variable-not-assigned - global _GRAPH_TF_OPTIMIZERS # pylint: disable=global-variable-not-assigned - global _GRAPH - global _FREEZABLE_VARS - _GRAPH = None - ops.reset_default_graph() - reset_uids() - _SESSION.session = None - graph = get_graph() - with graph.as_default(): - _GRAPH_LEARNING_PHASES.clear() - # Create the learning phase placeholder in graph using the default factory. - _GRAPH_LEARNING_PHASES.setdefault(graph) - _GRAPH_VARIABLES.pop(graph, None) - _GRAPH_TF_OPTIMIZERS.pop(graph, None) - _FREEZABLE_VARS.pop(graph, None) - - -def manual_variable_initialization(value): - """Sets the manual variable initialization flag. - - This boolean flag determines whether - variables should be initialized - as they are instantiated (default), or if - the user should handle the initialization - (e.g. via `tf.compat.v1.initialize_all_variables()`). - - Arguments: - value: Python boolean. - """ - global _MANUAL_VAR_INIT - _MANUAL_VAR_INIT = value - - -def learning_phase(): - """Returns the learning phase flag. 
- - The learning phase flag is a bool tensor (0 = test, 1 = train) - to be passed as input to any Keras function - that uses a different behavior at train time and test time. - - Returns: - Learning phase (scalar integer tensor or Python integer). - """ - graph = ops.get_default_graph() - if graph is _GRAPH: - # Don't enter an init_scope for the learning phase if eager execution - # is enabled but we're inside the Keras workspace graph. - learning_phase = symbolic_learning_phase() - else: - with ops.init_scope(): - # We always check & set the learning phase inside the init_scope, - # otherwise the wrong default_graph will be used to look up the learning - # phase inside of functions & defuns. - # - # This is because functions & defuns (both in graph & in eager mode) - # will always execute non-eagerly using a function-specific default - # subgraph. - learning_phase = _GRAPH_LEARNING_PHASES[None] - _mark_func_graph_as_unsaveable(graph, learning_phase) - return learning_phase - - -def global_learning_phase_is_set(): - return _DUMMY_EAGER_GRAPH.key in _GRAPH_LEARNING_PHASES - - -def _mark_func_graph_as_unsaveable(graph, learning_phase): - """Mark func graph as unsaveable due to use of symbolic keras learning phase. - - Functions that capture the symbolic learning phase cannot be exported to - SavedModel. Mark the funcgraph as unsaveable, so that an error will be raised - if it is exported. - - Args: - graph: Graph or FuncGraph object. - learning_phase: Learning phase placeholder or int defined in the graph. - """ - if graph.building_function and is_placeholder(learning_phase): - graph.mark_as_unsaveable( - 'The keras learning phase placeholder was used inside a function. ' - 'Exporting placeholders is not supported when saving out a SavedModel. ' - 'Please call `tf.keras.backend.set_learning_phase(0)` in the function ' - 'to set the learning phase to a constant value.') - - -def symbolic_learning_phase(): - graph = get_graph() - with graph.as_default(): - return _GRAPH_LEARNING_PHASES[graph] - - -def _default_learning_phase(): - if context.executing_eagerly(): - return 0 - else: - with name_scope(''): - return array_ops.placeholder_with_default( - False, shape=(), name='keras_learning_phase') - - -def set_learning_phase(value): - """Sets the learning phase to a fixed value. - - Arguments: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train - - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - if value not in {0, 1}: - raise ValueError('Expected learning phase to be 0 or 1.') - with ops.init_scope(): - if context.executing_eagerly(): - # In an eager context, the learning phase values applies to both the eager - # context and the internal Keras graph. - _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] = value - _GRAPH_LEARNING_PHASES[get_graph()] = value - - -@tf_contextlib.contextmanager -def learning_phase_scope(value): - """Provides a scope within which the learning phase is equal to `value`. - - The learning phase gets restored to its original value upon exiting the scope. - - Arguments: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train - - Yields: - None. - - Raises: - ValueError: if `value` is neither `0` nor `1`. 
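[Editor's aside, not part of the patch] A hedged usage sketch for this scope, assuming the TF 1.x compat export of the Keras backend is available under `tf.compat.v1.keras.backend`:

    import tensorflow as tf

    K = tf.compat.v1.keras.backend
    with K.learning_phase_scope(1):   # 1 = train
        print(K.learning_phase())     # -> 1 inside the scope
    # On exit, the previous phase (or the unset state) is restored.
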
- """ - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - if value not in {0, 1}: - raise ValueError('Expected learning phase to be 0 or 1.') - - with ops.init_scope(): - if context.executing_eagerly(): - previous_eager_value = _GRAPH_LEARNING_PHASES.get( - _DUMMY_EAGER_GRAPH.key, None) - previous_graph_value = _GRAPH_LEARNING_PHASES.get(get_graph(), None) - - try: - set_learning_phase(value) - yield - finally: - # Restore learning phase to initial value. - with ops.init_scope(): - if context.executing_eagerly(): - if previous_eager_value is not None: - _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] = previous_eager_value - elif _DUMMY_EAGER_GRAPH.key in _GRAPH_LEARNING_PHASES: - del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] - - graph = get_graph() - if previous_graph_value is not None: - _GRAPH_LEARNING_PHASES[graph] = previous_graph_value - elif graph in _GRAPH_LEARNING_PHASES: - del _GRAPH_LEARNING_PHASES[graph] - - -@tf_contextlib.contextmanager -def eager_learning_phase_scope(value): - """Internal scope that sets the learning phase in eager / tf.function only. - - Arguments: - value: Learning phase value, either 0 or 1 (integers). - 0 = test, 1 = train - - Yields: - None. - - Raises: - ValueError: if `value` is neither `0` nor `1`. - """ - global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned - assert value in {0, 1} - assert ops.executing_eagerly_outside_functions() - global_learning_phase_was_set = global_learning_phase_is_set() - if global_learning_phase_was_set: - previous_value = learning_phase() - try: - _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] = value - yield - finally: - # Restore learning phase to initial value or unset. - if global_learning_phase_was_set: - _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] = previous_value - else: - del _GRAPH_LEARNING_PHASES[_DUMMY_EAGER_GRAPH.key] - - -def _current_graph(op_input_list): - """Return the graph members of `op_input_list`, or the current graph.""" - return ops._get_graph_from_inputs(op_input_list) - - -def _get_session(op_input_list=()): - """Returns the session object for the current thread.""" - global _SESSION - default_session = ops.get_default_session() - if default_session is not None: - session = default_session - else: - if ops.inside_function(): - raise RuntimeError('Cannot get session inside Tensorflow graph function.') - # If we don't have a session, or that session does not match the current - # graph, create and cache a new session. - if (getattr(_SESSION, 'session', None) is None or - _SESSION.session.graph is not _current_graph(op_input_list)): - # If we are creating the Session inside a tf.distribute.Strategy scope, - # we ask the strategy for the right session options to use. - if distribution_strategy_context.has_strategy(): - configure_and_create_distributed_session( - distribution_strategy_context.get_strategy()) - else: - _SESSION.session = session_module.Session( - config=get_default_session_config()) - session = _SESSION.session - return session - - -def get_session(op_input_list=()): - """Returns the TF session to be used by the backend. - - If a default TensorFlow session is available, we will return it. - - Else, we will return the global Keras session assuming it matches - the current graph. - - If no global Keras session exists at this point: - we will create a new global session. - - Note that you can manually set the global session - via `K.set_session(sess)`. 
- - Arguments: - op_input_list: An option sequence of tensors or ops, which will be used - to determine the current graph. Otherwise the default graph will be - used. - - Returns: - A TensorFlow session. - """ - session = _get_session(op_input_list) - if not _MANUAL_VAR_INIT: - with session.graph.as_default(): - _initialize_variables(session) - return session - - -def get_graph(): - if context.executing_eagerly(): - global _GRAPH - if _GRAPH is None: - _GRAPH = func_graph.FuncGraph('keras_graph') - return _GRAPH - else: - return ops.get_default_graph() - - -@tf_contextlib.contextmanager -def _scratch_graph(graph=None): - """Retrieve a shared and temporary func graph. - - The eager execution path lifts a subgraph from the keras global graph into - a scratch graph in order to create a function. DistributionStrategies, in - turn, constructs multiple functions as well as a final combined function. In - order for that logic to work correctly, all of the functions need to be - created on the same scratch FuncGraph. - - Args: - graph: A graph to be used as the current scratch graph. If not set then - a scratch graph will either be retrieved or created: - - Yields: - The current scratch graph. - """ - global _CURRENT_SCRATCH_GRAPH - if (_CURRENT_SCRATCH_GRAPH is not None and graph is not None and - _CURRENT_SCRATCH_GRAPH is not graph): - raise ValueError('Multiple scratch graphs specified.') - - if _CURRENT_SCRATCH_GRAPH: - yield _CURRENT_SCRATCH_GRAPH - return - - graph = graph or func_graph.FuncGraph('keras_scratch_graph') - try: - _CURRENT_SCRATCH_GRAPH = graph - yield graph - finally: - _CURRENT_SCRATCH_GRAPH = None - - -def set_session(session): - """Sets the global TensorFlow session. - - Arguments: - session: A TF Session. - """ - global _SESSION - _SESSION.session = session - - -def get_default_session_config(): - if os.environ.get('OMP_NUM_THREADS'): - logging.warning( - 'OMP_NUM_THREADS is no longer used by the default Keras config. ' - 'To configure the number of threads, use tf.config.threading APIs.') - - config = context.context().config - config.allow_soft_placement = True - - return config - - -def get_default_graph_uid_map(): - graph = ops.get_default_graph() - name_uid_map = PER_GRAPH_OBJECT_NAME_UIDS.get(graph, None) - if name_uid_map is None: - name_uid_map = collections.defaultdict(int) - PER_GRAPH_OBJECT_NAME_UIDS[graph] = name_uid_map - return name_uid_map - - -# DEVICE MANIPULATION - - -class _TfDeviceCaptureOp(object): - """Class for capturing the TF device scope.""" - - def __init__(self): - self.device = None - - def _set_device(self, device): - """This method captures TF's explicit device scope setting.""" - if tfdev.is_device_spec(device): - device = device.to_string() - self.device = device - - def _set_device_from_string(self, device_str): - self.device = device_str - - -def _get_current_tf_device(): - """Return explicit device of current context, otherwise returns `None`. - - Returns: - If the current device scope is explicitly set, it returns a string with - the device (`CPU` or `GPU`). If the scope is not explicitly set, it will - return `None`. - """ - graph = get_graph() - op = _TfDeviceCaptureOp() - graph._apply_device_functions(op) - return tfdev.DeviceSpec.from_string(op.device) - - -def _is_current_explicit_device(device_type): - """Check if the current device is explicitly set on the device type specified. - - Arguments: - device_type: A string containing `GPU` or `CPU` (case-insensitive). 
- - Returns: - A boolean indicating if the current device scope is explicitly set on the - device type. - - Raises: - ValueError: If the `device_type` string indicates an unsupported device. - """ - device_type = device_type.upper() - if device_type not in ['CPU', 'GPU']: - raise ValueError('`device_type` should be either "CPU" or "GPU".') - device = _get_current_tf_device() - return device is not None and device.device_type == device_type.upper() - - -def _get_available_gpus(): - """Get a list of available gpu devices (formatted as strings). - - Returns: - A list of available GPU devices. - """ - if ops.executing_eagerly_outside_functions(): - # Returns names of devices directly. - return [d.name for d in config.list_logical_devices('GPU')] - - global _LOCAL_DEVICES - if _LOCAL_DEVICES is None: - _LOCAL_DEVICES = get_session().list_devices() - return [x.name for x in _LOCAL_DEVICES if x.device_type == 'GPU'] - - -def _has_nchw_support(): - """Check whether the current scope supports NCHW ops. - - TensorFlow does not support NCHW on CPU. Therefore we check if we are not - explicitly put on - CPU, and have GPUs available. In this case there will be soft-placing on the - GPU device. - - Returns: - bool: if the current scope device placement would support nchw - """ - explicitly_on_cpu = _is_current_explicit_device('CPU') - gpus_available = bool(_get_available_gpus()) - return not explicitly_on_cpu and gpus_available - - -# VARIABLE MANIPULATION - - -def _constant_to_tensor(x, dtype): - """Convert the input `x` to a tensor of type `dtype`. - - This is slightly faster than the _to_tensor function, at the cost of - handling fewer cases. - - Arguments: - x: An object to be converted (numpy arrays, floats, ints and lists of - them). - dtype: The destination type. - - Returns: - A tensor. - """ - return constant_op.constant(x, dtype=dtype) - - -def _to_tensor(x, dtype): - """Convert the input `x` to a tensor of type `dtype`. - - Arguments: - x: An object to be converted (numpy array, list, tensors). - dtype: The destination type. - - Returns: - A tensor. - """ - return ops.convert_to_tensor_v2(x, dtype=dtype) - - -def is_sparse(tensor): - """Returns whether a tensor is a sparse tensor. - - Arguments: - tensor: A tensor instance. - - Returns: - A boolean. - - Example: - - - >>> a = tf.keras.backend.placeholder((2, 2), sparse=False) - >>> print(tf.keras.backend.is_sparse(a)) - False - >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) - >>> print(tf.keras.backend.is_sparse(b)) - True - - """ - return isinstance(tensor, sparse_tensor.SparseTensor) - - -def to_dense(tensor): - """Converts a sparse tensor into a dense tensor and returns it. - - Arguments: - tensor: A tensor instance (potentially sparse). - - Returns: - A dense tensor. - - Examples: - - - >>> b = tf.keras.backend.placeholder((2, 2), sparse=True) - >>> print(tf.keras.backend.is_sparse(b)) - True - >>> c = tf.keras.backend.to_dense(b) - >>> print(tf.keras.backend.is_sparse(c)) - False - - """ - if is_sparse(tensor): - return sparse_ops.sparse_tensor_to_dense(tensor) - else: - return tensor - - -def name_scope(name): - """A context manager for use when defining a Python op. - - This context manager pushes a name scope, which will make the name of all - operations added within it have a prefix. - - For example, to define a new Python op called `my_op`: - - - def my_op(a): - with tf.name_scope("MyOp") as scope: - a = tf.convert_to_tensor(a, name="a") - # Define some computation that uses `a`. 
- return foo_op(..., name=scope) - - - When executed, the Tensor `a` will have the name `MyOp/a`. - - Args: - name: The prefix to use on all names created within the name scope. - - Returns: - Name scope context manager. - """ - return ops.name_scope_v2(name) - - -def variable(value, dtype=None, name=None, constraint=None): - """Instantiates a variable and returns it. - - Arguments: - value: Numpy array, initial value of the tensor. - dtype: Tensor type. - name: Optional name string for the tensor. - constraint: Optional projection function to be - applied to the variable after an optimizer update. - - Returns: - A variable instance (with Keras metadata included). - - Examples: - - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val, dtype='float64', - ... name='example_var') - >>> tf.keras.backend.dtype(kvar) - 'float64' - >>> print(kvar) - - - """ - if dtype is None: - dtype = floatx() - if hasattr(value, 'tocoo'): - sparse_coo = value.tocoo() - indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), np.expand_dims( - sparse_coo.col, 1)), 1) - v = sparse_tensor.SparseTensor( - indices=indices, values=sparse_coo.data, dense_shape=sparse_coo.shape) - v._keras_shape = sparse_coo.shape - return v - v = variables_module.Variable( - value, - dtype=dtypes_module.as_dtype(dtype), - name=name, - constraint=constraint) - if isinstance(value, np.ndarray): - v._keras_shape = value.shape - elif hasattr(value, 'shape'): - v._keras_shape = int_shape(value) - track_variable(v) - return v - - -def track_tf_optimizer(tf_optimizer): - """Tracks the given TF optimizer for initialization of its variables.""" - if context.executing_eagerly(): - return - optimizers = _GRAPH_TF_OPTIMIZERS[None] - optimizers.add(tf_optimizer) - - -def track_variable(v): - """Tracks the given variable for initialization.""" - if context.executing_eagerly(): - return - graph = v.graph if hasattr(v, 'graph') else get_graph() - _GRAPH_VARIABLES[graph].add(v) - - -def unique_object_name(name, - name_uid_map=None, - avoid_names=None, - namespace='', - zero_based=False): - """Makes a object name (or arbitrary string) unique within a TensorFlow graph. - - Arguments: - name: String name to make unique. - name_uid_map: An optional defaultdict(int) to use when creating unique - names. If None (default), uses a per-Graph dictionary. - avoid_names: An optional set or dict with names which should not be used. If - None (default) does not avoid any names. - namespace: Gets a name which is unique within the (graph, namespace). Layers - which are not Networks use a blank namespace and so get graph-global - names. - zero_based: If True, name sequences start with no suffix (e.g. "dense", - "dense_1"). If False, naming is one-based ("dense_1", "dense_2"). - - Returns: - Unique string name. 
- - Example: - - - _unique_layer_name('dense') # dense_1 - _unique_layer_name('dense') # dense_2 - - """ - if name_uid_map is None: - name_uid_map = get_default_graph_uid_map() - if avoid_names is None: - avoid_names = set() - proposed_name = None - while proposed_name is None or proposed_name in avoid_names: - name_key = (namespace, name) - if zero_based: - number = name_uid_map[name_key] - if number: - proposed_name = name + '_' + str(number) - else: - proposed_name = name - name_uid_map[name_key] += 1 - else: - name_uid_map[name_key] += 1 - proposed_name = name + '_' + str(name_uid_map[name_key]) - return proposed_name - - -def _get_variables(graph=None): - """Returns variables corresponding to the given graph for initialization.""" - assert not context.executing_eagerly() - variables = _GRAPH_VARIABLES[graph] - for opt in _GRAPH_TF_OPTIMIZERS[graph]: - variables.update(opt.optimizer.variables()) - return variables - - -def _initialize_variables(session): - """Utility to initialize uninitialized variables on the fly.""" - variables = _get_variables(get_graph()) - candidate_vars = [] - for v in variables: - if not getattr(v, '_keras_initialized', False): - candidate_vars.append(v) - if candidate_vars: - # This step is expensive, so we only run it on variables not already - # marked as initialized. - is_initialized = session.run( - [variables_module.is_variable_initialized(v) for v in candidate_vars]) - # TODO(kathywu): Some metric variables loaded from SavedModel are never - # actually used, and do not have an initializer. - should_be_initialized = [ - (not is_initialized[n]) and v.initializer is not None - for n, v in enumerate(candidate_vars)] - uninitialized_vars = [] - for flag, v in zip(should_be_initialized, candidate_vars): - if flag: - uninitialized_vars.append(v) - v._keras_initialized = True - if uninitialized_vars: - session.run(variables_module.variables_initializer(uninitialized_vars)) - - -def constant(value, dtype=None, shape=None, name=None): - """Creates a constant tensor. - - Arguments: - value: A constant value (or list) - dtype: The type of the elements of the resulting tensor. - shape: Optional dimensions of resulting tensor. - name: Optional name for the tensor. - - Returns: - A Constant Tensor. - """ - if dtype is None: - dtype = floatx() - - return constant_op.constant(value, dtype=dtype, shape=shape, name=name) - - -def is_keras_tensor(x): - """Returns whether `x` is a Keras tensor. - - A "Keras tensor" is a tensor that was returned by a Keras layer, - (`Layer` class) or by `Input`. - - Arguments: - x: A candidate tensor. - - Returns: - A boolean: Whether the argument is a Keras tensor. - - Raises: - ValueError: In case `x` is not a symbolic tensor. - - Examples: - - >>> np_var = np.array([1, 2]) - >>> # A numpy array is not a symbolic tensor. - >>> tf.keras.backend.is_keras_tensor(np_var) - Traceback (most recent call last): - ... - ValueError: Unexpectedly found an instance of type ``. - Expected a symbolic tensor instance. - >>> keras_var = tf.keras.backend.variable(np_var) - >>> # A variable created with the keras backend is not a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_var) - False - >>> keras_placeholder = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> # A placeholder is not a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_placeholder) - False - >>> keras_input = tf.keras.layers.Input([10]) - >>> # An Input is a Keras tensor. 
- >>> tf.keras.backend.is_keras_tensor(keras_input) - True - >>> keras_layer_output = tf.keras.layers.Dense(10)(keras_input) - >>> # Any Keras layer output is a Keras tensor. - >>> tf.keras.backend.is_keras_tensor(keras_layer_output) - True - - """ - if not isinstance(x, - (ops.Tensor, variables_module.Variable, - sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor)): - raise ValueError('Unexpectedly found an instance of type `' + str(type(x)) + - '`. Expected a symbolic tensor instance.') - return hasattr(x, '_keras_history') - - -def placeholder(shape=None, - ndim=None, - dtype=None, - sparse=False, - name=None, - ragged=False): - """Instantiates a placeholder tensor and returns it. - - Arguments: - shape: Shape of the placeholder - (integer tuple, may include `None` entries). - ndim: Number of axes of the tensor. - At least one of {`shape`, `ndim`} must be specified. - If both are specified, `shape` is used. - dtype: Placeholder type. - sparse: Boolean, whether the placeholder should have a sparse type. - name: Optional name string for the placeholder. - ragged: Boolean, whether the placeholder should have a ragged type. - In this case, values of 'None' in the 'shape' argument represent - ragged dimensions. For more information about RaggedTensors, see this - [guide](https://www.tensorflow.org/guide/ragged_tensors). - - Raises: - ValueError: If called with eager execution - ValueError: If called with sparse = True and ragged = True. - - Returns: - Tensor instance (with Keras metadata included). - - Examples: - - - >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> input_ph - - - """ - if sparse and ragged: - raise ValueError( - 'Cannot set both sparse and ragged to True when creating a placeholder.' - ) - - if dtype is None: - dtype = floatx() - if not shape: - if ndim: - shape = (None,) * ndim - with get_graph().as_default(): - if sparse: - x = array_ops.sparse_placeholder(dtype, shape=shape, name=name) - elif ragged: - ragged_rank = 0 - for i in range(1, len(shape)): - if shape[i] is None: - ragged_rank = i - type_spec = ragged_tensor.RaggedTensorSpec( - shape=shape, dtype=dtype, ragged_rank=ragged_rank) - def tensor_spec_to_placeholder(tensorspec): - return array_ops.placeholder(tensorspec.dtype, tensorspec.shape) - x = nest.map_structure(tensor_spec_to_placeholder, type_spec, - expand_composites=True) - else: - x = array_ops.placeholder(dtype, shape=shape, name=name) - return x - - -def is_placeholder(x): - """Returns whether `x` is a placeholder. - - Arguments: - x: A candidate placeholder. - - Returns: - Boolean. - """ - try: - if isinstance(x, composite_tensor.CompositeTensor): - flat_components = nest.flatten(x, expand_composites=True) - return py_any(is_placeholder(c) for c in flat_components) - else: - return x.op.type == 'Placeholder' - except AttributeError: - return False - - -def freezable_variable(value, shape=None, name=None): - """A tensor-like object whose value can be updated only up until execution. - - After creating the freezable variable, you can update its value by calling - `var.update_value(new_value)` (similar to a regular variable). - Unlike an actual variable, the value used during execution is the current - value at the time the execution function (`backend.function()`) was created. - - This is an internal API, expected to be temporary. It is used to implement a - mutable `trainable` property for `BatchNormalization` layers, with a frozen - value after model compilation. 
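[Editor's aside, not part of the patch] A sketch of the freezable-variable contract described above, based only on the calls this docstring names (`update_value`, `get_value`); it is an internal API, and the values here are illustrative:

    trainable = freezable_variable(True, shape=(), name='frozen_trainable')
    trainable.update_value(False)   # allowed: no execution function exists yet
    print(trainable.get_value())    # -> False
    # Once backend.function() captures `trainable`, the value used during
    # execution stays fixed at whatever the current value was at that time.
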
- - We don't use a plain variable in this case because we need the value used - in a specific model to be frozen after `compile` has been called - (e.g. GAN use case). - - Arguments: - value: The initial value for the tensor-like object. - shape: The shape for the tensor-like object (cannot be changed). - name: The name for the tensor-like object. - - Returns: - A tensor-like object with a static value that can be updated via - `x.update_value(new_value)`, up until creating an execution function - (afterwards the value is fixed). - """ - graph = get_graph() - with graph.as_default(): - x = array_ops.placeholder_with_default( - value, shape=shape, name=name) - x._initial_value = value - x._current_value = value - - def update_value(new_value): - x._current_value = new_value - - def get_value(): - return x._current_value - - x.update_value = update_value - x.get_value = get_value - - global _FREEZABLE_VARS - _FREEZABLE_VARS[graph].add(x) - return x - - -def shape(x): - """Returns the symbolic shape of a tensor or variable. - - Arguments: - x: A tensor or variable. - - Returns: - A symbolic shape (which is itself a tensor). - - Examples: - - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.shape(kvar) - - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> tf.keras.backend.shape(input) - - - """ - return array_ops.shape(x) - - -def int_shape(x): - """Returns the shape of tensor or variable as a tuple of int or None entries. - - Arguments: - x: Tensor or variable. - - Returns: - A tuple of integers (or None entries). - - Examples: - - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> tf.keras.backend.int_shape(input) - (2, 4, 5) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.int_shape(kvar) - (2, 2) - - """ - try: - shape = x.shape - if not isinstance(shape, tuple): - shape = tuple(shape.as_list()) - return shape - except ValueError: - return None - - -def ndim(x): - """Returns the number of axes in a tensor, as an integer. - - Arguments: - x: Tensor or variable. - - Returns: - Integer (scalar), number of axes. - - Examples: - - - >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = tf.keras.backend.variable(value=val) - >>> tf.keras.backend.ndim(input) - 3 - >>> tf.keras.backend.ndim(kvar) - 2 - - """ - dims = x.shape._dims - if dims is not None: - return len(dims) - return None - - -def dtype(x): - """Returns the dtype of a Keras tensor or variable, as a string. - - Arguments: - x: Tensor or variable. - - Returns: - String, dtype of `x`. - - Examples: - - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5))) - 'float32' - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), - ... dtype='float32')) - 'float32' - >>> tf.keras.backend.dtype(tf.keras.backend.placeholder(shape=(2,4,5), - ... dtype='float64')) - 'float64' - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]])) - >>> tf.keras.backend.dtype(kvar) - 'float32' - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), - ... dtype='float32') - >>> tf.keras.backend.dtype(kvar) - 'float32' - - """ - return x.dtype.base_dtype.name - - -def eval(x): - """Evaluates the value of a variable. - - Arguments: - x: A variable. - - Returns: - A Numpy array. - - Examples: - - >>> kvar = tf.keras.backend.variable(np.array([[1, 2], [3, 4]]), - ... 
dtype='float32') - >>> tf.keras.backend.eval(kvar) - array([[1., 2.], - [3., 4.]], dtype=float32) - - """ - return get_value(to_dense(x)) - - -def zeros(shape, dtype=None, name=None): - """Instantiates an all-zeros variable and returns it. - - Arguments: - shape: Tuple or list of integers, shape of returned Keras variable - dtype: data type of returned Keras variable - name: name of returned Keras variable - - Returns: - A variable (including Keras metadata), filled with `0.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. - - Example: - - >>> kvar = tf.keras.backend.zeros((3,4)) - >>> tf.keras.backend.eval(kvar) - array([[0., 0., 0., 0.], - [0., 0., 0., 0.], - [0., 0., 0., 0.]], dtype=float32) - >>> A = tf.constant([1,2,3]) - >>> kvar2 = tf.keras.backend.zeros(A.shape) # [0., 0., 0.] - >>> tf.keras.backend.eval(kvar2) - array([0., 0., 0.], dtype=float32) - >>> kvar3 = tf.keras.backend.zeros(A.shape,dtype=tf.int32) - >>> tf.keras.backend.eval(kvar3) - array([0, 0, 0], dtype=int32) - >>> kvar4 = tf.keras.backend.zeros([2,3]) - >>> tf.keras.backend.eval(kvar4) - array([[0., 0., 0.], - [0., 0., 0.]], dtype=float32) - - """ - with ops.init_scope(): - if dtype is None: - dtype = floatx() - tf_dtype = dtypes_module.as_dtype(dtype) - v = array_ops.zeros(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.shape.as_list()): - return variable(v, dtype=dtype, name=name) - return v - - -def ones(shape, dtype=None, name=None): - """Instantiates an all-ones variable and returns it. - - Arguments: - shape: Tuple of integers, shape of returned Keras variable. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. - - Returns: - A Keras variable, filled with `1.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. - - Example: - - - >>> kvar = tf.keras.backend.ones((3,4)) - >>> tf.keras.backend.eval(kvar) - array([[1., 1., 1., 1.], - [1., 1., 1., 1.], - [1., 1., 1., 1.]], dtype=float32) - - """ - with ops.init_scope(): - if dtype is None: - dtype = floatx() - tf_dtype = dtypes_module.as_dtype(dtype) - v = array_ops.ones(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.shape.as_list()): - return variable(v, dtype=dtype, name=name) - return v - - -def eye(size, dtype=None, name=None): - """Instantiate an identity matrix and returns it. - - Arguments: - size: Integer, number of rows/columns. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. - - Returns: - A Keras variable, an identity matrix. - - Example: - - - >>> kvar = tf.keras.backend.eye(3) - >>> tf.keras.backend.eval(kvar) - array([[1., 0., 0.], - [0., 1., 0.], - [0., 0., 1.]], dtype=float32) - - - """ - if dtype is None: - dtype = floatx() - tf_dtype = dtypes_module.as_dtype(dtype) - return variable(linalg_ops.eye(size, dtype=tf_dtype), dtype, name) - - -def zeros_like(x, dtype=None, name=None): - """Instantiates an all-zeros variable of the same shape as another tensor. - - Arguments: - x: Keras variable or Keras tensor. - dtype: dtype of returned Keras variable. - `None` uses the dtype of `x`. - name: name for the variable to create. - - Returns: - A Keras variable with the shape of `x` filled with zeros. 
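# --- Illustrative usage sketch, via the public tf.keras.backend aliases of
# this (deleted) frozen copy: `zeros` returns an initialized variable when
# the shape is fully static, and `eval` materializes it as a numpy array.
import tensorflow as tf
K = tf.keras.backend

kvar = K.zeros((2, 3))
print(K.eval(kvar))      # [[0. 0. 0.] [0. 0. 0.]]
print(K.eval(K.eye(2)))  # [[1. 0.] [0. 1.]]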
- - Example: - - - from tensorflow.keras import backend as K - kvar = K.variable(np.random.random((2,3))) - kvar_zeros = K.zeros_like(kvar) - K.eval(kvar_zeros) - # array([[ 0., 0., 0.], [ 0., 0., 0.]], dtype=float32) - - - """ - return array_ops.zeros_like(x, dtype=dtype, name=name) - - -def ones_like(x, dtype=None, name=None): - """Instantiates an all-ones variable of the same shape as another tensor. - - Arguments: - x: Keras variable or tensor. - dtype: String, dtype of returned Keras variable. - None uses the dtype of x. - name: String, name for the variable to create. - - Returns: - A Keras variable with the shape of x filled with ones. - - Example: - - >>> kvar = tf.keras.backend.variable(np.random.random((2,3))) - >>> kvar_ones = tf.keras.backend.ones_like(kvar) - >>> tf.keras.backend.eval(kvar_ones) - array([[1., 1., 1.], - [1., 1., 1.]], dtype=float32) - - """ - return array_ops.ones_like(x, dtype=dtype, name=name) - - -def identity(x, name=None): - """Returns a tensor with the same content as the input tensor. - - Arguments: - x: The input tensor. - name: String, name for the variable to create. - - Returns: - A tensor of the same shape, type and content. - """ - return array_ops.identity(x, name=name) - - -def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): - """Instantiates a variable with values drawn from a uniform distribution. - - Arguments: - shape: Tuple of integers, shape of returned Keras variable. - low: Float, lower boundary of the output interval. - high: Float, upper boundary of the output interval. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. - - Returns: - A Keras variable, filled with drawn samples. - - Example: - - >>> kvar = tf.keras.backend.random_uniform_variable((2,3), 0, 1) - >>> kvar - - """ - if dtype is None: - dtype = floatx() - tf_dtype = dtypes_module.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = init_ops.random_uniform_initializer( - low, high, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -def random_normal_variable(shape, mean, scale, dtype=None, name=None, - seed=None): - """Instantiates a variable with values drawn from a normal distribution. - - Arguments: - shape: Tuple of integers, shape of returned Keras variable. - mean: Float, mean of the normal distribution. - scale: Float, standard deviation of the normal distribution. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. - - Returns: - A Keras variable, filled with drawn samples. - - Example: - - >>> kvar = tf.keras.backend.random_normal_variable((2,3), 0, 1) - >>> kvar - - """ - if dtype is None: - dtype = floatx() - tf_dtype = dtypes_module.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = init_ops.random_normal_initializer( - mean, scale, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -def count_params(x): - """Returns the static number of elements in a variable or tensor. - - Arguments: - x: Variable or tensor. - - Returns: - Integer, the number of scalars in `x`. 
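# --- Illustrative usage sketch via the public tf.keras.backend: samples
# from `random_uniform_variable` land in [low, high), and a seed makes the
# draw reproducible.
import tensorflow as tf
K = tf.keras.backend

kvar = K.random_uniform_variable((2, 3), low=0, high=1, seed=42)
vals = K.eval(kvar)
print(vals.shape, bool(vals.min() >= 0.0), bool(vals.max() < 1.0))  # (2, 3) True True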
- - Example: - - >>> kvar = tf.keras.backend.zeros((2,3)) - >>> tf.keras.backend.count_params(kvar) - 6 - >>> tf.keras.backend.eval(kvar) - array([[0., 0., 0.], - [0., 0., 0.]], dtype=float32) - - """ - return np.prod(x.shape.as_list()) - - -def cast(x, dtype): - """Casts a tensor to a different dtype and returns it. - - You can cast a Keras variable but it still returns a Keras tensor. - - Arguments: - x: Keras tensor (or variable). - dtype: String, either (`'float16'`, `'float32'`, or `'float64'`). - - Returns: - Keras tensor with dtype `dtype`. - - Examples: - Cast a float32 variable to a float64 tensor - - >>> input = tf.keras.backend.ones(shape=(1,3)) - >>> print(input) - - >>> cast_input = tf.keras.backend.cast(input, dtype='float64') - >>> print(cast_input) - tf.Tensor([[1. 1. 1.]], shape=(1, 3), dtype=float64) - - """ - return math_ops.cast(x, dtype) - - -# UPDATES OPS - - -def update(x, new_x): - return state_ops.assign(x, new_x) - - -def update_add(x, increment): - """Update the value of `x` by adding `increment`. - - Arguments: - x: A Variable. - increment: A tensor of same shape as `x`. - - Returns: - The variable `x` updated. - """ - return state_ops.assign_add(x, increment) - - -def update_sub(x, decrement): - """Update the value of `x` by subtracting `decrement`. - - Arguments: - x: A Variable. - decrement: A tensor of same shape as `x`. - - Returns: - The variable `x` updated. - """ - return state_ops.assign_sub(x, decrement) - - -def moving_average_update(x, value, momentum): - """Compute the moving average of a variable. - - Arguments: - x: A Variable. - value: A tensor with the same shape as `variable`. - momentum: The moving average momentum. - - Returns: - An Operation to update the variable. - """ - zero_debias = not tf2.enabled() - return moving_averages.assign_moving_average( - x, value, momentum, zero_debias=zero_debias) - - -# LINEAR ALGEBRA - - -def dot(x, y): - """Multiplies 2 tensors (and/or variables) and returns a tensor. - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A tensor, dot product of `x` and `y`. 
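# --- Numpy sketch (not part of the deleted module) of the arithmetic that
# `moving_average_update` above delegates to assign_moving_average, ignoring
# zero-debiasing: x <- x * momentum + value * (1 - momentum).
import numpy as np

x, momentum = 0.0, 0.9
for value in (1.0, 1.0, 1.0):
  x = x * momentum + value * (1.0 - momentum)
print(round(x, 3))  # 0.271 -- drifts toward the observed value over updates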
- - Examples: - - >>> x = tf.keras.backend.placeholder(shape=(2, 3)) - >>> y = tf.keras.backend.placeholder(shape=(3, 4)) - >>> xy = tf.keras.backend.dot(x, y) - >>> xy - - - >>> x = tf.keras.backend.placeholder(shape=(32, 28, 3)) - >>> y = tf.keras.backend.placeholder(shape=(3, 4)) - >>> xy = tf.keras.backend.dot(x, y) - >>> xy - - - >>> x = tf.keras.backend.random_uniform_variable(shape=(2, 3), low=0, high=1) - >>> y = tf.keras.backend.ones((4, 3, 5)) - >>> xy = tf.keras.backend.dot(x, y) - >>> tf.keras.backend.int_shape(xy) - (2, 4, 5) - """ - if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2): - x_shape = [] - for i, s in zip(int_shape(x), array_ops.unstack(array_ops.shape(x))): - if i is not None: - x_shape.append(i) - else: - x_shape.append(s) - x_shape = tuple(x_shape) - y_shape = [] - for i, s in zip(int_shape(y), array_ops.unstack(array_ops.shape(y))): - if i is not None: - y_shape.append(i) - else: - y_shape.append(s) - y_shape = tuple(y_shape) - y_permute_dim = list(range(ndim(y))) - y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim - xt = array_ops.reshape(x, [-1, x_shape[-1]]) - yt = array_ops.reshape( - array_ops.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) - return array_ops.reshape( - math_ops.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) - if is_sparse(x): - out = sparse_ops.sparse_tensor_dense_matmul(x, y) - else: - out = math_ops.matmul(x, y) - return out - - -def batch_dot(x, y, axes=None): - """Batchwise dot product. - - `batch_dot` is used to compute dot product of `x` and `y` when - `x` and `y` are data in batch, i.e. in a shape of - `(batch_size, :)`. - `batch_dot` results in a tensor or variable with less dimensions - than the input. If the number of dimensions is reduced to 1, - we use `expand_dims` to make sure that ndim is at least 2. - - Arguments: - x: Keras tensor or variable with `ndim >= 2`. - y: Keras tensor or variable with `ndim >= 2`. - axes: Tuple or list of integers with target dimensions, or single integer. - The sizes of `x.shape[axes[0]]` and `y.shape[axes[1]]` should be equal. - - Returns: - A tensor with shape equal to the concatenation of `x`'s shape - (less the dimension that was summed over) and `y`'s shape - (less the batch dimension and the dimension that was summed over). - If the final rank is 1, we reshape it to `(batch_size, 1)`. - - Examples: - - >>> x_batch = tf.keras.backend.ones(shape=(32, 20, 1)) - >>> y_batch = tf.keras.backend.ones(shape=(32, 30, 20)) - >>> xy_batch_dot = tf.keras.backend.batch_dot(x_batch, y_batch, axes=(1, 2)) - >>> tf.keras.backend.int_shape(xy_batch_dot) - (32, 1, 30) - - Shape inference: - Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. - If `axes` is (1, 2), to find the output shape of resultant tensor, - loop through each dimension in `x`'s shape and `y`'s shape: - * `x.shape[0]` : 100 : append to output shape - * `x.shape[1]` : 20 : do not append to output shape, - dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) - * `y.shape[0]` : 100 : do not append to output shape, - always ignore first dimension of `y` - * `y.shape[1]` : 30 : append to output shape - * `y.shape[2]` : 20 : do not append to output shape, - dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) - `output_shape` = `(100, 30)` - """ - x_shape = int_shape(x) - y_shape = int_shape(y) - - x_ndim = len(x_shape) - y_ndim = len(y_shape) - - if x_ndim < 2 or y_ndim < 2: - raise ValueError('Cannot do batch_dot on inputs ' - 'with rank < 2. 
' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - x_batch_size = x_shape[0] - y_batch_size = y_shape[0] - - if x_batch_size is not None and y_batch_size is not None: - if x_batch_size != y_batch_size: - raise ValueError('Cannot do batch_dot on inputs ' - 'with different batch sizes. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - if isinstance(axes, int): - axes = [axes, axes] - - if axes is None: - if y_ndim == 2: - axes = [x_ndim - 1, y_ndim - 1] - else: - axes = [x_ndim - 1, y_ndim - 2] - - if py_any(isinstance(a, (list, tuple)) for a in axes): - raise ValueError('Multiple target dimensions are not supported. ' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - - # if tuple, convert to list. - axes = list(axes) - - # convert negative indices. - if axes[0] < 0: - axes[0] += x_ndim - if axes[1] < 0: - axes[1] += y_ndim - - # sanity checks - if 0 in axes: - raise ValueError('Cannot perform batch_dot over axis 0. ' - 'If your inputs are not batched, ' - 'add a dummy batch dimension to your ' - 'inputs using K.expand_dims(x, 0)') - a0, a1 = axes - d1 = x_shape[a0] - d2 = y_shape[a1] - - if d1 is not None and d2 is not None and d1 != d2: - raise ValueError('Cannot do batch_dot on inputs with shapes ' + - str(x_shape) + ' and ' + str(y_shape) + - ' with axes=' + str(axes) + '. x.shape[%d] != ' - 'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2)) - - # backup ndims. Need them later. - orig_x_ndim = x_ndim - orig_y_ndim = y_ndim - - # if rank is 2, expand to 3. - if x_ndim == 2: - x = array_ops.expand_dims(x, 1) - a0 += 1 - x_ndim += 1 - if y_ndim == 2: - y = array_ops.expand_dims(y, 2) - y_ndim += 1 - - # bring x's dimension to be reduced to last axis. - if a0 != x_ndim - 1: - pattern = list(range(x_ndim)) - for i in range(a0, x_ndim - 1): - pattern[i] = pattern[i + 1] - pattern[-1] = a0 - x = array_ops.transpose(x, pattern) - - # bring y's dimension to be reduced to axis 1. - if a1 != 1: - pattern = list(range(y_ndim)) - for i in range(a1, 1, -1): - pattern[i] = pattern[i - 1] - pattern[1] = a1 - y = array_ops.transpose(y, pattern) - - # normalize both inputs to rank 3. - if x_ndim > 3: - # squash middle dimensions of x. - x_shape = shape(x) - x_mid_dims = x_shape[1:-1] - x_squashed_shape = array_ops.stack( - [x_shape[0], -1, x_shape[-1]]) - x = array_ops.reshape(x, x_squashed_shape) - x_squashed = True - else: - x_squashed = False - - if y_ndim > 3: - # squash trailing dimensions of y. - y_shape = shape(y) - y_trail_dims = y_shape[2:] - y_squashed_shape = array_ops.stack( - [y_shape[0], y_shape[1], -1]) - y = array_ops.reshape(y, y_squashed_shape) - y_squashed = True - else: - y_squashed = False - - result = math_ops.matmul(x, y) - - # if inputs were squashed, we have to reshape the matmul output. - output_shape = array_ops.shape(result) - do_reshape = False - - if x_squashed: - output_shape = array_ops.concat( - [output_shape[:1], - x_mid_dims, - output_shape[-1:]], 0) - do_reshape = True - - if y_squashed: - output_shape = array_ops.concat([output_shape[:-1], y_trail_dims], 0) - do_reshape = True - - if do_reshape: - result = array_ops.reshape(result, output_shape) - - # if the inputs were originally rank 2, we remove the added 1 dim. - if orig_x_ndim == 2: - result = array_ops.squeeze(result, 1) - elif orig_y_ndim == 2: - result = array_ops.squeeze(result, -1) - - return result - - -def transpose(x): - """Transposes a tensor and returns it. - - Arguments: - x: Tensor or variable. 
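# --- Eager cross-check (not part of the deleted module) of the `batch_dot`
# shape-inference walkthrough above, against an equivalent einsum.
import tensorflow as tf
K = tf.keras.backend

x = tf.random.normal((100, 20))
y = tf.random.normal((100, 30, 20))
out = K.batch_dot(x, y, axes=(1, 2))   # -> shape (100, 30)
ref = tf.einsum('bk,bnk->bn', x, y)
print(K.int_shape(out), bool(tf.reduce_all(tf.abs(out - ref) < 1e-4)))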
- - Returns: - A tensor. - - Examples: - - >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) - >>> tf.keras.backend.eval(var) - array([[1., 2., 3.], - [4., 5., 6.]], dtype=float32) - >>> var_transposed = tf.keras.backend.transpose(var) - >>> tf.keras.backend.eval(var_transposed) - array([[1., 4.], - [2., 5.], - [3., 6.]], dtype=float32) - >>> input = tf.keras.backend.placeholder((2, 3)) - >>> input - - >>> input_transposed = tf.keras.backend.transpose(input) - >>> input_transposed - - """ - return array_ops.transpose(x) - - -def gather(reference, indices): - """Retrieves the elements of indices `indices` in the tensor `reference`. - - Arguments: - reference: A tensor. - indices: An integer tensor of indices. - - Returns: - A tensor of same type as `reference`. - - Examples: - - >>> var = tf.keras.backend.variable([[1, 2, 3], [4, 5, 6]]) - >>> tf.keras.backend.eval(var) - array([[1., 2., 3.], - [4., 5., 6.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [0]) - >>> tf.keras.backend.eval(var_gathered) - array([[1., 2., 3.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [1]) - >>> tf.keras.backend.eval(var_gathered) - array([[4., 5., 6.]], dtype=float32) - >>> var_gathered = tf.keras.backend.gather(var, [0,1,0]) - >>> tf.keras.backend.eval(var_gathered) - array([[1., 2., 3.], - [4., 5., 6.], - [1., 2., 3.]], dtype=float32) - """ - return array_ops.gather(reference, indices) - - -# ELEMENT-WISE OPERATIONS - - -def max(x, axis=None, keepdims=False): - """Maximum value in a tensor. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to find maximum values. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. - - Returns: - A tensor with maximum values of `x`. - """ - return math_ops.reduce_max(x, axis, keepdims) - - -def min(x, axis=None, keepdims=False): - """Minimum value in a tensor. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to find minimum values. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. - - Returns: - A tensor with minimum values of `x`. - """ - return math_ops.reduce_min(x, axis, keepdims) - - -def sum(x, axis=None, keepdims=False): - """Sum of the values in a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to sum over. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. - - Returns: - A tensor with sum of `x`. - """ - return math_ops.reduce_sum(x, axis, keepdims) - - -def prod(x, axis=None, keepdims=False): - """Multiplies the values in a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to compute the product. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. - - Returns: - A tensor with the product of elements of `x`. 
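# --- Eager sketch of the keepdims contract shared by the reductions above,
# via the public tf.keras.backend: False drops the reduced axis, True keeps
# it with length 1.
import tensorflow as tf
K = tf.keras.backend

x = tf.ones((2, 3))
print(K.int_shape(K.sum(x, axis=1)))                 # (2,)
print(K.int_shape(K.sum(x, axis=1, keepdims=True)))  # (2, 1)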
- """ - return math_ops.reduce_prod(x, axis, keepdims) - - -def cumsum(x, axis=0): - """Cumulative sum of the values in a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to compute the sum. - - Returns: - A tensor of the cumulative sum of values of `x` along `axis`. - """ - return math_ops.cumsum(x, axis=axis) - - -def cumprod(x, axis=0): - """Cumulative product of the values in a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to compute the product. - - Returns: - A tensor of the cumulative product of values of `x` along `axis`. - """ - return math_ops.cumprod(x, axis=axis) - - -def var(x, axis=None, keepdims=False): - """Variance of a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to compute the variance. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, - the reduced dimension is retained with length 1. - - Returns: - A tensor with the variance of elements of `x`. - """ - if x.dtype.base_dtype == dtypes_module.bool: - x = math_ops.cast(x, floatx()) - return math_ops.reduce_variance(x, axis=axis, keepdims=keepdims) - - -def std(x, axis=None, keepdims=False): - """Standard deviation of a tensor, alongside the specified axis. - - It is an alias to `tf.math.reduce_std`. - - Arguments: - x: A tensor or variable. It should have numerical dtypes. Boolean type - inputs will be converted to float. - axis: An integer, the axis to compute the standard deviation. If `None` - (the default), reduces all dimensions. Must be in the range - `[-rank(x), rank(x))`. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, the reduced dimension is retained with - length 1. - - Returns: - A tensor with the standard deviation of elements of `x` with same dtype. - Boolean type input will be converted to float. - """ - if x.dtype.base_dtype == dtypes_module.bool: - x = math_ops.cast(x, floatx()) - return math_ops.reduce_std(x, axis=axis, keepdims=keepdims) - - -def mean(x, axis=None, keepdims=False): - """Mean of a tensor, alongside the specified axis. - - Arguments: - x: A tensor or variable. - axis: A list of integer. Axes to compute the mean. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1 for each entry in `axis`. If `keepdims` is `True`, - the reduced dimensions are retained with length 1. - - Returns: - A tensor with the mean of elements of `x`. - """ - if x.dtype.base_dtype == dtypes_module.bool: - x = math_ops.cast(x, floatx()) - return math_ops.reduce_mean(x, axis, keepdims) - - -def any(x, axis=None, keepdims=False): - """Bitwise reduction (logical OR). - - Arguments: - x: Tensor or variable. - axis: axis along which to perform the reduction. - keepdims: whether the drop or broadcast the reduction axes. - - Returns: - A uint8 tensor (0s and 1s). - """ - x = math_ops.cast(x, dtypes_module.bool) - return math_ops.reduce_any(x, axis, keepdims) - - -def all(x, axis=None, keepdims=False): - """Bitwise reduction (logical AND). - - Arguments: - x: Tensor or variable. - axis: axis along which to perform the reduction. - keepdims: whether the drop or broadcast the reduction axes. - - Returns: - A uint8 tensor (0s and 1s). 
- """ - x = math_ops.cast(x, dtypes_module.bool) - return math_ops.reduce_all(x, axis, keepdims) - - -def argmax(x, axis=-1): - """Returns the index of the maximum value along an axis. - - Arguments: - x: Tensor or variable. - axis: axis along which to perform the reduction. - - Returns: - A tensor. - """ - return math_ops.argmax(x, axis) - - -def argmin(x, axis=-1): - """Returns the index of the minimum value along an axis. - - Arguments: - x: Tensor or variable. - axis: axis along which to perform the reduction. - - Returns: - A tensor. - """ - return math_ops.argmin(x, axis) - - -def square(x): - """Element-wise square. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.square(x) - - -def abs(x): - """Element-wise absolute value. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.abs(x) - - -def sqrt(x): - """Element-wise square root. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - zero = _constant_to_tensor(0., x.dtype.base_dtype) - inf = _constant_to_tensor(np.inf, x.dtype.base_dtype) - x = clip_ops.clip_by_value(x, zero, inf) - return math_ops.sqrt(x) - - -def exp(x): - """Element-wise exponential. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.exp(x) - - -def log(x): - """Element-wise log. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.log(x) - - -def logsumexp(x, axis=None, keepdims=False): - """Computes log(sum(exp(elements across dimensions of a tensor))). - - This function is more numerically stable than log(sum(exp(x))). - It avoids overflows caused by taking the exp of large inputs and - underflows caused by taking the log of small inputs. - - Arguments: - x: A tensor or variable. - axis: An integer, the axis to reduce over. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, the reduced dimension is - retained with length 1. - - Returns: - The reduced tensor. - """ - return math_ops.reduce_logsumexp(x, axis, keepdims) - - -def round(x): - """Element-wise rounding to the closest integer. - - In case of tie, the rounding mode used is "half to even". - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.round(x) - - -def sign(x): - """Element-wise sign. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.sign(x) - - -def pow(x, a): - """Element-wise exponentiation. - - Arguments: - x: Tensor or variable. - a: Python integer. - - Returns: - A tensor. - """ - return math_ops.pow(x, a) - - -def clip(x, min_value, max_value): - """Element-wise value clipping. - - Arguments: - x: Tensor or variable. - min_value: Python float, integer, or tensor. - max_value: Python float, integer, or tensor. - - Returns: - A tensor. - """ - if (isinstance(min_value, (int, float)) and - isinstance(max_value, (int, float))): - if max_value < min_value: - max_value = min_value - if min_value is None: - min_value = -np.inf - if max_value is None: - max_value = np.inf - return clip_ops.clip_by_value(x, min_value, max_value) - - -def equal(x, y): - """Element-wise equality between two tensors. - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.equal(x, y) - - -def not_equal(x, y): - """Element-wise inequality between two tensors. - - Arguments: - x: Tensor or variable. 
- y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.not_equal(x, y) - - -def greater(x, y): - """Element-wise truth value of (x > y). - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.greater(x, y) - - -def greater_equal(x, y): - """Element-wise truth value of (x >= y). - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.greater_equal(x, y) - - -def less(x, y): - """Element-wise truth value of (x < y). - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.less(x, y) - - -def less_equal(x, y): - """Element-wise truth value of (x <= y). - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A bool tensor. - """ - return math_ops.less_equal(x, y) - - -def maximum(x, y): - """Element-wise maximum of two tensors. - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A tensor with the element wise maximum value(s) of `x` and `y`. - - Examples: - - >>> x = tf.Variable([[1, 2], [3, 4]]) - >>> y = tf.Variable([[2, 1], [0, -1]]) - >>> m = tf.keras.backend.maximum(x, y) - >>> m - - """ - return math_ops.maximum(x, y) - - -def minimum(x, y): - """Element-wise minimum of two tensors. - - Arguments: - x: Tensor or variable. - y: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.minimum(x, y) - - -def sin(x): - """Computes sin of x element-wise. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.sin(x) - - -def cos(x): - """Computes cos of x element-wise. - - Arguments: - x: Tensor or variable. - - Returns: - A tensor. - """ - return math_ops.cos(x) - - -def _regular_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Non-fused version of `normalize_batch_in_training`. - - Arguments: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - mean, var = nn.moments(x, reduction_axes, None, None, False) - normed = nn.batch_normalization(x, mean, var, beta, gamma, epsilon) - return normed, mean, var - - -def _broadcast_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Non-fused, broadcast version of `normalize_batch_in_training`. - - Arguments: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. 
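# --- Eager check (not part of the deleted module) of the element-wise
# `maximum` example above.
import tensorflow as tf
K = tf.keras.backend

x = tf.constant([[1, 2], [3, 4]])
y = tf.constant([[2, 1], [0, -1]])
print(K.maximum(x, y).numpy())  # [[2 2] [3 4]]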
- """ - mean, var = nn.moments(x, reduction_axes, None, None, False) - target_shape = [] - for axis in range(ndim(x)): - if axis in reduction_axes: - target_shape.append(1) - else: - target_shape.append(array_ops.shape(x)[axis]) - target_shape = array_ops.stack(target_shape) - - broadcast_mean = array_ops.reshape(mean, target_shape) - broadcast_var = array_ops.reshape(var, target_shape) - if gamma is None: - broadcast_gamma = None - else: - broadcast_gamma = array_ops.reshape(gamma, target_shape) - if beta is None: - broadcast_beta = None - else: - broadcast_beta = array_ops.reshape(beta, target_shape) - - normed = nn.batch_normalization(x, broadcast_mean, broadcast_var, - broadcast_beta, broadcast_gamma, epsilon) - return normed, mean, var - - -def _fused_normalize_batch_in_training(x, - gamma, - beta, - reduction_axes, - epsilon=1e-3): - """Fused version of `normalize_batch_in_training`. - - Arguments: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - if list(reduction_axes) == [0, 1, 2]: - normalization_axis = 3 - tf_data_format = 'NHWC' - else: - normalization_axis = 1 - tf_data_format = 'NCHW' - - if gamma is None: - gamma = constant_op.constant( - 1.0, dtype=x.dtype, shape=[x.shape[normalization_axis]]) - if beta is None: - beta = constant_op.constant( - 0.0, dtype=x.dtype, shape=[x.shape[normalization_axis]]) - - return nn.fused_batch_norm( - x, gamma, beta, epsilon=epsilon, data_format=tf_data_format) - - -def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3): - """Computes mean and std for batch then apply batch_normalization on batch. - - Arguments: - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - Returns: - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - if ndim(x) == 4 and list(reduction_axes) in [[0, 1, 2], [0, 2, 3]]: - if not _has_nchw_support() and list(reduction_axes) == [0, 2, 3]: - return _broadcast_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - return _fused_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - else: - if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: - return _regular_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - else: - return _broadcast_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon=epsilon) - - -def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): - """Applies batch normalization on x given mean, var, beta and gamma. - - I.e. returns: - `output = (x - mean) / (sqrt(var) + epsilon) * gamma + beta` - - Arguments: - x: Input tensor or variable. - mean: Mean of batch. - var: Variance of batch. - beta: Tensor with which to center the input. - gamma: Tensor by which to scale the input. - axis: Integer, the axis that should be normalized. - (typically the features axis). - epsilon: Fuzz factor. - - Returns: - A tensor. 
- """ - if ndim(x) == 4: - # The CPU implementation of `fused_batch_norm` only supports NHWC - if axis == 1 or axis == -3: - tf_data_format = 'NCHW' - elif axis == 3 or axis == -1: - tf_data_format = 'NHWC' - else: - tf_data_format = None - - if (tf_data_format == 'NHWC' or - tf_data_format == 'NCHW' and _has_nchw_support()): - # The mean / var / beta / gamma tensors may be broadcasted - # so they may have extra axes of size 1, which should be squeezed. - if ndim(mean) > 1: - mean = array_ops.reshape(mean, [-1]) - if ndim(var) > 1: - var = array_ops.reshape(var, [-1]) - if beta is None: - beta = zeros_like(mean) - elif ndim(beta) > 1: - beta = array_ops.reshape(beta, [-1]) - if gamma is None: - gamma = ones_like(mean) - elif ndim(gamma) > 1: - gamma = array_ops.reshape(gamma, [-1]) - y, _, _ = nn.fused_batch_norm( - x, - gamma, - beta, - epsilon=epsilon, - mean=mean, - variance=var, - data_format=tf_data_format, - is_training=False - ) - return y - return nn.batch_normalization(x, mean, var, beta, gamma, epsilon) - - -# SHAPE OPERATIONS - - -def concatenate(tensors, axis=-1): - """Concatenates a list of tensors alongside the specified axis. - - Arguments: - tensors: list of tensors to concatenate. - axis: concatenation axis. - - Returns: - A tensor. - - Example: - - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - >>> b = tf.constant([[10, 20, 30], [40, 50, 60], [70, 80, 90]]) - >>> tf.keras.backend.concatenate((a, b), axis=-1) - - - """ - if axis < 0: - rank = ndim(tensors[0]) - if rank: - axis %= rank - else: - axis = 0 - - if py_all(is_sparse(x) for x in tensors): - return sparse_ops.sparse_concat(axis, tensors) - elif py_all(isinstance(x, ragged_tensor.RaggedTensor) for x in tensors): - return ragged_concat_ops.concat(tensors, axis) - else: - return array_ops.concat([to_dense(x) for x in tensors], axis) - - -def reshape(x, shape): - """Reshapes a tensor to the specified shape. - - Arguments: - x: Tensor or variable. - shape: Target shape tuple. - - Returns: - A tensor. - - Example: - - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - >>> a - - >>> tf.keras.backend.reshape(a, shape=(2, 6)) - - - """ - return array_ops.reshape(x, shape) - - -def permute_dimensions(x, pattern): - """Permutes axes in a tensor. - - Arguments: - x: Tensor or variable. - pattern: A tuple of - dimension indices, e.g. `(0, 2, 1)`. - - Returns: - A tensor. - - Example: - - >>> a = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) - >>> a - - >>> tf.keras.backend.permute_dimensions(a, pattern=(1, 0)) - - - """ - return array_ops.transpose(x, perm=pattern) - - -def resize_images(x, height_factor, width_factor, data_format, - interpolation='nearest'): - """Resizes the images contained in a 4D tensor. - - Arguments: - x: Tensor or variable to resize. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: One of `"channels_first"`, `"channels_last"`. - interpolation: A string, one of `nearest` or `bilinear`. - - Returns: - A tensor. - - Raises: - ValueError: in case of incorrect value for - `data_format` or `interpolation`. 
- """ - if data_format == 'channels_first': - rows, cols = 2, 3 - elif data_format == 'channels_last': - rows, cols = 1, 2 - else: - raise ValueError('Invalid `data_format` argument: %s' % (data_format,)) - - original_shape = int_shape(x) - new_shape = array_ops.shape(x)[rows:cols + 1] - new_shape *= constant_op.constant( - np.array([height_factor, width_factor], dtype='int32')) - - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 2, 3, 1]) - if interpolation == 'nearest': - x = image_ops.resize_images_v2( - x, new_shape, method=image_ops.ResizeMethod.NEAREST_NEIGHBOR) - elif interpolation == 'bilinear': - x = image_ops.resize_images_v2(x, new_shape, - method=image_ops.ResizeMethod.BILINEAR) - else: - raise ValueError('interpolation should be one ' - 'of "nearest" or "bilinear".') - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 3, 1, 2]) - - if original_shape[rows] is None: - new_height = None - else: - new_height = original_shape[rows] * height_factor - - if original_shape[cols] is None: - new_width = None - else: - new_width = original_shape[cols] * width_factor - - if data_format == 'channels_first': - output_shape = (None, None, new_height, new_width) - else: - output_shape = (None, new_height, new_width, None) - x.set_shape(output_shape) - return x - - -def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - """Resizes the volume contained in a 5D tensor. - - Arguments: - x: Tensor or variable to resize. - depth_factor: Positive integer. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: One of `"channels_first"`, `"channels_last"`. - - Returns: - A tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. - """ - if data_format == 'channels_first': - output = repeat_elements(x, depth_factor, axis=2) - output = repeat_elements(output, height_factor, axis=3) - output = repeat_elements(output, width_factor, axis=4) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, depth_factor, axis=1) - output = repeat_elements(output, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - else: - raise ValueError('Invalid data_format: ' + str(data_format)) - - -def repeat_elements(x, rep, axis): - """Repeats the elements of a tensor along an axis, like `np.repeat`. - - If `x` has shape `(s1, s2, s3)` and `axis` is `1`, the output - will have shape `(s1, s2 * rep, s3)`. - - Arguments: - x: Tensor or variable. - rep: Python integer, number of times to repeat. - axis: Axis along which to repeat. - - Returns: - A tensor. - - Example: - - >>> b = tf.constant([1, 2, 3]) - >>> tf.keras.backend.repeat_elements(b, rep=2, axis=0) - - - """ - x_shape = x.shape.as_list() - # For static axis - if x_shape[axis] is not None: - # slices along the repeat axis - splits = array_ops.split(value=x, - num_or_size_splits=x_shape[axis], - axis=axis) - # repeat each slice the given number of reps - x_rep = [s for s in splits for _ in range(rep)] - return concatenate(x_rep, axis) - - # Here we use tf.tile to mimic behavior of np.repeat so that - # we can handle dynamic shapes (that include None). - # To do that, we need an auxiliary axis to repeat elements along - # it and then merge them along the desired axis. 
- - # Repeating - auxiliary_axis = axis + 1 - x_shape = array_ops.shape(x) - x_rep = array_ops.expand_dims(x, axis=auxiliary_axis) - reps = np.ones(len(x.shape) + 1) - reps[auxiliary_axis] = rep - x_rep = array_ops.tile(x_rep, reps) - - # Merging - reps = np.delete(reps, auxiliary_axis) - reps[axis] = rep - reps = array_ops.constant(reps, dtype='int32') - x_shape *= reps - x_rep = array_ops.reshape(x_rep, x_shape) - - # Fix shape representation - x_shape = x.shape.as_list() - x_rep.set_shape(x_shape) - x_rep._keras_shape = tuple(x_shape) - return x_rep - - -def repeat(x, n): - """Repeats a 2D tensor. - - if `x` has shape (samples, dim) and `n` is `2`, - the output will have shape `(samples, 2, dim)`. - - Arguments: - x: Tensor or variable. - n: Python integer, number of times to repeat. - - Returns: - A tensor. - - Example: - - >>> b = tf.constant([[1, 2], [3, 4]]) - >>> b - - >>> tf.keras.backend.repeat(b, n=2) - - - """ - assert ndim(x) == 2 - x = array_ops.expand_dims(x, 1) - pattern = array_ops.stack([1, n, 1]) - return array_ops.tile(x, pattern) - - -def arange(start, stop=None, step=1, dtype='int32'): - """Creates a 1D tensor containing a sequence of integers. - - The function arguments use the same convention as - Theano's arange: if only one argument is provided, - it is in fact the "stop" argument and "start" is 0. - - The default type of the returned tensor is `'int32'` to - match TensorFlow's default. - - Arguments: - start: Start value. - stop: Stop value. - step: Difference between two successive values. - dtype: Integer dtype to use. - - Returns: - An integer tensor. - - Example: - - >>> tf.keras.backend.arange(start=0, stop=10, step=1.5) - - - - - """ - # Match the behavior of numpy and Theano by returning an empty sequence. - if stop is None and start < 0: - start = 0 - result = math_ops.range(start, limit=stop, delta=step, name='arange') - if dtype != 'int32': - result = cast(result, dtype) - return result - - -def tile(x, n): - """Creates a tensor by tiling `x` by `n`. - - Arguments: - x: A tensor or variable - n: A list of integer. The length must be the same as the number of - dimensions in `x`. - - Returns: - A tiled tensor. - """ - if isinstance(n, int): - n = [n] - return array_ops.tile(x, n) - - -def flatten(x): - """Flatten a tensor. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor, reshaped into 1-D - - Example: - - >>> b = tf.constant([[1, 2], [3, 4]]) - >>> b - - >>> tf.keras.backend.flatten(b) - - - """ - return array_ops.reshape(x, [-1]) - - -def batch_flatten(x): - """Turn a nD tensor into a 2D tensor with same 0th dimension. - - In other words, it flattens each data samples of a batch. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - - Examples: - Flattening a 3D tensor to 2D by collapsing the last dimension. - - >>> x_batch = tf.keras.backend.ones(shape=(2, 3, 4, 5)) - >>> x_batch_flatten = batch_flatten(x_batch) - >>> tf.keras.backend.int_shape(x_batch_flatten) - (2, 60) - - """ - x = array_ops.reshape(x, array_ops.stack([-1, prod(shape(x)[1:])])) - return x - - -def expand_dims(x, axis=-1): - """Adds a 1-sized dimension at index "axis". - - Arguments: - x: A tensor or variable. - axis: Position where to add a new axis. - - Returns: - A tensor with expanded dimensions. - """ - return array_ops.expand_dims(x, axis) - - -def squeeze(x, axis): - """Removes a 1-dimension from the tensor at index "axis". - - Arguments: - x: A tensor or variable. - axis: Axis to drop. 
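# --- Eager sketch of `batch_flatten` above: every axis except the batch
# axis is collapsed into one.
import tensorflow as tf
K = tf.keras.backend

x = tf.ones((2, 3, 4, 5))
print(K.int_shape(K.batch_flatten(x)))  # (2, 60)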
- - Returns: - A tensor with the same data as `x` but reduced dimensions. - """ - return array_ops.squeeze(x, [axis]) - - -def temporal_padding(x, padding=(1, 1)): - """Pads the middle dimension of a 3D tensor. - - Arguments: - x: Tensor or variable. - padding: Tuple of 2 integers, how many zeros to - add at the start and end of dim 1. - - Returns: - A padded 3D tensor. - """ - assert len(padding) == 2 - pattern = [[0, 0], [padding[0], padding[1]], [0, 0]] - return array_ops.pad(x, pattern) - - -def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - """Pads the 2nd and 3rd dimensions of a 4D tensor. - - Arguments: - x: Tensor or variable. - padding: Tuple of 2 tuples, padding pattern. - data_format: One of `channels_last` or `channels_first`. - - Returns: - A padded 4D tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. - """ - assert len(padding) == 2 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if data_format == 'channels_first': - pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])] - else: - pattern = [[0, 0], list(padding[0]), list(padding[1]), [0, 0]] - return array_ops.pad(x, pattern) - - -def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - """Pads 5D tensor with zeros along the depth, height, width dimensions. - - Pads these dimensions with respectively - "padding[0]", "padding[1]" and "padding[2]" zeros left and right. - - For 'channels_last' data_format, - the 2nd, 3rd and 4th dimension will be padded. - For 'channels_first' data_format, - the 3rd, 4th and 5th dimension will be padded. - - Arguments: - x: Tensor or variable. - padding: Tuple of 3 tuples, padding pattern. - data_format: One of `channels_last` or `channels_first`. - - Returns: - A padded 5D tensor. - - Raises: - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. - - """ - assert len(padding) == 3 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - assert len(padding[2]) == 2 - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if data_format == 'channels_first': - pattern = [[0, 0], [0, 0], [padding[0][0], padding[0][1]], - [padding[1][0], padding[1][1]], [padding[2][0], padding[2][1]]] - else: - pattern = [[0, 0], [padding[0][0], padding[0][1]], - [padding[1][0], padding[1][1]], [padding[2][0], - padding[2][1]], [0, 0]] - return array_ops.pad(x, pattern) - - -def stack(x, axis=0): - """Stacks a list of rank `R` tensors into a rank `R+1` tensor. - - Arguments: - x: List of tensors. - axis: Axis along which to perform stacking. - - Returns: - A tensor. - - Example: - - >>> a = tf.constant([[1, 2],[3, 4]]) - >>> b = tf.constant([[10, 20],[30, 40]]) - >>> tf.keras.backend.stack((a, b)) - - - """ - return array_ops.stack(x, axis=axis) - - -def one_hot(indices, num_classes): - """Computes the one-hot representation of an integer tensor. - - Arguments: - indices: nD integer tensor of shape - `(batch_size, dim1, dim2, ... dim(n-1))` - num_classes: Integer, number of classes to consider. - - Returns: - (n + 1)D one hot representation of the input - with shape `(batch_size, dim1, dim2, ... dim(n-1), num_classes)` - - Returns: - The one-hot tensor. 
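# --- Eager sketch of `one_hot` above: integer indices of shape (batch, d)
# expand to (batch, d, num_classes).
import tensorflow as tf
K = tf.keras.backend

idx = tf.constant([[0, 2], [1, 1]])
print(K.int_shape(K.one_hot(idx, num_classes=3)))  # (2, 2, 3)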
- """ - return array_ops.one_hot(indices, depth=num_classes, axis=-1) - - -def reverse(x, axes): - """Reverse a tensor along the specified axes. - - Arguments: - x: Tensor to reverse. - axes: Integer or iterable of integers. - Axes to reverse. - - Returns: - A tensor. - """ - if isinstance(axes, int): - axes = [axes] - return array_ops.reverse(x, axes) - - -# VALUE MANIPULATION -_VALUE_SET_CODE_STRING = """ - >>> K = tf.keras.backend # Common keras convention - >>> v = K.variable(1.) - - >>> # reassign - >>> K.set_value(v, 2.) - >>> print(K.get_value(v)) - 2.0 - - >>> # increment - >>> K.set_value(v, K.get_value(v) + 1) - >>> print(K.get_value(v)) - 3.0 - - Variable semantics in TensorFlow 2 are eager execution friendly. The above - code is roughly equivalent to: - - >>> v = tf.Variable(1.) - - >>> _ = v.assign(2.) - >>> print(v.numpy()) - 2.0 - - >>> _ = v.assign_add(1.) - >>> print(v.numpy()) - 3.0"""[3:] # Prune first newline and indent to match the docstring template. - - -def get_value(x): - """Returns the value of a variable. - - `backend.get_value` is the compliment of `backend.set_value`, and provides - a generic interface for reading from variables while abstracting away the - differences between TensorFlow 1.x and 2.x semantics. - - {snippet} - - Arguments: - x: input variable. - - Returns: - A Numpy array. - """ - if not tensor_util.is_tensor(x): - return x - if context.executing_eagerly() or isinstance(x, ops.EagerTensor): - return x.numpy() - if not getattr(x, '_in_graph_mode', True): - # This is a variable which was created in an eager context, but is being - # evaluated from a Graph. - with context.eager_mode(): - return x.numpy() - - if ops.executing_eagerly_outside_functions(): - # This method of evaluating works inside the Keras FuncGraph. - return function([], x)(x) - - with x.graph.as_default(): - return x.eval(session=get_session((x,))) - - -def batch_get_value(tensors): - """Returns the value of more than one tensor variable. - - Arguments: - tensors: list of ops to run. - - Returns: - A list of Numpy arrays. - - Raises: - RuntimeError: If this method is called inside defun. - """ - if context.executing_eagerly(): - return [x.numpy() for x in tensors] - elif ops.inside_function(): # pylint: disable=protected-access - raise RuntimeError('Cannot get value inside Tensorflow graph function.') - if tensors: - return get_session(tensors).run(tensors) - else: - return [] - - -def set_value(x, value): - """Sets the value of a variable, from a Numpy array. - - `backend.set_value` is the compliment of `backend.get_value`, and provides - a generic interface for assigning to variables while abstracting away the - differences between TensorFlow 1.x and 2.x semantics. - - {snippet} - - Arguments: - x: Variable to set to a new value. - value: Value to set the tensor to, as a Numpy array - (of the same shape). - """ - value = np.asarray(value, dtype=dtype(x)) - if ops.executing_eagerly_outside_functions(): - x.assign(value) - else: - with get_graph().as_default(): - tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op - else: - # In order to support assigning weights to resizable variables in - # Keras, we make a placeholder with the correct number of dimensions - # but with None in each dimension. This way, we can assign weights - # of any size (as long as they have the correct dimensionality). 
- placeholder_shape = tensor_shape.TensorShape([None] * value.ndim) - assign_placeholder = array_ops.placeholder( - tf_dtype, shape=placeholder_shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - get_session().run(assign_op, feed_dict={assign_placeholder: value}) - - -def batch_set_value(tuples): - """Sets the values of many tensor variables at once. - - Arguments: - tuples: a list of tuples `(tensor, value)`. - `value` should be a Numpy array. - """ - if ops.executing_eagerly_outside_functions(): - for x, value in tuples: - x.assign(np.asarray(value, dtype=dtype(x))) - else: - with get_graph().as_default(): - if tuples: - assign_ops = [] - feed_dict = {} - for x, value in tuples: - value = np.asarray(value, dtype=dtype(x)) - tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op - else: - # In order to support assigning weights to resizable variables in - # Keras, we make a placeholder with the correct number of dimensions - # but with None in each dimension. This way, we can assign weights - # of any size (as long as they have the correct dimensionality). - placeholder_shape = tensor_shape.TensorShape([None] * value.ndim) - assign_placeholder = array_ops.placeholder( - tf_dtype, shape=placeholder_shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - assign_ops.append(assign_op) - feed_dict[assign_placeholder] = value - get_session().run(assign_ops, feed_dict=feed_dict) - - -get_value.__doc__ = get_value.__doc__.format(snippet=_VALUE_SET_CODE_STRING) -set_value.__doc__ = set_value.__doc__.format(snippet=_VALUE_SET_CODE_STRING) - - -def print_tensor(x, message=''): - """Prints `message` and the tensor value when evaluated. - - Note that `print_tensor` returns a new tensor identical to `x` - which should be used in the following code. Otherwise the - print operation is not taken into account during evaluation. - - Example: - - >>> x = tf.constant([[1.0, 2.0], [3.0, 4.0]]) - >>> tf.keras.backend.print_tensor(x) - - - Arguments: - x: Tensor to print. - message: Message to print jointly with the tensor. - - Returns: - The same tensor `x`, unchanged. - """ - if isinstance(x, ops.Tensor) and hasattr(x, 'graph'): - with get_graph().as_default(): - op = logging_ops.print_v2(message, x, output_stream=sys.stdout) - with ops.control_dependencies([op]): - return array_ops.identity(x) - else: - logging_ops.print_v2(message, x, output_stream=sys.stdout) - return x - -# GRAPH MANIPULATION - - -class GraphExecutionFunction(object): - """Runs a computation graph. - - It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`. - In particular additional operations via `fetches` argument and additional - tensor substitutions via `feed_dict` arguments. Note that given - substitutions are merged with substitutions from `inputs`. Even though - `feed_dict` is passed once in the constructor (called in `model.compile()`) - we can modify the values in the dictionary. Through this feed_dict we can - provide additional substitutions besides Keras inputs. - - Arguments: - inputs: Feed placeholders to the computation graph. - outputs: Output tensors to fetch. - updates: Additional update ops to be run at function call. - name: A name to help users identify what this function does. 
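# --- Graph-mode sketch (not part of the deleted module) of the
# None-shaped-placeholder trick used by `set_value`/`batch_set_value` above:
# a placeholder with every dimension left as None accepts any value of the
# right rank, so one cached assign op can serve repeated weight assignments.
import numpy as np
import tensorflow.compat.v1 as tf1
tf1.disable_eager_execution()

v = tf1.Variable(np.zeros((2, 2), dtype='float32'))
ph = tf1.placeholder('float32', shape=[None, None])
assign_op = v.assign(ph)  # reusable for any compatible 2-D value
with tf1.Session() as sess:
  sess.run(v.initializer)
  sess.run(assign_op, feed_dict={ph: np.ones((2, 2), 'float32')})
  print(sess.run(v))  # [[1. 1.] [1. 1.]]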
- session_kwargs: Arguments to `tf.Session.run()`: - `fetches`, `feed_dict`, `options`, `run_metadata`. - """ - - def __init__(self, inputs, outputs, updates=None, name=None, - **session_kwargs): - updates = updates or [] - if not isinstance(updates, (list, tuple)): - raise TypeError('`updates` in a Keras backend function ' - 'should be a list or tuple.') - - self._inputs_structure = inputs - self.inputs = nest.flatten(inputs, expand_composites=True) - self._outputs_structure = outputs - self.outputs = cast_variables_to_tensor( - nest.flatten(outputs, expand_composites=True)) - # TODO(b/127668432): Consider using autograph to generate these - # dependencies in call. - # Index 0 = total loss or model output for `predict`. - with ops.control_dependencies([self.outputs[0]]): - updates_ops = [] - for update in updates: - if isinstance(update, tuple): - p, new_p = update - updates_ops.append(state_ops.assign(p, new_p)) - else: - # assumed already an op - updates_ops.append(update) - self.updates_op = control_flow_ops.group(*updates_ops) - self.name = name - # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', None) - # additional operations - self.fetches = session_kwargs.pop('fetches', []) - if not isinstance(self.fetches, list): - self.fetches = [self.fetches] - self.run_options = session_kwargs.pop('options', None) - self.run_metadata = session_kwargs.pop('run_metadata', None) - # The main use case of `fetches` being passed to a model is the ability - # to run custom updates - # This requires us to wrap fetches in `identity` ops. - self.fetches = [array_ops.identity(x) for x in self.fetches] - self.session_kwargs = session_kwargs - # This mapping keeps track of the function that should receive the - # output from a fetch in `fetches`: { fetch: function(fetch_output) } - # A Callback can use this to register a function with access to the - # output values for a fetch it added. - self.fetch_callbacks = {} - - if session_kwargs: - raise ValueError('Some keys in session_kwargs are not supported at this ' - 'time: %s' % (session_kwargs.keys(),)) - - self._callable_fn = None - self._feed_arrays = None - self._feed_symbols = None - self._symbol_vals = None - self._fetches = None - self._session = None - - def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session): - """Generates a callable that runs the graph. - - Arguments: - feed_arrays: List of input tensors to be fed Numpy arrays at runtime. - feed_symbols: List of input tensors to be fed symbolic tensors at runtime. - symbol_vals: List of symbolic tensors to be fed to `feed_symbols`. - session: Session to use to generate the callable. - - Returns: - Function that runs the graph according to the above options. - """ - # Prepare callable options. - callable_opts = config_pb2.CallableOptions() - # Handle external-data feed. - for x in feed_arrays: - callable_opts.feed.append(x.name) - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - callable_opts.feed.append(key.name) - # Handle symbolic feed. - for x, y in zip(feed_symbols, symbol_vals): - connection = callable_opts.tensor_connection.add() - if x.dtype != y.dtype: - y = math_ops.cast(y, dtype=x.dtype) - from_tensor = ops._as_graph_element(y) - if from_tensor is None: - from_tensor = y - connection.from_tensor = from_tensor.name # Data tensor - connection.to_tensor = x.name # Placeholder - # Handle fetches. - for x in self.outputs + self.fetches: - callable_opts.fetch.append(x.name) - # Handle updates. 
- callable_opts.target.append(self.updates_op.name) - # Handle run_options. - if self.run_options: - callable_opts.run_options.CopyFrom(self.run_options) - # Create callable. - callable_fn = session._make_callable_from_options(callable_opts) - # Cache parameters corresponding to the generated callable, so that - # we can detect future mismatches and refresh the callable. - self._callable_fn = callable_fn - self._feed_arrays = feed_arrays - self._feed_symbols = feed_symbols - self._symbol_vals = symbol_vals - self._fetches = list(self.fetches) - self._session = session - - def _call_fetch_callbacks(self, fetches_output): - for fetch, output in zip(self._fetches, fetches_output): - if fetch in self.fetch_callbacks: - self.fetch_callbacks[fetch](output) - - def _eval_if_composite(self, tensor): - """Helper method which evaluates any CompositeTensors passed to it.""" - # We need to evaluate any composite tensor objects that have been - # reconstructed in 'pack_sequence_as', since otherwise they'll be output as - # actual CompositeTensor objects instead of the value(s) contained in the - # CompositeTensors. E.g., if output_structure contains a SparseTensor, then - # this ensures that we return its value as a SparseTensorValue rather than - # a SparseTensor. - if isinstance(tensor, composite_tensor.CompositeTensor): - return self._session.run(tensor) - else: - return tensor - - def __call__(self, inputs): - inputs = nest.flatten(inputs, expand_composites=True) - - session = get_session(inputs) - feed_arrays = [] - array_vals = [] - feed_symbols = [] - symbol_vals = [] - for tensor, value in zip(self.inputs, inputs): - if value is None: - continue - - if tensor_util.is_tensor(value): - # Case: feeding symbolic tensor. - feed_symbols.append(tensor) - symbol_vals.append(value) - else: - # Case: feeding Numpy array. - feed_arrays.append(tensor) - # We need to do array conversion and type casting at this level, since - # `callable_fn` only supports exact matches. - tensor_type = dtypes_module.as_dtype(tensor.dtype) - array_vals.append(np.asarray(value, - dtype=tensor_type.as_numpy_dtype)) - - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - array_vals.append( - np.asarray(self.feed_dict[key], dtype=key.dtype.base_dtype.name)) - - # Refresh callable if anything has changed. - if (self._callable_fn is None or feed_arrays != self._feed_arrays or - symbol_vals != self._symbol_vals or - feed_symbols != self._feed_symbols or self.fetches != self._fetches or - session != self._session): - self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) - - fetched = self._callable_fn(*array_vals, - run_metadata=self.run_metadata) - self._call_fetch_callbacks(fetched[-len(self._fetches):]) - output_structure = nest.pack_sequence_as( - self._outputs_structure, - fetched[:len(self.outputs)], - expand_composites=True) - # We need to evaluate any composite tensor objects that have been - # reconstructed in 'pack_sequence_as', since otherwise they'll be output as - # actual CompositeTensor objects instead of the value(s) contained in the - # CompositeTensors. E.g., if output_structure contains a SparseTensor, then - # this ensures that we return its value as a SparseTensorValue rather than - # a SparseTensor. - return nest.map_structure(self._eval_if_composite, output_structure) - - -class EagerExecutionFunction(object): - """Helper class for constructing a TF graph function from the Keras graph. - - Arguments: - inputs: Feed placeholders to the computation graph. 
- outputs: Output tensors to fetch. - updates: Additional update ops to be run at function call. - name: A name to help users identify what this function does. - session_kwargs: Unsupported. - """ - - def __init__(self, inputs, outputs, updates=None, name=None): - self.name = name - self._inputs_structure = inputs - inputs = nest.flatten(inputs, expand_composites=True) - self._outputs_structure = outputs - outputs = nest.flatten(outputs, expand_composites=True) - - updates = updates or [] - if not isinstance(updates, (list, tuple)): - raise TypeError('`updates` in a Keras backend function ' - 'should be a list or tuple.') - - if updates and not outputs: - # Edge case; never happens in practice - raise ValueError('Cannot create a Keras backend function with updates' - ' but no outputs during eager execution.') - graphs = { - i.graph - for i in nest.flatten([inputs, outputs, updates]) - if hasattr(i, 'graph') - } - if len(graphs) > 1: - raise ValueError('Cannot create an execution function which is comprised ' - 'of elements from multiple graphs.') - - source_graph = graphs.pop() - global_graph = get_graph() - - updates_ops = [] - legacy_update_ops = [] - for update in updates: - # For legacy reasons it is allowed to pass an update as a tuple - # `(variable, new_value)` (this maps to an assign op). Otherwise it - # is assumed to already be an op -- we cannot control its execution - # order. - if isinstance(update, tuple): - legacy_update_ops.append(update) - else: - if hasattr(update, 'op'): - update = update.op - if update is not None: - # `update.op` may have been None in certain cases. - updates_ops.append(update) - - self._freezable_vars_to_feed = [] - self._freezable_vars_values = [] - freezable_vars_from_keras_graph = object_identity.ObjectIdentitySet( - _FREEZABLE_VARS.get(global_graph, {})) - with _scratch_graph() as exec_graph: - global_graph = get_graph() - if source_graph not in (exec_graph, global_graph): - raise ValueError('Unknown graph. Aborting.') - - if source_graph is global_graph and exec_graph is not global_graph: - init_tensors = ( - outputs + updates_ops + [p for [p, _] in legacy_update_ops] + - [p_new for [_, p_new] in legacy_update_ops - if isinstance(p_new, ops.Tensor)]) - lifted_map = lift_to_graph.lift_to_graph( - tensors=init_tensors, - graph=exec_graph, - sources=inputs, - add_sources=True, - handle_captures=True, - base_graph=source_graph) - - inputs = [lifted_map[i] for i in inputs] - outputs = [lifted_map[i] for i in outputs] - updates_ops = [lifted_map[i] for i in updates_ops] - legacy_update_ops = [(lifted_map[p], lifted_map.get(p_new, p_new)) - for p, p_new in legacy_update_ops] - - # Keep track of the value to feed to any "freezable variables" - # created in this graph. - for old_op, new_op in lifted_map.items(): - if old_op in freezable_vars_from_keras_graph: - frozen_var = old_op - if frozen_var._initial_value != frozen_var._current_value: - # We only feed a frozen_variable if its value has changed; - # otherwise it can rely on the default value of the - # underlying placeholder_with_default. 
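-              # At this point the current value has diverged from the
-              # initial one, so both the lifted placeholder and the value
-              # to feed at call time are recorded.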
-              self._freezable_vars_to_feed.append(new_op)
-              self._freezable_vars_values.append(frozen_var._current_value)
-
-      # Consolidate updates
-      with exec_graph.as_default():
-        outputs = cast_variables_to_tensor(outputs)
-        with ops.control_dependencies(outputs):
-          for p, p_new in legacy_update_ops:
-            updates_ops.append(state_ops.assign(p, p_new))
-
-        self.inputs, self.outputs = inputs, outputs
-        self._input_references = self.inputs + self._freezable_vars_to_feed
-        with ops.control_dependencies(updates_ops):
-          self.outputs[0] = array_ops.identity(self.outputs[0])
-
-        exec_graph.inputs = self._input_references + exec_graph.internal_captures
-        exec_graph.outputs = self.outputs
-        graph_fn = eager_function.ConcreteFunction(exec_graph)
-
-      graph_fn._num_positional_args = len(self._input_references)
-      graph_fn._arg_keywords = []
-      self._graph_fn = graph_fn
-
-      # Handle placeholders with default
-      # (treated as required placeholder by graph functions)
-      self._placeholder_default_values = {}
-      with exec_graph.as_default():
-        for x in self.inputs:
-          if x.op.type == 'PlaceholderWithDefault':
-            self._placeholder_default_values[ops.tensor_id(
-                x)] = tensor_util.constant_value(x.op.inputs[0])
-
-  def __call__(self, inputs):
-    input_values = nest.flatten(inputs, expand_composites=True)
-
-    if self._freezable_vars_values:
-      input_values = input_values + self._freezable_vars_values
-    converted_inputs = []
-    for tensor, value in zip(self._input_references, input_values):
-      if value is None:
-        # Assume `value` is a placeholder with default
-        value = self._placeholder_default_values.get(
-            ops.tensor_id(tensor), None)
-        if value is None:
-          raise ValueError(
-              'You must feed a value for placeholder %s' % (tensor,))
-      if not isinstance(value, ops.Tensor):
-        value = ops.convert_to_tensor_v2(value, dtype=tensor.dtype)
-      if value.dtype != tensor.dtype:
-        # Temporary workaround due to `convert_to_tensor` not casting floats.
-        # See b/119637405
-        value = math_ops.cast(value, tensor.dtype)
-      converted_inputs.append(value)
-    outputs = self._graph_fn(*converted_inputs)
-
-    # EagerTensor.numpy() will often make a copy to ensure memory safety.
-    # However in this case `outputs` is not directly returned, so it is always
-    # safe to reuse the underlying buffer without checking. In such a case the
-    # private numpy conversion method is preferred to guarantee performance.
-    return nest.pack_sequence_as(
-        self._outputs_structure,
-        [x._numpy() for x in outputs],  # pylint: disable=protected-access
-        expand_composites=True)
-
-
-def function(inputs, outputs, updates=None, name=None, **kwargs):
-  """Instantiates a Keras function.
-
-  Arguments:
-    inputs: List of placeholder tensors.
-    outputs: List of output tensors.
-    updates: List of update ops.
-    name: String, name of function.
-    **kwargs: Passed to `tf.Session.run`.
-
-  Returns:
-    A callable that computes the output values for given input values.
-
-  Raises:
-    ValueError: if invalid kwargs are passed in or if in eager execution.
-  """
-  if ops.executing_eagerly_outside_functions():
-    if kwargs:
-      raise ValueError('Session keyword arguments are not supported during '
-                       'eager execution. 
You passed: %s' % (kwargs,)) - return EagerExecutionFunction(inputs, outputs, updates=updates, name=name) - - if kwargs: - for key in kwargs: - if (key not in tf_inspect.getfullargspec(session_module.Session.run)[0] - and key not in ['inputs', 'outputs', 'updates', 'name']): - msg = ('Invalid argument "%s" passed to K.function with TensorFlow ' - 'backend') % key - raise ValueError(msg) - return GraphExecutionFunction(inputs, outputs, updates=updates, **kwargs) - - -def gradients(loss, variables): - """Returns the gradients of `loss` w.r.t. `variables`. - - Arguments: - loss: Scalar tensor to minimize. - variables: List of variables. - - Returns: - A gradients tensor. - """ - return gradients_module.gradients( - loss, variables, colocate_gradients_with_ops=True) - - -def stop_gradient(variables): - """Returns `variables` but with zero gradient w.r.t. every other variable. - - Arguments: - variables: Tensor or list of tensors to consider constant with respect - to any other variable. - - - Returns: - A single tensor or a list of tensors (depending on the passed argument) - that has no gradient with respect to any other variable. - """ - if isinstance(variables, (list, tuple)): - return map(array_ops.stop_gradient, variables) - return array_ops.stop_gradient(variables) - - -# CONTROL FLOW - - -def rnn(step_function, - inputs, - initial_states, - go_backwards=False, - mask=None, - constants=None, - unroll=False, - input_length=None, - time_major=False, - zero_output_for_mask=False): - """Iterates over the time dimension of a tensor. - - Arguments: - step_function: RNN step function. - Args; - input; Tensor with shape `(samples, ...)` (no time dimension), - representing input for the batch of samples at a certain - time step. - states; List of tensors. - Returns; - output; Tensor with shape `(samples, output_dim)` - (no time dimension). - new_states; List of tensors, same length and shapes - as 'states'. The first state in the list must be the - output tensor at the previous timestep. - inputs: Tensor of temporal data of shape `(samples, time, ...)` - (at least 3D), or nested tensors, and each of which has shape - `(samples, time, ...)`. - initial_states: Tensor with shape `(samples, state_size)` - (no time dimension), containing the initial values for the states used - in the step function. In the case that state_size is in a nested - shape, the shape of initial_states will also follow the nested - structure. - go_backwards: Boolean. If True, do the iteration over the time - dimension in reverse order and return the reversed sequence. - mask: Binary tensor with shape `(samples, time, 1)`, - with a zero for every element that is masked. - constants: List of constant values passed at each step. - unroll: Whether to unroll the RNN or to use a symbolic `while_loop`. - input_length: An integer or a 1-D Tensor, depending on whether - the time dimension is fixed-length or not. In case of variable length - input, it is used for masking in case there's no mask specified. - time_major: Boolean. If true, the inputs and outputs will be in shape - `(timesteps, batch, ...)`, whereas in the False case, it will be - `(batch, timesteps, ...)`. Using `time_major = True` is a bit more - efficient because it avoids transposes at the beginning and end of the - RNN calculation. However, most TensorFlow data is batch-major, so by - default this function accepts input and emits output in batch-major - form. - zero_output_for_mask: Boolean. 
If True, the output for masked timestep - will be zeros, whereas in the False case, output from previous - timestep is returned. - - Returns: - A tuple, `(last_output, outputs, new_states)`. - last_output: the latest output of the rnn, of shape `(samples, ...)` - outputs: tensor with shape `(samples, time, ...)` where each - entry `outputs[s, t]` is the output of the step function - at time `t` for sample `s`. - new_states: list of tensors, latest states returned by - the step function, of shape `(samples, ...)`. - - Raises: - ValueError: if input dimension is less than 3. - ValueError: if `unroll` is `True` but input timestep is not a fixed - number. - ValueError: if `mask` is provided (not `None`) but states is not provided - (`len(states)` == 0). - """ - - def swap_batch_timestep(input_t): - # Swap the batch and timestep dim for the incoming tensor. - axes = list(range(len(input_t.shape))) - axes[0], axes[1] = 1, 0 - return array_ops.transpose(input_t, axes) - - if not time_major: - inputs = nest.map_structure(swap_batch_timestep, inputs) - - flatted_inputs = nest.flatten(inputs) - time_steps = flatted_inputs[0].shape[0] - batch = flatted_inputs[0].shape[1] - time_steps_t = array_ops.shape(flatted_inputs[0])[0] - - for input_ in flatted_inputs: - input_.shape.with_rank_at_least(3) - - if mask is not None: - if mask.dtype != dtypes_module.bool: - mask = math_ops.cast(mask, dtypes_module.bool) - if len(mask.shape) == 2: - mask = expand_dims(mask) - if not time_major: - mask = swap_batch_timestep(mask) - - if constants is None: - constants = [] - - # tf.where needs its condition tensor to be the same shape as its two - # result tensors, but in our case the condition (mask) tensor is - # (nsamples, 1), and inputs are (nsamples, ndimensions) or even more. - # So we need to broadcast the mask to match the shape of inputs. - # That's what the tile call does, it just repeats the mask along its - # second dimension n times. - def _expand_mask(mask_t, input_t, fixed_dim=1): - if nest.is_sequence(mask_t): - raise ValueError('mask_t is expected to be tensor, but got %s' % mask_t) - if nest.is_sequence(input_t): - raise ValueError('input_t is expected to be tensor, but got %s' % input_t) - rank_diff = len(input_t.shape) - len(mask_t.shape) - for _ in range(rank_diff): - mask_t = array_ops.expand_dims(mask_t, -1) - multiples = [1] * fixed_dim + input_t.shape.as_list()[fixed_dim:] - return array_ops.tile(mask_t, multiples) - - if unroll: - if not time_steps: - raise ValueError('Unrolling requires a fixed number of timesteps.') - states = tuple(initial_states) - successive_states = [] - successive_outputs = [] - - # Process the input tensors. The input tensor need to be split on the - # time_step dim, and reverse if go_backwards is True. In the case of nested - # input, the input is flattened and then transformed individually. 
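-    # E.g. an input of shape (time_steps, batch, features) is unstacked into
-    # a Python list of `time_steps` tensors of shape (batch, features), and
-    # the list is simply reversed when go_backwards is set.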
- # The result of this will be a tuple of lists, each of the item in tuple is - # list of the tensor with shape (batch, feature) - def _process_single_input_t(input_t): - input_t = array_ops.unstack(input_t) # unstack for time_step dim - if go_backwards: - input_t.reverse() - return input_t - - if nest.is_sequence(inputs): - processed_input = nest.map_structure(_process_single_input_t, inputs) - else: - processed_input = (_process_single_input_t(inputs),) - - def _get_input_tensor(time): - inp = [t_[time] for t_ in processed_input] - return nest.pack_sequence_as(inputs, inp) - - if mask is not None: - mask_list = array_ops.unstack(mask) - if go_backwards: - mask_list.reverse() - - for i in range(time_steps): - inp = _get_input_tensor(i) - mask_t = mask_list[i] - output, new_states = step_function(inp, - tuple(states) + tuple(constants)) - tiled_mask_t = _expand_mask(mask_t, output) - - if not successive_outputs: - prev_output = zeros_like(output) - else: - prev_output = successive_outputs[-1] - - output = array_ops.where_v2(tiled_mask_t, output, prev_output) - - flat_states = nest.flatten(states) - flat_new_states = nest.flatten(new_states) - tiled_mask_t = tuple(_expand_mask(mask_t, s) for s in flat_states) - flat_final_states = tuple( - array_ops.where_v2(m, s, ps) - for m, s, ps in zip(tiled_mask_t, flat_new_states, flat_states)) - states = nest.pack_sequence_as(states, flat_final_states) - - successive_outputs.append(output) - successive_states.append(states) - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = array_ops.stack(successive_outputs) - - if zero_output_for_mask: - last_output = array_ops.where_v2( - _expand_mask(mask_list[-1], last_output), last_output, - zeros_like(last_output)) - outputs = array_ops.where_v2( - _expand_mask(mask, outputs, fixed_dim=2), outputs, - zeros_like(outputs)) - - else: # mask is None - for i in range(time_steps): - inp = _get_input_tensor(i) - output, states = step_function(inp, tuple(states) + tuple(constants)) - successive_outputs.append(output) - successive_states.append(states) - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = array_ops.stack(successive_outputs) - - else: # Unroll == False - states = tuple(initial_states) - - # Create input tensor array, if the inputs is nested tensors, then it will - # be flattened first, and tensor array will be created one per flattened - # tensor. - input_ta = tuple( - tensor_array_ops.TensorArray( - dtype=inp.dtype, - size=time_steps_t, - tensor_array_name='input_ta_%s' % i) - for i, inp in enumerate(flatted_inputs)) - input_ta = tuple( - ta.unstack(input_) if not go_backwards else ta - .unstack(reverse(input_, 0)) - for ta, input_ in zip(input_ta, flatted_inputs)) - - # Get the time(0) input and compute the output for that, the output will be - # used to determine the dtype of output tensor array. Don't read from - # input_ta due to TensorArray clear_after_read default to True. - input_time_zero = nest.pack_sequence_as(inputs, - [inp[0] for inp in flatted_inputs]) - # output_time_zero is used to determine the cell output shape and its dtype. - # the value is discarded. 
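-    # Calling the step function once on the time-0 input (outside the loop)
-    # is a cheap way to discover the per-step output structure, shapes and
-    # dtypes needed to size the output TensorArrays below.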
- output_time_zero, _ = step_function( - input_time_zero, tuple(initial_states) + tuple(constants)) - output_ta = tuple( - tensor_array_ops.TensorArray( - dtype=out.dtype, - size=time_steps_t, - element_shape=out.shape, - tensor_array_name='output_ta_%s' % i) - for i, out in enumerate(nest.flatten(output_time_zero))) - - time = constant_op.constant(0, dtype='int32', name='time') - - # We only specify the 'maximum_iterations' when building for XLA since that - # causes slowdowns on GPU in TF. - if (not context.executing_eagerly() and - control_flow_util.GraphOrParentsInXlaContext(ops.get_default_graph())): - max_iterations = math_ops.reduce_max(input_length) - else: - max_iterations = None - - while_loop_kwargs = { - 'cond': lambda time, *_: time < time_steps_t, - 'maximum_iterations': max_iterations, - 'parallel_iterations': 32, - 'swap_memory': True, - } - if mask is not None: - if go_backwards: - mask = reverse(mask, 0) - - mask_ta = tensor_array_ops.TensorArray( - dtype=dtypes_module.bool, - size=time_steps_t, - tensor_array_name='mask_ta') - mask_ta = mask_ta.unstack(mask) - - def masking_fn(time): - return mask_ta.read(time) - - def compute_masked_output(mask_t, flat_out, flat_mask): - tiled_mask_t = tuple( - _expand_mask(mask_t, o, fixed_dim=len(mask_t.shape)) - for o in flat_out) - return tuple( - array_ops.where_v2(m, o, fm) - for m, o, fm in zip(tiled_mask_t, flat_out, flat_mask)) - elif isinstance(input_length, ops.Tensor): - if go_backwards: - max_len = math_ops.reduce_max(input_length, axis=0) - rev_input_length = math_ops.subtract(max_len - 1, input_length) - - def masking_fn(time): - return math_ops.less(rev_input_length, time) - else: - - def masking_fn(time): - return math_ops.greater(input_length, time) - - def compute_masked_output(mask_t, flat_out, flat_mask): - return tuple( - array_ops.where(mask_t, o, zo) - for (o, zo) in zip(flat_out, flat_mask)) - else: - masking_fn = None - - if masking_fn is not None: - # Mask for the T output will be base on the output of T - 1. In the case - # T = 0, a zero filled tensor will be used. - flat_zero_output = tuple(array_ops.zeros_like(o) - for o in nest.flatten(output_time_zero)) - def _step(time, output_ta_t, prev_output, *states): - """RNN step function. - - Arguments: - time: Current timestep value. - output_ta_t: TensorArray. - prev_output: tuple of outputs from time - 1. - *states: List of states. - - Returns: - Tuple: `(time + 1, output_ta_t, output) + tuple(new_states)` - """ - current_input = tuple(ta.read(time) for ta in input_ta) - # maybe set shape. 
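-        # Repack the flat per-timestep reads into the caller's original
-        # (possibly nested) input structure before invoking step_function.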
- current_input = nest.pack_sequence_as(inputs, current_input) - mask_t = masking_fn(time) - output, new_states = step_function(current_input, - tuple(states) + tuple(constants)) - # mask output - flat_output = nest.flatten(output) - flat_mask_output = (flat_zero_output if zero_output_for_mask - else nest.flatten(prev_output)) - flat_new_output = compute_masked_output(mask_t, flat_output, - flat_mask_output) - - # mask states - flat_state = nest.flatten(states) - flat_new_state = nest.flatten(new_states) - for state, new_state in zip(flat_state, flat_new_state): - if isinstance(new_state, ops.Tensor): - new_state.set_shape(state.shape) - flat_final_state = compute_masked_output(mask_t, flat_new_state, - flat_state) - new_states = nest.pack_sequence_as(new_states, flat_final_state) - - output_ta_t = tuple( - ta.write(time, out) - for ta, out in zip(output_ta_t, flat_new_output)) - return (time + 1, output_ta_t, - tuple(flat_new_output)) + tuple(new_states) - - final_outputs = control_flow_ops.while_loop( - body=_step, - loop_vars=(time, output_ta, flat_zero_output) + states, - **while_loop_kwargs) - # Skip final_outputs[2] which is the output for final timestep. - new_states = final_outputs[3:] - else: - def _step(time, output_ta_t, *states): - """RNN step function. - - Arguments: - time: Current timestep value. - output_ta_t: TensorArray. - *states: List of states. - - Returns: - Tuple: `(time + 1,output_ta_t) + tuple(new_states)` - """ - current_input = tuple(ta.read(time) for ta in input_ta) - current_input = nest.pack_sequence_as(inputs, current_input) - output, new_states = step_function(current_input, - tuple(states) + tuple(constants)) - flat_state = nest.flatten(states) - flat_new_state = nest.flatten(new_states) - for state, new_state in zip(flat_state, flat_new_state): - if isinstance(new_state, ops.Tensor): - new_state.set_shape(state.shape) - - flat_output = nest.flatten(output) - output_ta_t = tuple( - ta.write(time, out) for ta, out in zip(output_ta_t, flat_output)) - new_states = nest.pack_sequence_as(initial_states, flat_new_state) - return (time + 1, output_ta_t) + tuple(new_states) - - final_outputs = control_flow_ops.while_loop( - body=_step, - loop_vars=(time, output_ta) + states, - **while_loop_kwargs) - new_states = final_outputs[2:] - - output_ta = final_outputs[1] - - outputs = tuple(o.stack() for o in output_ta) - last_output = tuple(o[-1] for o in outputs) - - outputs = nest.pack_sequence_as(output_time_zero, outputs) - last_output = nest.pack_sequence_as(output_time_zero, last_output) - - # static shape inference - def set_shape(output_): - if isinstance(output_, ops.Tensor): - shape = output_.shape.as_list() - shape[0] = time_steps - shape[1] = batch - output_.set_shape(shape) - return output_ - - outputs = nest.map_structure(set_shape, outputs) - - if not time_major: - outputs = nest.map_structure(swap_batch_timestep, outputs) - - return last_output, outputs, new_states - - -def switch(condition, then_expression, else_expression): - """Switches between two operations depending on a scalar value. - - Note that both `then_expression` and `else_expression` - should be symbolic tensors of the *same shape*. - - Arguments: - condition: tensor (`int` or `bool`). - then_expression: either a tensor, or a callable that returns a tensor. - else_expression: either a tensor, or a callable that returns a tensor. - - Returns: - The selected tensor. - - Raises: - ValueError: If rank of `condition` is greater than rank of expressions. 
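-
-  Example (an illustrative sketch; assumes this module's `constant`
-  helper):
-
-  >>> cond = constant(True, dtype='bool')
-  >>> result = switch(cond, constant(1.), constant(0.))  # selects `1.`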
- """ - if condition.dtype != dtypes_module.bool: - condition = math_ops.cast(condition, 'bool') - cond_ndim = ndim(condition) - if not cond_ndim: - if not callable(then_expression): - - def then_expression_fn(): - return then_expression - else: - then_expression_fn = then_expression - if not callable(else_expression): - - def else_expression_fn(): - return else_expression - else: - else_expression_fn = else_expression - x = control_flow_ops.cond(condition, then_expression_fn, else_expression_fn) - else: - # tf.where needs its condition tensor - # to be the same shape as its two - # result tensors - if callable(then_expression): - then_expression = then_expression() - if callable(else_expression): - else_expression = else_expression() - expr_ndim = ndim(then_expression) - if cond_ndim > expr_ndim: - raise ValueError('Rank of `condition` should be less than or' - ' equal to rank of `then_expression` and ' - '`else_expression`. ndim(condition)=' + str(cond_ndim) + - ', ndim(then_expression)' - '=' + str(expr_ndim)) - if cond_ndim > 1: - ndim_diff = expr_ndim - cond_ndim - cond_shape = array_ops.concat( - [array_ops.shape(condition), [1] * ndim_diff], axis=0) - condition = array_ops.reshape(condition, cond_shape) - expr_shape = array_ops.shape(then_expression) - shape_diff = expr_shape - cond_shape - tile_shape = array_ops.where_v2(shape_diff > 0, expr_shape, - array_ops.ones_like(expr_shape)) - condition = array_ops.tile(condition, tile_shape) - x = array_ops.where_v2(condition, then_expression, else_expression) - return x - - -def in_train_phase(x, alt, training=None): - """Selects `x` in train phase, and `alt` otherwise. - - Note that `alt` should have the *same shape* as `x`. - - Arguments: - x: What to return in train phase - (tensor or callable that returns a tensor). - alt: What to return otherwise - (tensor or callable that returns a tensor). - training: Optional scalar tensor - (or Python boolean, or Python integer) - specifying the learning phase. - - Returns: - Either `x` or `alt` based on the `training` flag. - the `training` flag defaults to `K.learning_phase()`. - """ - from tensorflow.python.frozen_keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top - if training is None: - training = base_layer_utils.call_context().training - - if training is None: - training = learning_phase() - - # TODO(b/138862903): Handle the case when training is tensor. - if not tensor_util.is_tensor(training): - if training == 1 or training is True: - if callable(x): - return x() - else: - return x - - elif training == 0 or training is False: - if callable(alt): - return alt() - else: - return alt - - # else: assume learning phase is a placeholder tensor. - x = switch(training, x, alt) - return x - - -def in_test_phase(x, alt, training=None): - """Selects `x` in test phase, and `alt` otherwise. - - Note that `alt` should have the *same shape* as `x`. - - Arguments: - x: What to return in test phase - (tensor or callable that returns a tensor). - alt: What to return otherwise - (tensor or callable that returns a tensor). - training: Optional scalar tensor - (or Python boolean, or Python integer) - specifying the learning phase. - - Returns: - Either `x` or `alt` based on `K.learning_phase`. - """ - return in_train_phase(alt, x, training=training) - - -# NN OPERATIONS - - -def relu(x, alpha=0., max_value=None, threshold=0): - """Rectified linear unit. - - With default values, it returns element-wise `max(x, 0)`. 
- - Otherwise, it follows: - `f(x) = max_value` for `x >= max_value`, - `f(x) = x` for `threshold <= x < max_value`, - `f(x) = alpha * (x - threshold)` otherwise. - - Arguments: - x: A tensor or variable. - alpha: A scalar, slope of negative section (default=`0.`). - max_value: float. Saturation threshold. - threshold: float. Threshold value for thresholded activation. - - Returns: - A tensor. - """ - - if alpha != 0.: - if max_value is None and threshold == 0: - return nn.leaky_relu(x, alpha=alpha) - - if threshold != 0: - negative_part = nn.relu(-x + threshold) - else: - negative_part = nn.relu(-x) - - clip_max = max_value is not None - - if threshold != 0: - # computes x for x > threshold else 0 - x = x * math_ops.cast(math_ops.greater(x, threshold), floatx()) - elif max_value == 6: - # if no threshold, then can use nn.relu6 native TF op for performance - x = nn.relu6(x) - clip_max = False - else: - x = nn.relu(x) - - if clip_max: - max_value = _constant_to_tensor(max_value, x.dtype.base_dtype) - zero = _constant_to_tensor(0, x.dtype.base_dtype) - x = clip_ops.clip_by_value(x, zero, max_value) - - if alpha != 0.: - alpha = _to_tensor(alpha, x.dtype.base_dtype) - x -= alpha * negative_part - return x - - -def elu(x, alpha=1.): - """Exponential linear unit. - - Arguments: - x: A tensor or variable to compute the activation function for. - alpha: A scalar, slope of negative section. - - Returns: - A tensor. - """ - res = nn.elu(x) - if alpha == 1: - return res - else: - return array_ops.where_v2(x > 0, res, alpha * res) - - -def softmax(x, axis=-1): - """Softmax of a tensor. - - Arguments: - x: A tensor or variable. - axis: The dimension softmax would be performed on. - The default is -1 which indicates the last dimension. - - Returns: - A tensor. - """ - return nn.softmax(x, axis=axis) - - -def softplus(x): - """Softplus of a tensor. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - """ - return nn.softplus(x) - - -def softsign(x): - """Softsign of a tensor. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - """ - return nn.softsign(x) - - -def _backtrack_identity(tensor): - while tensor.op.type == 'Identity': - tensor = tensor.op.inputs[0] - return tensor - - -def categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy between an output tensor and a target tensor. - - Arguments: - target: A tensor of the same shape as `output`. - output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` corresponds to data - format `channels_last', and `axis=1` corresponds to data format - `channels_first`. - - Returns: - Output tensor. - - Raises: - ValueError: if `axis` is neither -1 nor one of the axes of `output`. - - Example: - - >>> a = tf.constant([1., 0., 0., 0., 1., 0., 0., 0., 1.], shape=[3,3]) - >>> print(a) - tf.Tensor( - [[1. 0. 0.] - [0. 1. 0.] - [0. 0. 1.]], shape=(3, 3), dtype=float32) - >>> b = tf.constant([.9, .05, .05, .5, .89, .6, .05, .01, .94], shape=[3,3]) - >>> print(b) - tf.Tensor( - [[0.9 0.05 0.05] - [0.5 0.89 0.6 ] - [0.05 0.01 0.94]], shape=(3, 3), dtype=float32) - >>> loss = tf.keras.backend.categorical_crossentropy(a, b) - >>> print(np.around(loss, 5)) - [0.10536 0.80467 0.06188] - >>> loss = tf.keras.backend.categorical_crossentropy(a, a) - >>> print(np.around(loss, 5)) - [0. 
0. 0.] - - """ - target.shape.assert_is_compatible_with(output.shape) - if from_logits: - return nn.softmax_cross_entropy_with_logits_v2( - labels=target, logits=output, axis=axis) - - if not isinstance(output, (ops.EagerTensor, variables_module.Variable)): - output = _backtrack_identity(output) - if output.op.type == 'Softmax': - # When softmax activation function is used for output operation, we - # use logits from the softmax function directly to compute loss in order - # to prevent collapsing zero when training. - # See b/117284466 - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - return nn.softmax_cross_entropy_with_logits_v2( - labels=target, logits=output, axis=axis) - - # scale preds so that the class probas of each sample sum to 1 - output = output / math_ops.reduce_sum(output, axis, True) - # Compute cross entropy from probabilities. - epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_) - return -math_ops.reduce_sum(target * math_ops.log(output), axis) - - -def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy with integer targets. - - Arguments: - target: An integer tensor. - output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` corresponds to data - format `channels_last', and `axis=1` corresponds to data format - `channels_first`. - - Returns: - Output tensor. - - Raises: - ValueError: if `axis` is neither -1 nor one of the axes of `output`. - """ - if not from_logits and not isinstance( - output, (ops.EagerTensor, variables_module.Variable)): - output = _backtrack_identity(output) - if output.op.type == 'Softmax': - # When softmax activation function is used for output operation, we - # use logits from the softmax function directly to compute loss in order - # to prevent collapsing zero when training. - # See b/117284466 - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - from_logits = True - - if not from_logits: - epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_) - output = math_ops.log(output) - - if isinstance(output.shape, (tuple, list)): - output_rank = len(output.shape) - else: - output_rank = output.shape.ndims - if output_rank is not None: - axis %= output_rank - if axis != output_rank - 1: - permutation = list( - itertools.chain(range(axis), range(axis + 1, output_rank), [axis])) - output = array_ops.transpose(output, perm=permutation) - elif axis != -1: - raise ValueError( - 'Cannot compute sparse categorical crossentropy with `axis={}` on an ' - 'output tensor with unknown rank'.format(axis)) - - target = cast(target, 'int64') - - # Try to adjust the shape so that rank of labels = rank of logits - 1. 
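-  # E.g. with logits of shape (batch, time, classes) and integer targets of
-  # shape (batch, time, 1), the targets are flattened and the logits are
-  # reshaped to (batch * time, classes) before the sparse op below; the
-  # result is then reshaped back to (batch, time).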
- output_shape = array_ops.shape_v2(output) - target_rank = target.shape.ndims - - update_shape = ( - target_rank is not None and output_rank is not None and - target_rank != output_rank - 1) - if update_shape: - target = flatten(target) - output = array_ops.reshape(output, [-1, output_shape[-1]]) - - if py_any(_is_symbolic_tensor(v) for v in [target, output]): - with get_graph().as_default(): - res = nn.sparse_softmax_cross_entropy_with_logits_v2( - labels=target, logits=output) - else: - res = nn.sparse_softmax_cross_entropy_with_logits_v2( - labels=target, logits=output) - - if update_shape and output_rank >= 3: - # If our output includes timesteps or spatial dimensions we need to reshape - return array_ops.reshape(res, output_shape[:-1]) - else: - return res - - -def binary_crossentropy(target, output, from_logits=False): - """Binary crossentropy between an output tensor and a target tensor. - - Arguments: - target: A tensor with the same shape as `output`. - output: A tensor. - from_logits: Whether `output` is expected to be a logits tensor. - By default, we consider that `output` - encodes a probability distribution. - - Returns: - A tensor. - """ - if from_logits: - return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) - - if not isinstance(output, (ops.EagerTensor, variables_module.Variable)): - output = _backtrack_identity(output) - if output.op.type == 'Sigmoid': - # When sigmoid activation function is used for output operation, we - # use logits from the sigmoid function directly to compute loss in order - # to prevent collapsing zero when training. - assert len(output.op.inputs) == 1 - output = output.op.inputs[0] - return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) - - epsilon_ = _constant_to_tensor(epsilon(), output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_) - - # Compute cross entropy from probabilities. - bce = target * math_ops.log(output + epsilon()) - bce += (1 - target) * math_ops.log(1 - output + epsilon()) - return -bce - - -def sigmoid(x): - """Element-wise sigmoid. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - """ - return nn.sigmoid(x) - - -def hard_sigmoid(x): - """Segment-wise linear approximation of sigmoid. - - Faster than sigmoid. - Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`. - In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - """ - point_two = _constant_to_tensor(0.2, x.dtype.base_dtype) - point_five = _constant_to_tensor(0.5, x.dtype.base_dtype) - x = math_ops.mul(x, point_two) - x = math_ops.add(x, point_five) - x = clip_ops.clip_by_value(x, 0., 1.) - return x - - -def tanh(x): - """Element-wise tanh. - - Arguments: - x: A tensor or variable. - - Returns: - A tensor. - """ - return nn.tanh(x) - - -def dropout(x, level, noise_shape=None, seed=None): - """Sets entries in `x` to zero at random, while scaling the entire tensor. - - Arguments: - x: tensor - level: fraction of the entries in the tensor - that will be set to 0. - noise_shape: shape for randomly generated keep/drop flags, - must be broadcastable to the shape of `x` - seed: random seed to ensure determinism. - - Returns: - A tensor. - """ - if seed is None: - seed = np.random.randint(10e6) - return nn.dropout_v2(x, rate=level, noise_shape=noise_shape, seed=seed) - - -def l2_normalize(x, axis=None): - """Normalizes a tensor wrt the L2 norm alongside the specified axis. - - Arguments: - x: Tensor or variable. 
- axis: axis along which to perform normalization. - - Returns: - A tensor. - """ - return nn.l2_normalize(x, axis=axis) - - -def in_top_k(predictions, targets, k): - """Returns whether the `targets` are in the top `k` `predictions`. - - Arguments: - predictions: A tensor of shape `(batch_size, classes)` and type `float32`. - targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. - k: An `int`, number of top elements to consider. - - Returns: - A 1D tensor of length `batch_size` and type `bool`. - `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` - values of `predictions[i]`. - """ - return nn.in_top_k(predictions, targets, k) - - -# CONVOLUTIONS - - -def _preprocess_conv1d_input(x, data_format): - """Transpose and cast the input before the conv1d. - - Arguments: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor. - """ - tf_data_format = 'NWC' # to pass TF Conv2dNative operations - if data_format == 'channels_first': - if not _has_nchw_support(): - x = array_ops.transpose(x, (0, 2, 1)) # NCW -> NWC - else: - tf_data_format = 'NCW' - return x, tf_data_format - - -def _preprocess_conv2d_input(x, data_format, force_transpose=False): - """Transpose and cast the input before the conv2d. - - Arguments: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - force_transpose: Boolean. If True, the input will always be transposed - from NCHW to NHWC if `data_format` is `"channels_first"`. - If False, the transposition only occurs on CPU (GPU ops are - assumed to support NCHW). - - Returns: - A tensor. - """ - tf_data_format = 'NHWC' - if data_format == 'channels_first': - if not _has_nchw_support() or force_transpose: - x = array_ops.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC - else: - tf_data_format = 'NCHW' - return x, tf_data_format - - -def _preprocess_conv3d_input(x, data_format): - """Transpose and cast the input before the conv3d. - - Arguments: - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor. - """ - tf_data_format = 'NDHWC' - if data_format == 'channels_first': - if not _has_nchw_support(): - x = array_ops.transpose(x, (0, 2, 3, 4, 1)) - else: - tf_data_format = 'NCDHW' - return x, tf_data_format - - -def _preprocess_padding(padding): - """Convert keras' padding to TensorFlow's padding. - - Arguments: - padding: string, one of 'same' , 'valid' - - Returns: - a string, one of 'SAME', 'VALID'. - - Raises: - ValueError: if invalid `padding'` - """ - if padding == 'same': - padding = 'SAME' - elif padding == 'valid': - padding = 'VALID' - else: - raise ValueError('Invalid padding: ' + str(padding)) - return padding - - -def conv1d(x, - kernel, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1): - """1D convolution. - - Arguments: - x: Tensor or variable. - kernel: kernel tensor. - strides: stride integer. - padding: string, `"same"`, `"causal"` or `"valid"`. - data_format: string, one of "channels_last", "channels_first". - dilation_rate: integer dilate rate. - - Returns: - A tensor, result of 1D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. 
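-
-  Example (illustrative only; uses the module-level `random_uniform` and
-  `int_shape` helpers, and the values are arbitrary):
-
-  >>> x = random_uniform((2, 10, 8))       # (batch, steps, in_channels)
-  >>> kernel = random_uniform((3, 8, 16))  # (width, in_channels, out_channels)
-  >>> int_shape(conv1d(x, kernel, padding='same'))
-  (2, 10, 16)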
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - kernel_shape = kernel.shape.as_list() - if padding == 'causal': - # causal (dilated) convolution: - left_pad = dilation_rate * (kernel_shape[0] - 1) - x = temporal_padding(x, (left_pad, 0)) - padding = 'valid' - padding = _preprocess_padding(padding) - - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - x = nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NWC': - x = array_ops.transpose(x, (0, 2, 1)) # NWC -> NCW - return x - - -def conv2d(x, - kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution. - - Arguments: - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of 2 integers. - - Returns: - A tensor, result of 2D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - x = nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def conv2d_transpose(x, - kernel, - output_shape, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D deconvolution (i.e. - - transposed convolution). - - Arguments: - x: Tensor or variable. - kernel: kernel tensor. - output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: Tuple of 2 integers. - - Returns: - A tensor, result of transposed 2D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - # `atrous_conv2d_transpose` only supports NHWC format, even on GPU. 
- if data_format == 'channels_first' and dilation_rate != (1, 1): - force_transpose = True - else: - force_transpose = False - - x, tf_data_format = _preprocess_conv2d_input(x, data_format, force_transpose) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - output_shape = (output_shape[0], output_shape[2], output_shape[3], - output_shape[1]) - if output_shape[0] is None: - output_shape = (shape(x)[0],) + tuple(output_shape[1:]) - - if isinstance(output_shape, (tuple, list)): - output_shape = array_ops.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - if dilation_rate == (1, 1): - x = nn.conv2d_transpose(x, kernel, output_shape, strides, - padding=padding, - data_format=tf_data_format) - else: - assert dilation_rate[0] == dilation_rate[1] - x = nn.atrous_conv2d_transpose( - x, - kernel, - output_shape, - rate=dilation_rate[0], - padding=padding) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def separable_conv1d(x, - depthwise_kernel, - pointwise_kernel, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1): - """1D convolution with separable filters. - - Arguments: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: stride integer. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: integer dilation rate. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - if isinstance(strides, int): - strides = (strides,) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) - - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - padding = _preprocess_padding(padding) - if not isinstance(strides, tuple): - strides = tuple(strides) - if tf_data_format == 'NWC': - spatial_start_dim = 1 - strides = (1,) + strides * 2 + (1,) - else: - spatial_start_dim = 2 - strides = (1, 1) + strides * 2 - x = array_ops.expand_dims(x, spatial_start_dim) - depthwise_kernel = array_ops.expand_dims(depthwise_kernel, 0) - pointwise_kernel = array_ops.expand_dims(pointwise_kernel, 0) - dilation_rate = (1,) + dilation_rate - - x = nn.separable_conv2d( - x, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - - x = array_ops.squeeze(x, [spatial_start_dim]) - - if data_format == 'channels_first' and tf_data_format == 'NWC': - x = array_ops.transpose(x, (0, 2, 1)) # NWC -> NCW - - return x - - -def separable_conv2d(x, - depthwise_kernel, - pointwise_kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution with separable filters. - - Arguments: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. 
- dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - ValueError: if `strides` is not a tuple of 2 integers. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if len(strides) != 2: - raise ValueError('`strides` must be a tuple of 2 integers.') - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if not isinstance(strides, tuple): - strides = tuple(strides) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = nn.separable_conv2d( - x, - depthwise_kernel, - pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def depthwise_conv2d(x, - depthwise_kernel, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1)): - """2D convolution with separable filters. - - Arguments: - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - Returns: - Output tensor. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = nn.depthwise_conv2d( - x, - depthwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def conv3d(x, - kernel, - strides=(1, 1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1, 1)): - """3D convolution. - - Arguments: - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of 3 integers. - - Returns: - A tensor, result of 3D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. 
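-
-  Example (illustrative only; the shapes are the point, the values are
-  arbitrary):
-
-  >>> x = random_uniform((1, 4, 4, 4, 2))       # (batch, d1, d2, d3, in_ch)
-  >>> kernel = random_uniform((3, 3, 3, 2, 5))  # (k1, k2, k3, in_ch, out_ch)
-  >>> int_shape(conv3d(x, kernel, padding='valid'))
-  (1, 2, 2, 2, 5)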
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - x = nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = array_ops.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def conv3d_transpose(x, - kernel, - output_shape, - strides=(1, 1, 1), - padding='valid', - data_format=None): - """3D deconvolution (i.e. - - transposed convolution). - - Arguments: - x: input tensor. - kernel: kernel tensor. - output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - A tensor, result of transposed 3D convolution. - - Raises: - ValueError: if `data_format` is neither `channels_last` or - `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if isinstance(output_shape, (tuple, list)): - output_shape = array_ops.stack(output_shape) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - output_shape = (output_shape[0], output_shape[2], output_shape[3], - output_shape[4], output_shape[1]) - if output_shape[0] is None: - output_shape = (array_ops.shape(x)[0],) + tuple(output_shape[1:]) - output_shape = array_ops.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = nn.conv3d_transpose( - x, - kernel, - output_shape, - strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = array_ops.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def pool2d(x, - pool_size, - strides=(1, 1), - padding='valid', - data_format=None, - pool_mode='max'): - """2D Pooling. - - Arguments: - x: Tensor or variable. - pool_size: tuple of 2 integers. - strides: tuple of 2 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - Returns: - A tensor, result of 2D pooling. - - Raises: - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. - ValueError: if `pool_size` is not a tuple of 2 integers. - ValueError: if `strides` is not a tuple of 2 integers. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. 
- """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - if len(pool_size) != 2: - raise ValueError('`pool_size` must be a tuple of 2 integers.') - if len(strides) != 2: - raise ValueError('`strides` must be a tuple of 2 integers.') - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = nn.max_pool( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - elif pool_mode == 'avg': - x = nn.avg_pool( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def pool3d(x, - pool_size, - strides=(1, 1, 1), - padding='valid', - data_format=None, - pool_mode='max'): - """3D Pooling. - - Arguments: - x: Tensor or variable. - pool_size: tuple of 3 integers. - strides: tuple of 3 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - Returns: - A tensor, result of 3D pooling. - - Raises: - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = nn.max_pool3d( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - elif pool_mode == 'avg': - x = nn.avg_pool3d( - x, pool_size, strides, padding=padding, data_format=tf_data_format) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = array_ops.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def local_conv(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - """Apply N-D convolution with un-shared weights. - - Arguments: - inputs: (N+2)-D tensor with shape - (batch_size, channels_in, d_in1, ..., d_inN) - if data_format='channels_first', or - (batch_size, d_in1, ..., d_inN, channels_in) - if data_format='channels_last'. - kernel: the unshared weight for N-D convolution, - with shape (output_items, feature_dim, channels_out), where - feature_dim = np.prod(kernel_size) * channels_in, - output_items = np.prod(output_shape). - kernel_size: a tuple of N integers, specifying the - spatial dimensions of the N-D convolution window. - strides: a tuple of N integers, specifying the strides - of the convolution along the spatial dimensions. - output_shape: a tuple of (d_out1, ..., d_outN) specifying the spatial - dimensionality of the output. - data_format: string, "channels_first" or "channels_last". 
- - Returns: - An (N+2)-D tensor with shape: - (batch_size, channels_out) + output_shape - if data_format='channels_first', or: - (batch_size,) + output_shape + (channels_out,) - if data_format='channels_last'. - - Raises: - ValueError: if `data_format` is neither - `channels_last` nor `channels_first`. - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - - kernel_shape = int_shape(kernel) - feature_dim = kernel_shape[1] - channels_out = kernel_shape[-1] - ndims = len(output_shape) - spatial_dimensions = list(range(ndims)) - - xs = [] - output_axes_ticks = [range(axis_max) for axis_max in output_shape] - for position in itertools.product(*output_axes_ticks): - slices = [slice(None)] - - if data_format == 'channels_first': - slices.append(slice(None)) - - slices.extend( - slice(position[d] * strides[d], position[d] * strides[d] + - kernel_size[d]) for d in spatial_dimensions) - - if data_format == 'channels_last': - slices.append(slice(None)) - - xs.append(reshape(inputs[slices], (1, -1, feature_dim))) - - x_aggregate = concatenate(xs, axis=0) - output = batch_dot(x_aggregate, kernel) - output = reshape(output, output_shape + (-1, channels_out)) - - if data_format == 'channels_first': - permutation = [ndims, ndims + 1] + spatial_dimensions - else: - permutation = [ndims] + spatial_dimensions + [ndims + 1] - - return permute_dimensions(output, permutation) - - -def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): - """Apply 1D conv with un-shared weights. - - Arguments: - inputs: 3D tensor with shape: - (batch_size, steps, input_dim) - if data_format is "channels_last" or - (batch_size, input_dim, steps) - if data_format is "channels_first". - kernel: the unshared weight for convolution, - with shape (output_length, feature_dim, filters). - kernel_size: a tuple of a single integer, - specifying the length of the 1D convolution window. - strides: a tuple of a single integer, - specifying the stride length of the convolution. - data_format: the data format, channels_first or channels_last. - - Returns: - A 3d tensor with shape: - (batch_size, output_length, filters) - if data_format='channels_first' - or 3D tensor with shape: - (batch_size, filters, output_length) - if data_format='channels_last'. - """ - output_shape = (kernel.shape[0],) - return local_conv(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format) - - -def local_conv2d(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - """Apply 2D conv with un-shared weights. - - Arguments: - inputs: 4D tensor with shape: - (batch_size, filters, new_rows, new_cols) - if data_format='channels_first' - or 4D tensor with shape: - (batch_size, new_rows, new_cols, filters) - if data_format='channels_last'. - kernel: the unshared weight for convolution, - with shape (output_items, feature_dim, filters). - kernel_size: a tuple of 2 integers, specifying the - width and height of the 2D convolution window. - strides: a tuple of 2 integers, specifying the strides - of the convolution along the width and height. - output_shape: a tuple with (output_row, output_col). - data_format: the data format, channels_first or channels_last. 
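`local_conv1d` simply derives `output_shape` from the kernel's leading dimension and calls the generic `local_conv` above. (Note the `Returns` section above has the two data formats transposed: it is `channels_last` that yields `(batch_size, output_length, filters)`.) A runnable sketch using the public `tf.keras.backend.local_conv1d`; the sizes are illustrative:

```
import numpy as np
from tensorflow.keras import backend as K

batch, steps, channels, filters = 2, 8, 3, 5
kernel_size, stride = 3, 1
output_length = (steps - kernel_size + stride) // stride  # 6 for 'valid' windows

inputs = K.constant(np.random.random((batch, steps, channels)))
# Unshared weights: one (feature_dim, filters) matrix per output position.
kernel = K.constant(
    np.random.random((output_length, kernel_size * channels, filters)))
y = K.local_conv1d(inputs, kernel, (kernel_size,), (stride,))
print(K.int_shape(y))  # (2, 6, 5) with the default channels_last format
```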
- - Returns: - A 4D tensor with shape: - (batch_size, filters, new_rows, new_cols) - if data_format='channels_first' - or 4D tensor with shape: - (batch_size, new_rows, new_cols, filters) - if data_format='channels_last'. - """ - return local_conv(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format) - - -def bias_add(x, bias, data_format=None): - """Adds a bias vector to a tensor. - - Arguments: - x: Tensor or variable. - bias: Bias tensor to add. - data_format: string, `"channels_last"` or `"channels_first"`. - - Returns: - Output tensor. - - Raises: - ValueError: In one of the two cases below: - 1. invalid `data_format` argument. - 2. invalid bias shape. - the bias should be either a vector or - a tensor with ndim(x) - 1 dimension - """ - if data_format is None: - data_format = image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - bias_shape = int_shape(bias) - if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: - raise ValueError( - 'Unexpected bias dimensions %d, expect to be 1 or %d dimensions' % - (len(bias_shape), ndim(x))) - - if len(bias_shape) == 1: - if data_format == 'channels_first': - return nn.bias_add(x, bias, data_format='NCHW') - return nn.bias_add(x, bias, data_format='NHWC') - if ndim(x) in (3, 4, 5): - if data_format == 'channels_first': - bias_reshape_axis = (1, bias_shape[-1]) + bias_shape[:-1] - return x + reshape(bias, bias_reshape_axis) - return x + reshape(bias, (1,) + bias_shape) - return nn.bias_add(x, bias) - - -# RANDOMNESS - - -def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with normal distribution of values. - - It is an alias to `tf.random.normal`. - - Arguments: - shape: A tuple of integers, the shape of tensor to create. - mean: A float, the mean value of the normal distribution to draw samples. - Default to 0.0. - stddev: A float, the standard deviation of the normal distribution - to draw samples. Default to 1.0. - dtype: `tf.dtypes.DType`, dtype of returned tensor. Default to use Keras - backend dtype which is float32. - seed: Integer, random seed. Will use a random numpy integer when not - specified. - - Returns: - A tensor with normal distribution of values. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return random_ops.random_normal( - shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed) - - -def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): - """Returns a tensor with uniform distribution of values. - - Arguments: - shape: A tuple of integers, the shape of tensor to create. - minval: A float, lower boundary of the uniform distribution - to draw samples. - maxval: A float, upper boundary of the uniform distribution - to draw samples. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return random_ops.random_uniform( - shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed) - - -@deprecated(None, 'Use `tf.keras.backend.random_bernoulli` instead.') -def random_binomial(shape, p=0.0, dtype=None, seed=None): - """Returns a tensor with random binomial distribution of values. - - DEPRECATED, use `tf.keras.backend.random_bernoulli` instead. 
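`bias_add` above special-cases 1-D biases, delegating to `nn.bias_add` with the right `NCHW`/`NHWC` flag, and reshapes higher-rank biases by hand. A minimal sketch with the public `tf.keras.backend.bias_add`:

```
import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.zeros((2, 4, 4, 3)))   # channels_last input
b = K.constant(np.array([1., 2., 3.]))   # one bias per channel
y = K.bias_add(x, b, data_format='channels_last')
print(K.eval(y)[0, 0, 0])  # [1. 2. 3.]: the bias broadcasts over the channel axis
```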
- - The binomial distribution with parameters `n` and `p` is the probability - distribution of the number of successful Bernoulli process. Only supports - `n` = 1 for now. - - Arguments: - shape: A tuple of integers, the shape of tensor to create. - p: A float, `0. <= p <= 1`, probability of binomial distribution. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return array_ops.where_v2( - random_ops.random_uniform(shape, dtype=dtype, seed=seed) <= p, - array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype)) - - -def random_bernoulli(shape, p=0.0, dtype=None, seed=None): - """Returns a tensor with random bernoulli distribution of values. - - Arguments: - shape: A tuple of integers, the shape of tensor to create. - p: A float, `0. <= p <= 1`, probability of bernoulli distribution. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - return random_binomial(shape, p, dtype, seed) - - -def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with truncated random normal distribution of values. - - The generated values follow a normal distribution - with specified mean and standard deviation, - except that values whose magnitude is more than - two standard deviations from the mean are dropped and re-picked. - - Arguments: - shape: A tuple of integers, the shape of tensor to create. - mean: Mean of the values. - stddev: Standard deviation of the values. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - Returns: - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return random_ops.truncated_normal( - shape, mean, stddev, dtype=dtype, seed=seed) - - -# CTC -# TensorFlow has a native implementation, but it uses sparse tensors -# and therefore requires a wrapper for Keras. The functions below convert -# dense to sparse tensors and also wraps up the beam search code that is -# in TensorFlow's CTC implementation - - -def ctc_label_dense_to_sparse(labels, label_lengths): - """Converts CTC labels from dense to sparse. - - Arguments: - labels: dense CTC labels. - label_lengths: length of the labels. - - Returns: - A sparse tensor representation of the labels. 
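`random_binomial` above draws `n = 1` samples, i.e. a Bernoulli draw implemented as a uniform sample thresholded at `p`. A quick sanity check, assuming the `tf.keras.backend.random_bernoulli` alias named in the deprecation notice above:

```
from tensorflow.keras import backend as K

samples = K.random_bernoulli((10000,), p=0.3, seed=42)
print(float(K.eval(K.mean(samples))))  # ~0.3: each entry is 1 with probability p
```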
- """ - label_shape = array_ops.shape(labels) - num_batches_tns = array_ops.stack([label_shape[0]]) - max_num_labels_tns = array_ops.stack([label_shape[1]]) - - def range_less_than(old_input, current_input): - return array_ops.expand_dims( - math_ops.range(array_ops.shape(old_input)[1]), 0) < array_ops.fill( - max_num_labels_tns, current_input) - - init = math_ops.cast( - array_ops.fill([1, label_shape[1]], 0), dtypes_module.bool) - dense_mask = functional_ops.scan( - range_less_than, label_lengths, initializer=init, parallel_iterations=1) - dense_mask = dense_mask[:, 0, :] - - label_array = array_ops.reshape( - array_ops.tile(math_ops.range(0, label_shape[1]), num_batches_tns), - label_shape) - label_ind = array_ops.boolean_mask(label_array, dense_mask) - - batch_array = array_ops.transpose( - array_ops.reshape( - array_ops.tile(math_ops.range(0, label_shape[0]), max_num_labels_tns), - reverse(label_shape, 0))) - batch_ind = array_ops.boolean_mask(batch_array, dense_mask) - indices = array_ops.transpose( - array_ops.reshape(concatenate([batch_ind, label_ind], axis=0), [2, -1])) - - vals_sparse = array_ops.gather_nd(labels, indices) - - return sparse_tensor.SparseTensor( - math_ops.cast(indices, dtypes_module.int64), vals_sparse, - math_ops.cast(label_shape, dtypes_module.int64)) - - -def ctc_batch_cost(y_true, y_pred, input_length, label_length): - """Runs CTC loss algorithm on each batch element. - - Arguments: - y_true: tensor `(samples, max_string_length)` - containing the truth labels. - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_pred`. - label_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_true`. - - Returns: - Tensor with shape (samples,1) containing the - CTC loss of each element. - """ - label_length = math_ops.cast( - array_ops.squeeze(label_length, axis=-1), dtypes_module.int32) - input_length = math_ops.cast( - array_ops.squeeze(input_length, axis=-1), dtypes_module.int32) - sparse_labels = math_ops.cast( - ctc_label_dense_to_sparse(y_true, label_length), dtypes_module.int32) - - y_pred = math_ops.log(array_ops.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - - return array_ops.expand_dims( - ctc.ctc_loss( - inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1) - - -def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1): - """Decodes the output of a softmax. - - Can use either greedy search (also known as best path) - or a constrained dictionary search. - - Arguments: - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, )` containing the sequence length for - each batch item in `y_pred`. - greedy: perform much faster best-path search if `true`. - This does not use a dictionary. - beam_width: if `greedy` is `false`: a beam search decoder will be used - with a beam of this width. - top_paths: if `greedy` is `false`, - how many of the most probable paths will be returned. - - Returns: - Tuple: - List: if `greedy` is `true`, returns a list of one element that - contains the decoded sequence. - If `false`, returns the `top_paths` most probable - decoded sequences. - Important: blank labels are returned as `-1`. - Tensor `(top_paths, )` that contains - the log probability of each decoded sequence. 
- """ - y_pred = math_ops.log(array_ops.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - input_length = math_ops.cast(input_length, dtypes_module.int32) - - if greedy: - (decoded, log_prob) = ctc.ctc_greedy_decoder( - inputs=y_pred, sequence_length=input_length) - else: - (decoded, log_prob) = ctc.ctc_beam_search_decoder( - inputs=y_pred, - sequence_length=input_length, - beam_width=beam_width, - top_paths=top_paths) - decoded_dense = [ - sparse_ops.sparse_to_dense( - st.indices, st.dense_shape, st.values, default_value=-1) - for st in decoded - ] - return (decoded_dense, log_prob) - - -# HIGH ORDER FUNCTIONS - - -def map_fn(fn, elems, name=None, dtype=None): - """Map the function fn over the elements elems and return the outputs. - - Arguments: - fn: Callable that will be called upon each element in elems - elems: tensor - name: A string name for the map node in the graph - dtype: Output data type. - - Returns: - Tensor with dtype `dtype`. - """ - return map_fn_lib.map_fn(fn, elems, name=name, dtype=dtype) - - -def foldl(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from left to right. - - Arguments: - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[0]` in case of None) - name: A string name for the foldl node in the graph - - Returns: - Tensor with same type and shape as `initializer`. - """ - return functional_ops.foldl(fn, elems, initializer=initializer, name=name) - - -def foldr(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from right to left. - - Arguments: - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[-1]` in case of None) - name: A string name for the foldr node in the graph - - Returns: - Same type and shape as initializer - """ - return functional_ops.foldr(fn, elems, initializer=initializer, name=name) - -# Load Keras default configuration from config file if present. -# Set Keras base dir path given KERAS_HOME env variable, if applicable. -# Otherwise either ~/.keras or /tmp. -if 'KERAS_HOME' in os.environ: - _keras_dir = os.environ.get('KERAS_HOME') -else: - _keras_base_dir = os.path.expanduser('~') - _keras_dir = os.path.join(_keras_base_dir, '.keras') -_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) -if os.path.exists(_config_path): - try: - with open(_config_path) as fh: - _config = json.load(fh) - except ValueError: - _config = {} - _floatx = _config.get('floatx', floatx()) - assert _floatx in {'float16', 'float32', 'float64'} - _epsilon = _config.get('epsilon', epsilon()) - assert isinstance(_epsilon, float) - _image_data_format = _config.get('image_data_format', image_data_format()) - assert _image_data_format in {'channels_last', 'channels_first'} - set_floatx(_floatx) - set_epsilon(_epsilon) - set_image_data_format(_image_data_format) - -# Save config file. -if not os.path.exists(_keras_dir): - try: - os.makedirs(_keras_dir) - except OSError: - # Except permission denied and potential race conditions - # in multi-threaded environments. 
- pass - -if not os.path.exists(_config_path): - _config = { - 'floatx': floatx(), - 'epsilon': epsilon(), - 'backend': 'tensorflow', - 'image_data_format': image_data_format() - } - try: - with open(_config_path, 'w') as f: - f.write(json.dumps(_config, indent=4)) - except IOError: - # Except permission denied. - pass - - -def configure_and_create_distributed_session(distribution_strategy): - """Configure session config and create a session with it.""" - - def _create_session(distribution_strategy): - """Create the Distributed Strategy session.""" - session_config = get_default_session_config() - - # If a session already exists, merge in its config; in the case there is a - # conflict, take values of the existing config. - global _SESSION - if getattr(_SESSION, 'session', None) and _SESSION.session._config: - session_config.MergeFrom(_SESSION.session._config) - - if is_tpu_strategy(distribution_strategy): - # TODO(priyag, yuefengz): Remove this workaround when Distribute - # Coordinator is integrated with keras and we can create a session from - # there. - distribution_strategy.configure(session_config) - master = distribution_strategy.extended._tpu_cluster_resolver.master() # pylint: disable=protected-access - session = session_module.Session(config=session_config, target=master) - else: - worker_context = dc_context.get_current_worker_context() - if worker_context: - dc_session_config = worker_context.session_config - # Merge the default session config to the one from distribute - # coordinator, which is fine for now since they don't have - # conflicting configurations. - dc_session_config.MergeFrom(session_config) - session = session_module.Session( - config=dc_session_config, target=worker_context.master_target) - else: - distribution_strategy.configure(session_config) - session = session_module.Session(config=session_config) - - set_session(session) - - if distribution_strategy.extended._in_multi_worker_mode(): - dc.run_distribute_coordinator( - _create_session, - distribution_strategy, - mode=dc.CoordinatorMode.INDEPENDENT_WORKER) - else: - _create_session(distribution_strategy) - - -def is_tpu_strategy(strategy): - """We're executing TPU Strategy.""" - return (strategy is not None and - strategy.__class__.__name__.startswith('TPUStrategy')) - - -def cast_variables_to_tensor(tensors): - - def _cast_variables_to_tensor(tensor): - if isinstance(tensor, variables_module.Variable): - return array_ops.identity(tensor) - return tensor - - return nest.map_structure(_cast_variables_to_tensor, tensors) - - -def _is_symbolic_tensor(x): - return tensor_util.is_tensor(x) and not isinstance(x, ops.EagerTensor) - - -def convert_inputs_if_ragged(inputs): - """Converts any ragged tensors to dense.""" - - def _convert_ragged_input(inputs): - if isinstance(inputs, ragged_tensor.RaggedTensor): - return inputs.to_tensor() - return inputs - - flat_inputs = nest.flatten(inputs) - contains_ragged = py_any( - isinstance(i, ragged_tensor.RaggedTensor) for i in flat_inputs) - - if not contains_ragged: - return inputs, None - - inputs = nest.map_structure(_convert_ragged_input, inputs) - # Multiple mask are not yet supported, so one mask is used on all inputs. - # We approach this similarly when using row lengths to ignore steps. 
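The module-level block above resolves the config directory once at import time: `$KERAS_HOME` wins, otherwise `~/.keras`, and a missing `keras.json` is written back with the current defaults. A standalone sketch of that lookup order, with paths and default values as in the source:

```
import json
import os

keras_dir = os.environ.get('KERAS_HOME',
                           os.path.join(os.path.expanduser('~'), '.keras'))
config_path = os.path.join(keras_dir, 'keras.json')

config = {'floatx': 'float32', 'epsilon': 1e-7,
          'backend': 'tensorflow', 'image_data_format': 'channels_last'}
if os.path.exists(config_path):
    with open(config_path) as fh:
        config.update(json.load(fh))  # on-disk values override the defaults
print(config)
```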
- nested_row_lengths = math_ops.cast(flat_inputs[0].nested_row_lengths()[0], - 'int32') - return inputs, nested_row_lengths - - -def maybe_convert_to_ragged(is_ragged_input, output, nested_row_lengths): - """Converts any ragged input back to its initial structure.""" - if not is_ragged_input: - return output - - return ragged_tensor.RaggedTensor.from_tensor(output, nested_row_lengths) - - -class ContextValueCache(weakref.WeakKeyDictionary): - """Container that caches (possibly tensor) values based on the context. - - This class is similar to defaultdict, where values may be produced by the - default factory specified during initialization. This class also has a default - value for the key (when key is `None`) -- the key is set to the the current - graph or eager context. The default factories for key and value are only used - in `__getitem__` and `setdefault`. The `.get()` behavior remains the same. - - This object will return the value of the current graph or closest parent graph - if the current graph is a function. This is to reflect the fact that if a - tensor is created in eager/graph, child functions may capture that tensor. - - The default factory method may accept keyword arguments (unlike defaultdict, - which only accepts callables with 0 arguments). To pass keyword arguments to - `default_factory`, use the `setdefault` method instead of `__getitem__`. - - An example of how this class can be used in different contexts: - - ``` - cache = ContextValueCache(int) - - # Eager mode - cache[None] += 2 - cache[None] += 4 - assert cache[None] == 6 - - # Graph mode - with tf.Graph().as_default() as g: - cache[None] += 5 - cache[g] += 3 - assert cache[g] == 8 - ``` - - Example of a default factory with arguments: - - ``` - cache = ContextValueCache(lambda x: x + 1) - g = tf.get_default_graph() - - # Example with keyword argument. - value = cache.setdefault(key=g, kwargs={'x': 3}) - assert cache[g] == 4 - ``` - """ - - def __init__(self, default_factory): - self.default_factory = default_factory - weakref.WeakKeyDictionary.__init__(self) - - def _key(self): - if context.executing_eagerly(): - return _DUMMY_EAGER_GRAPH.key - else: - return ops.get_default_graph() - - def _get_parent_graph(self, graph): - """Returns the parent graph or dummy eager object.""" - # TODO(b/149317164): Currently FuncGraphs use ops.get_default_graph() as the - # outer graph. This results in outer_graph always being a Graph, - # even in eager mode (get_default_graph will create a new Graph if there - # isn't a default graph). Because of this bug, we have to specially set the - # key when eager execution is enabled. - parent_graph = graph.outer_graph - if (not isinstance(parent_graph, func_graph.FuncGraph) and - ops.executing_eagerly_outside_functions()): - return _DUMMY_EAGER_GRAPH.key - return parent_graph - - def _get_recursive(self, key): - """Gets the value at key or the closest parent graph.""" - value = self.get(key) - if value is not None: - return value - - # Since FuncGraphs are able to capture tensors and variables from their - # parent graphs, recursively search to see if there is a value stored for - # one of the parent graphs. - if isinstance(key, func_graph.FuncGraph): - return self._get_recursive(self._get_parent_graph(key)) - return None - - def __getitem__(self, key): - """Gets the value at key (or current context), or sets default value. - - Args: - key: May be `None` or `Graph`object. When `None`, the key is set to the - current context. - - Returns: - Either the cached or default value. 
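`convert_inputs_if_ragged` densifies ragged inputs while remembering the first input's row lengths, and `maybe_convert_to_ragged` uses those lengths to rebuild the ragged structure afterwards. The round-trip in isolation:

```
import tensorflow as tf

ragged = tf.ragged.constant([[1, 2, 3], [4]])
dense = ragged.to_tensor()  # zero-pads rows to a rectangular tensor
lengths = tf.cast(ragged.nested_row_lengths()[0], 'int32')
print(dense.numpy())        # [[1 2 3] [4 0 0]]

restored = tf.RaggedTensor.from_tensor(dense, lengths)
print(restored.to_list())   # [[1, 2, 3], [4]]
```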
- """ - if key is None: - key = self._key() - - value = self._get_recursive(key) - if value is None: - value = self[key] = self.default_factory() # pylint:disable=not-callable - return value - - def setdefault(self, key=None, default=None, kwargs=None): - """Sets the default value if key is not in dict, and returns the value.""" - if key is None: - key = self._key() - kwargs = kwargs or {} - - if default is None and key not in self: - default = self.default_factory(**kwargs) - return weakref.WeakKeyDictionary.setdefault(self, key, default) - -# This dictionary holds a mapping {graph: learning_phase}. In eager mode, a -# dummy object is used. -# A learning phase is a bool tensor used to run Keras models in -# either train mode (learning_phase == 1) or test mode (learning_phase == 0). -_GRAPH_LEARNING_PHASES = ContextValueCache(_default_learning_phase) - -# This dictionary holds a mapping {graph: set_of_freezable_variables}. -# Each set tracks objects created via `freezable_variable` in the graph. -_FREEZABLE_VARS = ContextValueCache(object_identity.ObjectIdentityWeakSet) - -# This dictionary holds a mapping between a graph and variables to initialize -# in the graph. -_GRAPH_VARIABLES = ContextValueCache(object_identity.ObjectIdentityWeakSet) - -# This dictionary holds a mapping between a graph and TF optimizers created in -# the graph. -_GRAPH_TF_OPTIMIZERS = ContextValueCache(object_identity.ObjectIdentityWeakSet) diff --git a/tensorflow/python/frozen_keras/backend_config.py b/tensorflow/python/frozen_keras/backend_config.py deleted file mode 100644 index cbfe0f85fcf..00000000000 --- a/tensorflow/python/frozen_keras/backend_config.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Keras backend config API.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -# The type of float to use throughout a session. -_FLOATX = 'float32' - -# Epsilon fuzz factor used throughout the codebase. -_EPSILON = 1e-7 - -# Default image data format, one of "channels_last", "channels_first". -_IMAGE_DATA_FORMAT = 'channels_last' - - -def epsilon(): - """Returns the value of the fuzz factor used in numeric expressions. - - Returns: - A float. - - Example: - >>> tf.keras.backend.epsilon() - 1e-07 - """ - return _EPSILON - - -def set_epsilon(value): - """Sets the value of the fuzz factor used in numeric expressions. - - Arguments: - value: float. New value of epsilon. - - Example: - >>> tf.keras.backend.epsilon() - 1e-07 - >>> tf.keras.backend.set_epsilon(1e-5) - >>> tf.keras.backend.epsilon() - 1e-05 - >>> tf.keras.backend.set_epsilon(1e-7) - """ - global _EPSILON - _EPSILON = value - - -def floatx(): - """Returns the default float type, as a string. - - E.g. `'float16'`, `'float32'`, `'float64'`. - - Returns: - String, the current default float type. 
- - Example: - >>> tf.keras.backend.floatx() - 'float32' - """ - return _FLOATX - - -def set_floatx(value): - """Sets the default float type. - - Note: It is not recommended to set this to float16 for training, as this will - likely cause numeric stability issues. Instead, mixed precision, which is - using a mix of float16 and float32, can be used by calling - `tf.keras.mixed_precision.experimental.set_policy('mixed_float16')`. See the - [mixed precision - guide](https://www.tensorflow.org/guide/keras/mixed_precision) for details. - - Arguments: - value: String; `'float16'`, `'float32'`, or `'float64'`. - - Example: - >>> tf.keras.backend.floatx() - 'float32' - >>> tf.keras.backend.set_floatx('float64') - >>> tf.keras.backend.floatx() - 'float64' - >>> tf.keras.backend.set_floatx('float32') - - Raises: - ValueError: In case of invalid value. - """ - global _FLOATX - if value not in {'float16', 'float32', 'float64'}: - raise ValueError('Unknown floatx type: ' + str(value)) - _FLOATX = str(value) - - -def image_data_format(): - """Returns the default image data format convention. - - Returns: - A string, either `'channels_first'` or `'channels_last'` - - Example: - >>> tf.keras.backend.image_data_format() - 'channels_last' - """ - return _IMAGE_DATA_FORMAT - - -def set_image_data_format(data_format): - """Sets the value of the image data format convention. - - Arguments: - data_format: string. `'channels_first'` or `'channels_last'`. - - Example: - >>> tf.keras.backend.image_data_format() - 'channels_last' - >>> tf.keras.backend.set_image_data_format('channels_first') - >>> tf.keras.backend.image_data_format() - 'channels_first' - >>> tf.keras.backend.set_image_data_format('channels_last') - - Raises: - ValueError: In case of invalid `data_format` value. - """ - global _IMAGE_DATA_FORMAT - if data_format not in {'channels_last', 'channels_first'}: - raise ValueError('Unknown data_format: ' + str(data_format)) - _IMAGE_DATA_FORMAT = str(data_format) diff --git a/tensorflow/python/frozen_keras/backend_config_test.py b/tensorflow/python/frozen_keras/backend_config_test.py deleted file mode 100644 index f21a8d1ee58..00000000000 --- a/tensorflow/python/frozen_keras/backend_config_test.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
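These getters and setters are essentially the whole surface of `backend_config`: three module-level globals guarded by value checks. Their public counterparts behave identically:

```
from tensorflow.keras import backend as K

print(K.floatx())             # 'float32'
K.set_floatx('float64')
print(K.floatx())             # 'float64'
K.set_floatx('float32')       # restore the default; the setting is process-global

print(K.epsilon())            # 1e-07
print(K.image_data_format())  # 'channels_last'
```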
-# ============================================================================== -"""Tests for backend_config.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.frozen_keras import backend -from tensorflow.python.frozen_keras import backend_config -from tensorflow.python.keras import combinations -from tensorflow.python.platform import test - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendConfigTest(test.TestCase): - - def test_backend(self): - self.assertEqual(backend.backend(), 'tensorflow') - - def test_epsilon(self): - epsilon = 1e-2 - backend_config.set_epsilon(epsilon) - self.assertEqual(backend_config.epsilon(), epsilon) - backend_config.set_epsilon(1e-7) - self.assertEqual(backend_config.epsilon(), 1e-7) - - def test_floatx(self): - floatx = 'float64' - backend_config.set_floatx(floatx) - self.assertEqual(backend_config.floatx(), floatx) - backend_config.set_floatx('float32') - self.assertEqual(backend_config.floatx(), 'float32') - - def test_image_data_format(self): - image_data_format = 'channels_first' - backend_config.set_image_data_format(image_data_format) - self.assertEqual(backend_config.image_data_format(), image_data_format) - backend_config.set_image_data_format('channels_last') - self.assertEqual(backend_config.image_data_format(), 'channels_last') - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/frozen_keras/backend_test.py b/tensorflow/python/frozen_keras/backend_test.py deleted file mode 100644 index c9aa403797f..00000000000 --- a/tensorflow/python/frozen_keras/backend_test.py +++ /dev/null @@ -1,2180 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Keras backend.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import gc - -from absl.testing import parameterized -import numpy as np -import scipy.sparse - -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.framework import config -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import ops -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import test_util -from tensorflow.python.frozen_keras import backend -from tensorflow.python.keras import combinations -from tensorflow.python.keras.engine import input_layer -from tensorflow.python.keras.layers import advanced_activations -from tensorflow.python.keras.layers import normalization -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import variables -from tensorflow.python.platform import test -from tensorflow.python.util import tf_inspect - - -def compare_single_input_op_to_numpy(keras_op, - np_op, - input_shape, - dtype='float32', - negative_values=True, - keras_args=None, - keras_kwargs=None, - np_args=None, - np_kwargs=None): - keras_args = keras_args or [] - keras_kwargs = keras_kwargs or {} - np_args = np_args or [] - np_kwargs = np_kwargs or {} - inputs = 2. * np.random.random(input_shape) - if negative_values: - inputs -= 1. - keras_output = keras_op( - backend.variable(inputs, dtype=dtype), *keras_args, **keras_kwargs) - keras_output = backend.eval(keras_output) - np_output = np_op(inputs.astype(dtype), *np_args, **np_kwargs) - try: - np.testing.assert_allclose(keras_output, np_output, atol=1e-4) - except AssertionError: - raise AssertionError('Test for op `' + str(keras_op.__name__) + '` failed; ' - 'Expected ' + str(np_output) + ' but got ' + - str(keras_output)) - - -def compare_two_inputs_op_to_numpy(keras_op, - np_op, - input_shape_a, - input_shape_b, - dtype='float32', - keras_args=None, - keras_kwargs=None, - np_args=None, - np_kwargs=None): - keras_args = keras_args or [] - keras_kwargs = keras_kwargs or {} - np_args = np_args or [] - np_kwargs = np_kwargs or {} - input_a = np.random.random(input_shape_a) - input_b = np.random.random(input_shape_b) - keras_output = keras_op( - backend.variable(input_a, dtype=dtype), - backend.variable(input_b, dtype=dtype), *keras_args, **keras_kwargs) - keras_output = backend.eval(keras_output) - np_output = np_op(input_a.astype(dtype), input_b.astype(dtype), - *np_args, **np_kwargs) - try: - np.testing.assert_allclose(keras_output, np_output, atol=1e-4) - except AssertionError: - raise AssertionError('Test for op `' + str(keras_op.__name__) + '` failed; ' - 'Expected ' + str(np_output) + ' but got ' + - str(keras_output)) - - -class BackendResetTest(test.TestCase, parameterized.TestCase): - - def test_new_config(self): - # User defined jit setting - config.set_optimizer_jit(False) - sess = backend.get_session() - default_config = context.context().config - self.assertEqual( - sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # New session has the same jit setting - sess = backend.get_session() - default_config = context.context().config - self.assertEqual( - 
sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # Change respected - config.set_optimizer_jit(True) - sess = backend.get_session() - default_config = context.context().config - self.assertEqual( - sess._config.graph_options.optimizer_options.global_jit_level, - default_config.graph_options.optimizer_options.global_jit_level) - backend.clear_session() - - # We can't use the normal parameterized decorator because the test session - # will block graph clearing. - @parameterized.named_parameters(('_v1', context.graph_mode), - ('_v2', context.eager_mode)) - def test_new_graph(self, test_context): - with test_context(): - g_old = backend.get_graph() - backend.clear_session() - g = backend.get_graph() - - assert g_old is not g - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendUtilsTest(test.TestCase): - - def test_backend(self): - self.assertEqual(backend.backend(), 'tensorflow') - - def test_get_reset_uids(self): - self.assertEqual(backend.get_uid('foo'), 1) - self.assertEqual(backend.get_uid('foo'), 2) - - backend.reset_uids() - self.assertEqual(backend.get_uid('foo'), 1) - - def test_learning_phase(self): - with self.cached_session() as sess: - with self.assertRaises(ValueError): - backend.set_learning_phase(2) - - # Test running with a learning-phase-consuming layer - with backend.learning_phase_scope(0): - x = input_layer.Input((3,)) - y = normalization.BatchNormalization()(x) - if not context.executing_eagerly(): - self.evaluate(variables.global_variables_initializer()) - sess.run(y, feed_dict={x: np.random.random((2, 3))}) - - def test_learning_phase_name(self): - with ops.name_scope('test_scope'): - # Test that outer name scopes do not affect the learning phase's name. 
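The learning-phase tests above exercise a process-global flag that layers such as `BatchNormalization` consult to choose training versus inference behavior. A short sketch, assuming the `learning_phase_scope` API of this era is available:

```
from tensorflow.keras import backend as K

print(K.learning_phase())        # 0 (inference) by default
with K.learning_phase_scope(1):  # temporarily force training mode
    print(K.learning_phase())    # 1
print(K.learning_phase())        # restored to 0 on scope exit
```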
- lp = backend.symbolic_learning_phase() - self.assertEqual(lp.name, 'keras_learning_phase:0') - - def test_learning_phase_scope(self): - initial_learning_phase = backend.learning_phase() - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), initial_learning_phase) - with backend.learning_phase_scope(0): - self.assertEqual(backend.learning_phase(), 0) - self.assertEqual(backend.learning_phase(), initial_learning_phase) - with self.assertRaises(ValueError): - with backend.learning_phase_scope(None): - pass - self.assertEqual(backend.learning_phase(), initial_learning_phase) - - new_learning_phase = 0 - backend.set_learning_phase(new_learning_phase) - self.assertEqual(backend.learning_phase(), new_learning_phase) - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), new_learning_phase) - - def test_learning_phase_scope_in_graph(self): - initial_learning_phase_outside_graph = backend.learning_phase() - with backend.get_graph().as_default(): - initial_learning_phase_in_graph = backend.learning_phase() - - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - with backend.learning_phase_scope(1): - self.assertEqual(backend.learning_phase(), 1) - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - - with backend.get_graph().as_default(): - self.assertIs(backend.learning_phase(), initial_learning_phase_in_graph) - - self.assertEqual(backend.learning_phase(), - initial_learning_phase_outside_graph) - - def test_int_shape(self): - x = backend.ones(shape=(3, 4)) - self.assertEqual(backend.int_shape(x), (3, 4)) - - if not context.executing_eagerly(): - x = backend.placeholder(shape=(None, 4)) - self.assertEqual(backend.int_shape(x), (None, 4)) - - def test_in_train_phase(self): - y1 = backend.variable(1) - y2 = backend.variable(2) - if context.executing_eagerly(): - with backend.learning_phase_scope(0): - y_val_test = backend.in_train_phase(y1, y2).numpy() - with backend.learning_phase_scope(1): - y_val_train = backend.in_train_phase(y1, y2).numpy() - else: - y = backend.in_train_phase(y1, y2) - f = backend.function([backend.learning_phase()], [y]) - y_val_test = f([0])[0] - y_val_train = f([1])[0] - self.assertAllClose(y_val_test, 2) - self.assertAllClose(y_val_train, 1) - - def test_is_keras_tensor(self): - x = backend.variable(1) - self.assertEqual(backend.is_keras_tensor(x), False) - x = input_layer.Input(shape=(1,)) - self.assertEqual(backend.is_keras_tensor(x), True) - x = input_layer.Input(shape=(None,), ragged=True) - self.assertEqual(backend.is_keras_tensor(x), True) - x = input_layer.Input(shape=(None, None), sparse=True) - self.assertEqual(backend.is_keras_tensor(x), True) - with self.assertRaises(ValueError): - backend.is_keras_tensor(0) - - def test_stop_gradient(self): - x = backend.variable(1) - y = backend.stop_gradient(x) - if not context.executing_eagerly(): - self.assertEqual(y.op.name[:12], 'StopGradient') - - xs = [backend.variable(1) for _ in range(3)] - ys = backend.stop_gradient(xs) - if not context.executing_eagerly(): - for y in ys: - self.assertEqual(y.op.name[:12], 'StopGradient') - - def test_placeholder(self): - x = backend.placeholder(shape=(3, 4)) - self.assertEqual(x.shape.as_list(), [3, 4]) - x = backend.placeholder(shape=(3, 4), sparse=True) - self.assertEqual(x.shape.as_list(), [3, 4]) - - def test_is_placeholder(self): - x = 
backend.placeholder(shape=(1,)) - self.assertEqual(backend.is_placeholder(x), True) - x = backend.variable(1) - self.assertEqual(backend.is_placeholder(x), False) - - def test_print_tensor(self): - # Unfortunately it seems impossible to use `mock` (or any other method) - # to capture stdout when used inside a graph or graph function, thus - # we cannot test correctness. - # The message gets correctly printed in practice. - x = backend.placeholder(shape=()) - y = backend.print_tensor(x, 'eager=%s' % context.executing_eagerly()) - f = backend.function(x, y) - f(0) - - def test_cast_to_floatx(self): - x = backend.variable(1, dtype='float64') - x = backend.cast_to_floatx(x) - self.assertEqual(x.dtype.name, 'float32') - x = backend.cast_to_floatx(2) - self.assertEqual(x.dtype.name, 'float32') - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendVariableTest(test.TestCase): - - def test_zeros(self): - x = backend.zeros((3, 4)) - val = backend.eval(x) - self.assertAllClose(val, np.zeros((3, 4))) - - def test_ones(self): - x = backend.ones((3, 4)) - val = backend.eval(x) - self.assertAllClose(val, np.ones((3, 4))) - - def test_eye(self): - x = backend.eye(4) - val = backend.eval(x) - self.assertAllClose(val, np.eye(4)) - - def test_zeros_like(self): - x = backend.zeros((3, 4)) - y = backend.zeros_like(x) - val = backend.eval(y) - self.assertAllClose(val, np.zeros((3, 4))) - - def test_ones_like(self): - x = backend.zeros((3, 4)) - y = backend.ones_like(x) - val = backend.eval(y) - self.assertAllClose(val, np.ones((3, 4))) - - def test_random_uniform_variable(self): - x = backend.random_uniform_variable((30, 20), low=1, high=2, seed=0) - val = backend.eval(x) - self.assertAllClose(val.mean(), 1.5, atol=1e-1) - self.assertAllClose(val.max(), 2., atol=1e-1) - self.assertAllClose(val.min(), 1., atol=1e-1) - - def test_random_normal_variable(self): - x = backend.random_normal_variable((30, 20), 1., 0.5, seed=0) - val = backend.eval(x) - self.assertAllClose(val.mean(), 1., atol=1e-1) - self.assertAllClose(val.std(), 0.5, atol=1e-1) - - def test_count_params(self): - x = backend.zeros((4, 5)) - val = backend.count_params(x) - self.assertAllClose(val, 20) - - def test_constant(self): - ref_val = np.random.random((3, 4)).astype('float32') - x = backend.constant(ref_val) - val = backend.eval(x) - self.assertAllClose(val, ref_val) - - def test_sparse_variable(self): - val = scipy.sparse.eye(10) - x = backend.variable(val) - self.assertTrue(isinstance(x, sparse_tensor.SparseTensor)) - - y = backend.to_dense(x) - self.assertFalse(backend.is_sparse(y)) - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendLinearAlgebraTest(test.TestCase, parameterized.TestCase): - - def test_dot(self): - x = backend.ones(shape=(2, 3)) - y = backend.ones(shape=(3, 4)) - xy = backend.dot(x, y) - self.assertEqual(xy.shape.as_list(), [2, 4]) - - x = backend.ones(shape=(32, 28, 3)) - y = backend.ones(shape=(3, 4)) - xy = backend.dot(x, y) - self.assertEqual(xy.shape.as_list(), [32, 28, 4]) - - @parameterized.parameters( - [(2, 3, 4, 5), (2, 5, 6, 7), (2, 3, 4, 6, 7), (3, 1)], - [(2, 20, 1), (2, 30, 20), (2, 1, 30), (1, 2)], - [(4, 2, 3), (4, 5, 3), (4, 2, 5), (2, 2)], - [(4, 2), (4, 2, 3), (4, 3), (1, 1)], - [(4, 2), (4, 2, 3), (4, 3), 1], - [(4, 2, 3), (4, 3), (4, 2), (2, 1)], - ) - def test_batch_dot(self, x_shape, y_shape, output_shape, axes): - x_val = np.random.random(x_shape) - y_val = np.random.random(y_shape) - x = backend.variable(x_val) - y = 
backend.variable(y_val) - xy = backend.batch_dot(x, y, axes=axes) - self.assertEqual(tuple(xy.shape.as_list()), output_shape) - xy_val = backend.eval(xy) - ref_val = self._reference_batch_dot(x_val, y_val, axes) - self.assertAllClose(xy_val, ref_val, atol=1e-5) - - def _reference_batch_dot(self, x, y, axes): - if isinstance(axes, int): - axes = [axes, axes] - elif isinstance(axes, tuple): - axes = list(axes) - if axes is None: - if y.ndim == 2: - axes = [x.ndim - 1, y.ndim - 1] - else: - axes = [x.ndim - 1, y.ndim - 2] - if axes[0] < 0: - axes[0] += x.ndim - if axes[1] < 0: - axes[1] += y.ndim - result = [] - axes = [axes[0] - 1, axes[1] - 1] - for xi, yi in zip(x, y): - result.append(np.tensordot(xi, yi, axes)) - result = np.array(result) - if result.ndim == 1: - result = np.expand_dims(result, -1) - return result - - def test_reduction_ops(self): - ops_to_test = [ - (backend.max, np.max), - (backend.min, np.min), - (backend.sum, np.sum), - (backend.prod, np.prod), - (backend.var, np.var), - (backend.std, np.std), - (backend.mean, np.mean), - (backend.argmin, np.argmin), - (backend.argmax, np.argmax), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), - keras_kwargs={'axis': 1}, - np_kwargs={'axis': 1}) - compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), - keras_kwargs={'axis': -1}, - np_kwargs={'axis': -1}) - if 'keepdims' in tf_inspect.getargspec(keras_op).args: - compare_single_input_op_to_numpy(keras_op, np_op, - input_shape=(4, 7, 5), - keras_kwargs={'axis': 1, - 'keepdims': True}, - np_kwargs={'axis': 1, - 'keepdims': True}) - - def test_elementwise_ops(self): - ops_to_test = [ - (backend.square, np.square), - (backend.abs, np.abs), - (backend.round, np.round), - (backend.sign, np.sign), - (backend.sin, np.sin), - (backend.cos, np.cos), - (backend.exp, np.exp), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7)) - - ops_to_test = [ - (backend.sqrt, np.sqrt), - (backend.log, np.log), - ] - for keras_op, np_op in ops_to_test: - compare_single_input_op_to_numpy(keras_op, np_op, - input_shape=(4, 7), - negative_values=False) - - compare_single_input_op_to_numpy( - backend.clip, - np.clip, - input_shape=(6, 4), - keras_kwargs={ - 'min_value': 0.1, - 'max_value': 2.4 - }, - np_kwargs={ - 'a_min': 0.1, - 'a_max': 1.4 - }) - - compare_single_input_op_to_numpy( - backend.pow, np.power, input_shape=(6, 4), keras_args=[3], np_args=[3]) - - def test_two_tensor_ops(self): - ops_to_test = [ - (backend.equal, np.equal), - (backend.not_equal, np.not_equal), - (backend.greater, np.greater), - (backend.greater_equal, np.greater_equal), - (backend.less, np.less), - (backend.less_equal, np.less_equal), - (backend.maximum, np.maximum), - (backend.minimum, np.minimum), - ] - for keras_op, np_op in ops_to_test: - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 7), - input_shape_b=(4, 7)) - - def test_relu(self): - x = ops.convert_to_tensor_v2([[-4, 0], [2, 7]], 'float32') - - # standard relu - relu_op = backend.relu(x) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # alpha (leaky relu used) - relu_op = backend.relu(x, alpha=0.5) - if not context.executing_eagerly(): - self.assertTrue('LeakyRelu' in relu_op.name) - self.assertAllClose(backend.eval(relu_op), [[-2, 0], [2, 7]]) - - # max_value < some elements - relu_op = backend.relu(x, max_value=5) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 5]]) - - # 
nn.relu6 used - relu_op = backend.relu(x, max_value=6) - if not context.executing_eagerly(): - self.assertTrue('Relu6' in relu_op.name) # uses tf.nn.relu6 - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 6]]) - - # max value > 6 - relu_op = backend.relu(x, max_value=10) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # max value is float - relu_op = backend.relu(x, max_value=4.3) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 4.3]]) - - # max value == 0 - relu_op = backend.relu(x, max_value=0) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 0]]) - - # alpha and max_value - relu_op = backend.relu(x, alpha=0.25, max_value=3) - self.assertAllClose(backend.eval(relu_op), [[-1, 0], [2, 3]]) - - # threshold - relu_op = backend.relu(x, threshold=3) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 7]]) - - # threshold is float - relu_op = backend.relu(x, threshold=1.5) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [2, 7]]) - - # threshold is negative - relu_op = backend.relu(x, threshold=-5) - self.assertAllClose(backend.eval(relu_op), [[-4, 0], [2, 7]]) - - # threshold and max_value - relu_op = backend.relu(x, threshold=3, max_value=5) - self.assertAllClose(backend.eval(relu_op), [[0, 0], [0, 5]]) - - # threshold and alpha - relu_op = backend.relu(x, alpha=0.25, threshold=4) - self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 7]]) - - # threshold, alpha, and max_value - relu_op = backend.relu(x, alpha=0.25, threshold=4, max_value=5) - self.assertAllClose(backend.eval(relu_op), [[-2, -1], [-0.5, 5]]) - - # Test case for GitHub issue 35430, with integer dtype - x = input_layer.Input(shape=(), name='x', dtype='int64') - _ = advanced_activations.ReLU(max_value=100, dtype='int64')(x) - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendShapeOpsTest(test.TestCase): - - def test_reshape(self): - compare_single_input_op_to_numpy( - backend.reshape, - np.reshape, - input_shape=(4, 7), - keras_args=[(2, 14)], - np_args=[(2, 14)]) - - def test_concatenate(self): - a = backend.variable(np.ones((1, 2, 3))) - b = backend.variable(np.ones((1, 2, 2))) - y = backend.concatenate([a, b], axis=-1) - self.assertEqual(y.shape.as_list(), [1, 2, 5]) - - def test_permute_dimensions(self): - compare_single_input_op_to_numpy( - backend.permute_dimensions, - np.transpose, - input_shape=(4, 7), - keras_args=[(1, 0)], - np_args=[(1, 0)]) - - def test_resize_images(self): - height_factor = 2 - width_factor = 2 - data_format = 'channels_last' - x = backend.variable(np.ones((1, 2, 2, 3))) - y = backend.resize_images(x, height_factor, width_factor, data_format) - self.assertEqual(y.shape.as_list(), [1, 4, 4, 3]) - - data_format = 'channels_first' - x = backend.variable(np.ones((1, 3, 2, 2))) - y = backend.resize_images(x, height_factor, width_factor, data_format) - self.assertEqual(y.shape.as_list(), [1, 3, 4, 4]) - - # Invalid use: - with self.assertRaises(ValueError): - backend.resize_images( - x, height_factor, width_factor, data_format='unknown') - - def test_resize_volumes(self): - height_factor = 2 - width_factor = 2 - depth_factor = 2 - data_format = 'channels_last' - x = backend.variable(np.ones((1, 2, 2, 2, 3))) - y = backend.resize_volumes(x, depth_factor, height_factor, width_factor, - data_format) - self.assertEqual(y.shape.as_list(), [1, 4, 4, 4, 3]) - - data_format = 'channels_first' - x = backend.variable(np.ones((1, 3, 2, 2, 2))) - y = backend.resize_volumes(x, depth_factor, height_factor, width_factor, 
- data_format) - self.assertEqual(y.shape.as_list(), [1, 3, 4, 4, 4]) - - # Invalid use: - with self.assertRaises(ValueError): - backend.resize_volumes( - x, depth_factor, height_factor, width_factor, data_format='unknown') - - def test_repeat_elements(self): - x = backend.variable(np.ones((1, 3, 2))) - y = backend.repeat_elements(x, 3, axis=1) - self.assertEqual(y.shape.as_list(), [1, 9, 2]) - - # Use with a dynamic axis: - if not context.executing_eagerly(): - x = backend.placeholder(shape=(2, None, 2)) - y = backend.repeat_elements(x, 3, axis=1) - self.assertEqual(y.shape.as_list(), [2, None, 2]) - - def test_repeat(self): - x = backend.variable(np.ones((1, 3))) - y = backend.repeat(x, 2) - self.assertEqual(y.shape.as_list(), [1, 2, 3]) - - def test_flatten(self): - compare_single_input_op_to_numpy( - backend.flatten, - np.reshape, - input_shape=(4, 7, 6), - np_args=[(4 * 7 * 6,)]) - - def test_batch_flatten(self): - compare_single_input_op_to_numpy( - backend.batch_flatten, - np.reshape, - input_shape=(4, 7, 6), - np_args=[(4, 7 * 6)]) - - def test_temporal_padding(self): - - def ref_op(x, padding): - shape = list(x.shape) - shape[1] += padding[0] + padding[1] - y = np.zeros(tuple(shape)) - y[:, padding[0]:-padding[1], :] = x - return y - - compare_single_input_op_to_numpy( - backend.temporal_padding, - ref_op, - input_shape=(4, 7, 6), - keras_args=[(2, 3)], - np_args=[(2, 3)]) - - def test_spatial_2d_padding(self): - - def ref_op(x, padding, data_format='channels_last'): - shape = list(x.shape) - if data_format == 'channels_last': - shape[1] += padding[0][0] + padding[0][1] - shape[2] += padding[1][0] + padding[1][1] - y = np.zeros(tuple(shape)) - y[:, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1], :] = x - else: - shape[2] += padding[0][0] + padding[0][1] - shape[3] += padding[1][0] + padding[1][1] - y = np.zeros(tuple(shape)) - y[:, :, padding[0][0]:-padding[0][1], padding[1][0]:-padding[1][1]] = x - return y - - compare_single_input_op_to_numpy( - backend.spatial_2d_padding, - ref_op, - input_shape=(2, 3, 2, 3), - keras_args=[((2, 3), (1, 2))], - keras_kwargs={'data_format': 'channels_last'}, - np_args=[((2, 3), (1, 2))], - np_kwargs={'data_format': 'channels_last'}) - compare_single_input_op_to_numpy( - backend.spatial_2d_padding, - ref_op, - input_shape=(2, 3, 2, 3), - keras_args=[((2, 3), (1, 2))], - keras_kwargs={'data_format': 'channels_first'}, - np_args=[((2, 3), (1, 2))], - np_kwargs={'data_format': 'channels_first'}) - - def test_spatial_3d_padding(self): - - def ref_op(x, padding, data_format='channels_last'): - shape = list(x.shape) - if data_format == 'channels_last': - shape[1] += padding[0][0] + padding[0][1] - shape[2] += padding[1][0] + padding[1][1] - shape[3] += padding[2][0] + padding[2][1] - y = np.zeros(tuple(shape)) - y[:, - padding[0][0]:-padding[0][1], - padding[1][0]:-padding[1][1], - padding[2][0]:-padding[2][1], - :] = x - else: - shape[2] += padding[0][0] + padding[0][1] - shape[3] += padding[1][0] + padding[1][1] - shape[4] += padding[2][0] + padding[2][1] - y = np.zeros(tuple(shape)) - y[:, :, - padding[0][0]:-padding[0][1], - padding[1][0]:-padding[1][1], - padding[2][0]:-padding[2][1]] = x - return y - - compare_single_input_op_to_numpy( - backend.spatial_3d_padding, - ref_op, - input_shape=(2, 3, 2, 3, 2), - keras_args=[((2, 3), (1, 2), (2, 3))], - keras_kwargs={'data_format': 'channels_last'}, - np_args=[((2, 3), (1, 2), (2, 3))], - np_kwargs={'data_format': 'channels_last'}) - compare_single_input_op_to_numpy( - 
backend.spatial_3d_padding, - ref_op, - input_shape=(2, 3, 2, 3, 2), - keras_args=[((2, 3), (1, 2), (2, 3))], - keras_kwargs={'data_format': 'channels_first'}, - np_args=[((2, 3), (1, 2), (2, 3))], - np_kwargs={'data_format': 'channels_first'}) - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class BackendNNOpsTest(test.TestCase, parameterized.TestCase): - - def test_bias_add(self): - keras_op = backend.bias_add - np_op = np.add - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 7), - input_shape_b=(7,)) - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 3, 7), - input_shape_b=(7,)) - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 3, 5, 7), - input_shape_b=(7,)) - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 3, 5, 2, 7), - input_shape_b=(7,)) - - with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): - x = backend.variable((3, 4)) - b = backend.variable((3, 4)) - backend.bias_add(x, b) - with self.assertRaises(ValueError): - x = backend.variable((3, 4)) - b = backend.variable((4,)) - backend.bias_add(x, b, data_format='unknown') - - def test_bias_add_channels_first(self): - - def keras_op(x, b): - return backend.bias_add(x, b, data_format='channels_first') - - def np_op(x, b): - if x.ndim == 3: - b = b.reshape((1, b.shape[0], 1)) - if x.ndim == 4: - b = b.reshape((1, b.shape[0], 1, 1)) - return x + b - - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 3, 7), - input_shape_b=(3,)) - compare_two_inputs_op_to_numpy(keras_op, np_op, - input_shape_a=(4, 3, 5, 7), - input_shape_b=(3,)) - - def test_pool2d(self): - val = np.random.random((10, 3, 10, 10)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first', - pool_mode='max') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) - - y = backend.pool2d( - x, (2, 2), - strides=(1, 1), - padding='valid', - data_format='channels_first', - pool_mode='avg') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 9, 9, 3]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 3]) - - val = np.random.random((10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool2d( - x, (2, 2), strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 3]) - - with self.assertRaises(ValueError): - y = backend.pool2d( - x, (2, 2), - strides=(2, 2), - padding='other', - data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2, 2), strides=(2, 2)) - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2, 2)) - with self.assertRaises(ValueError): - y = backend.pool2d(x, (2, 2), strides=(2, 2), pool_mode='other') - - def test_pool3d(self): - if test.is_built_with_rocm(): - self.skipTest('Pooling with 3D tensors is not supported in ROCm') - val = np.random.random((10, 3, 10, 10, 10)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 
1, 1), - padding='valid', - data_format='channels_first', - pool_mode='max') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) - - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='valid', - data_format='channels_first', - pool_mode='avg') - self.assertEqual(y.shape.as_list(), [10, 3, 9, 9, 9]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='valid', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 9, 9, 9, 3]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(1, 1, 1), - padding='same', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 3]) - - val = np.random.random((10, 10, 10, 10, 3)) - x = backend.variable(val) - y = backend.pool3d( - x, (2, 2, 2), - strides=(2, 2, 2), - padding='same', - data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 3]) - - def test_conv1d(self): - val = np.random.random((10, 4, 10)) - x = backend.variable(val) - kernel_val = np.random.random((3, 4, 5)) - k = backend.variable(kernel_val) - y = backend.conv1d( - x, k, strides=(1,), padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(1,), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 5]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(1,), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 5]) - - val = np.random.random((10, 10, 4)) - x = backend.variable(val) - y = backend.conv1d( - x, k, strides=(2,), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5]) - - def test_local_conv_channels_dim(self): - filters = 3 - batch_size = 2 - - for input_shape in [(3, 5), (2, 3, 5), (2, 5, 3, 4)]: - channels_in = input_shape[0] - input_spatial_shape = input_shape[1:] - dim = len(input_spatial_shape) - - inputs = np.random.normal(0, 1, (batch_size,) + input_shape) - inputs_cf = backend.variable(inputs) - - for kernel_size in [1, 2]: - for stride in [1, 2]: - kernel_sizes = (kernel_size,) * dim - strides = (stride,) * dim - - output_shape = tuple([(i - kernel_size + stride) // stride - for i in input_spatial_shape]) - - kernel_shape = (np.prod(output_shape), - np.prod(kernel_sizes) * channels_in, - filters) - - kernel = np.random.normal( - 0, - 1, - output_shape + (channels_in, np.prod(kernel_sizes), filters) - ) - - kernel_cf = np.reshape(kernel, kernel_shape) - kernel_cf = backend.variable(kernel_cf) - - conv_cf = backend.local_conv(inputs_cf, kernel_cf, kernel_sizes, - strides, output_shape, 'channels_first') - - inputs_cl = np.transpose(inputs, [0, 2] + list(range(3, dim + 2)) + - [1]) - inputs_cl = backend.variable(inputs_cl) - - kernel_cl = np.reshape( - np.transpose(kernel, list(range(dim)) + [dim + 1, dim, dim + 2]), - kernel_shape - ) - kernel_cl = backend.variable(kernel_cl) - - conv_cl = backend.local_conv(inputs_cl, kernel_cl, kernel_sizes, - strides, output_shape, 'channels_last') - - conv_cf = backend.eval(conv_cf) - conv_cl = backend.eval(conv_cl) - - self.assertAllCloseAccordingToType( - conv_cf, - np.transpose(conv_cl, - [0, dim + 1] + list(range(1, dim + 1))), - atol=1e-5 - ) - - 
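Both local_conv tests around this point lean on the valid-padding output-length formula `(i - kernel_size + stride) // stride`. As a standalone check of that arithmetic (a sketch in plain Python; the helper name is illustrative, not part of the deleted file):

```
def conv_output_length(input_length, kernel_size, stride):
  # Number of positions a size-`kernel_size` window can take along an
  # axis of `input_length` samples with 'valid' padding at this stride.
  return (input_length - kernel_size + stride) // stride

assert conv_output_length(5, 2, 1) == 4  # e.g. the (3, 5) input case above
assert conv_output_length(5, 2, 2) == 2  # stride 2 roughly halves positions
```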
@parameterized.named_parameters( - ('local_conv1d', (5, 6), (3,), (1,), (3,)), - ('local_conv2d', (4, 5, 6), (3, 3), (1, 1), (2, 3))) - def test_local_conv_1d_and_2d(self, - input_shape, - kernel_sizes, - strides, - output_shape): - filters = 3 - batch_size = 2 - - inputs = np.random.normal(0, 1, (batch_size,) + input_shape) - inputs = backend.variable(inputs) - - kernel = np.random.normal(0, 1, (np.prod(output_shape), - np.prod(kernel_sizes) * input_shape[-1], - filters)) - kernel = backend.variable(kernel) - - local_conv = backend.local_conv(inputs, kernel, kernel_sizes, strides, - output_shape, 'channels_last') - if len(output_shape) == 1: - local_conv_dim = backend.local_conv1d(inputs, kernel, kernel_sizes, - strides, 'channels_last') - else: - local_conv_dim = backend.local_conv2d(inputs, kernel, kernel_sizes, - strides, output_shape, - 'channels_last') - - local_conv = backend.eval(local_conv) - local_conv_dim = backend.eval(local_conv_dim) - - self.assertAllCloseAccordingToType(local_conv, local_conv_dim) - - def test_conv2d(self): - kernel_val = np.random.random((3, 3, 4, 5)) - k = backend.variable(kernel_val) - - # Test channels_first - val = np.random.random((10, 4, 10, 10)) - x = backend.variable(val) - y = backend.conv2d(x, k, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) - - # Test channels_last - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) - - # Test same padding - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d(x, k, padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - # Test dilation_rate - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, dilation_rate=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - # Test strides - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv2d( - x, k, strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) - - # Test invalid arguments - with self.assertRaises(ValueError): - y = backend.conv2d( - x, k, (2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv2d(x, k, (2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.conv2d(x, k, (2, 2, 2)) - - def test_conv2d_transpose(self): - input_size = (7, 8) - kernel_size = (3, 3) - input_depth = 6 - filters = 6 - batch_size = 2 - - kernel_val = np.random.random(kernel_size + (input_depth, filters)) - k = backend.variable(kernel_val) - - # Test channels_first - input_val = np.random.random((batch_size, input_depth) + input_size) - x = backend.variable(input_val) - y = backend.conv2d_transpose( - x, - k, (batch_size, filters) + input_size, - padding='same', - data_format='channels_first') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size, filters) + input_size) - - # Test channels_last - input_val = np.random.random((batch_size,) + input_size + (input_depth,)) - x = backend.variable(input_val) - y = backend.conv2d_transpose( - x, - k, (batch_size,) + input_size + (filters,), - padding='same', - data_format='channels_last') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + 
(filters,)) - - # Test dilation_rate - y = backend.conv2d_transpose( - x, - k, (batch_size,) + input_size + (filters,), - padding='same', - data_format='channels_last', - dilation_rate=(2, 2)) - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,)) - - # Test batch size of None in output_shape - y = backend.conv2d_transpose( - x, - k, (None,) + input_size + (filters,), - padding='same', - data_format='channels_last') - self.assertEqual( - tuple(y.shape.as_list()), (batch_size,) + input_size + (filters,)) - - # Test invalid values - with self.assertRaises(ValueError): - y = backend.conv2d_transpose( - x, k, (2, 2, 8, 9), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv2d_transpose(x, k, (2, 2, 8, 9), data_format='other') - - def test_separable_conv2d(self): - val = np.random.random((10, 4, 10, 10)) - x = backend.variable(val) - depthwise_kernel_val = np.random.random((3, 3, 4, 1)) - pointwise_kernel_val = np.random.random((1, 1, 4, 5)) - dk = backend.variable(depthwise_kernel_val) - pk = backend.variable(pointwise_kernel_val) - y = backend.separable_conv2d( - x, dk, pk, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 5]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 5]) - - val = np.random.random((10, 10, 10, 4)) - x = backend.variable(val) - y = backend.separable_conv2d( - x, dk, pk, strides=(2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5]) - with self.assertRaises(ValueError): - y = backend.separable_conv2d( - x, dk, pk, (2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.separable_conv2d(x, dk, pk, (2, 2), data_format='other') - with self.assertRaises(ValueError): - y = backend.separable_conv2d(x, dk, pk, (2, 2, 2)) - - def test_conv3d(self): - val = np.random.random((10, 4, 10, 10, 10)) - x = backend.variable(val) - kernel_val = np.random.random((3, 3, 3, 4, 5)) - k = backend.variable(kernel_val) - y = backend.conv3d(x, k, padding='valid', data_format='channels_first') - self.assertEqual(y.shape.as_list(), [10, 5, 8, 8, 8]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(1, 1, 1), padding='valid', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 8, 8, 8, 5]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(1, 1, 1), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 10, 10, 10, 5]) - - val = np.random.random((10, 10, 10, 10, 4)) - x = backend.variable(val) - y = backend.conv3d( - x, k, strides=(2, 2, 2), padding='same', data_format='channels_last') - self.assertEqual(y.shape.as_list(), [10, 5, 5, 5, 5]) - with self.assertRaises(ValueError): - y = backend.conv3d( - x, k, (2, 2, 2), padding='other', data_format='channels_last') - with self.assertRaises(ValueError): - y = backend.conv3d(x, k, (2, 2, 2), data_format='other') - with 
self.assertRaises(ValueError): - y = backend.conv3d(x, k, (2, 2)) - - def test_rnn(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - input_val = np.random.random( - (num_samples, timesteps, input_dim)).astype(np.float32) - init_state_val = np.random.random( - (num_samples, output_dim)).astype(np.float32) - w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32) - w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) - np_mask = np.random.randint(2, size=(num_samples, timesteps)) - - def rnn_step_fn(): - w_i = backend.variable(w_i_val) - w_o = backend.variable(w_o_val) - - def step_function(x, states): - assert len(states) == 1 - prev_output = states[0] - output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) - return output, [output] - - return step_function - - # test default setup - last_output_list = [[], [], [], [], [], []] - outputs_list = [[], [], [], [], [], []] - state_list = [[], [], [], [], [], []] - - rnn_fn = rnn_step_fn() - inputs = backend.variable(input_val) - initial_states = [backend.variable(init_state_val)] - mask = backend.variable(np_mask) - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': False, 'mask': None, 'unroll': True}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': True, 'mask': None, 'unroll': True}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': False, 'mask': mask, 'unroll': True}, - ] - for i, kwargs in enumerate(kwargs_list): - last_output, outputs, new_states = backend.rnn(rnn_fn, inputs, - initial_states, **kwargs) - # check static shape inference - self.assertEqual(last_output.shape.as_list(), [num_samples, output_dim]) - self.assertEqual(outputs.shape.as_list(), - [num_samples, timesteps, output_dim]) - for state in new_states: - self.assertEqual(state.shape.as_list(), [num_samples, output_dim]) - - last_output_list[i].append(backend.eval(last_output)) - outputs_list[i].append(backend.eval(outputs)) - self.assertLen(new_states, 1) - state_list[i].append(backend.eval(new_states[0])) - - def assert_list_pairwise(z_list, atol=1e-05): - for (z1, z2) in zip(z_list[1:], z_list[:-1]): - self.assertAllClose(z1, z2, atol=atol) - - assert_list_pairwise(last_output_list[0], atol=1e-04) - assert_list_pairwise(outputs_list[0], atol=1e-04) - assert_list_pairwise(state_list[0], atol=1e-04) - assert_list_pairwise(last_output_list[2], atol=1e-04) - assert_list_pairwise(outputs_list[2], atol=1e-04) - assert_list_pairwise(state_list[2], atol=1e-04) - - for l, u_l in zip(last_output_list[0], last_output_list[1]): - self.assertAllClose(l, u_l, atol=1e-04) - - for o, u_o in zip(outputs_list[0], outputs_list[1]): - self.assertAllClose(o, u_o, atol=1e-04) - - for s, u_s in zip(state_list[0], state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): - self.assertAllClose(b_l, b_u_l, atol=1e-04) - - for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): - self.assertAllClose(b_o, b_u_o, atol=1e-04) - - for b_s, b_u_s in zip(state_list[2], state_list[3]): - self.assertAllClose(b_s, b_u_s, atol=1e-04) - - def test_rnn_additional_states(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - input_val = np.random.random( - (num_samples, timesteps, input_dim)).astype(np.float32) - init_state_val = np.random.random( - (num_samples, output_dim)).astype(np.float32) - w_i_val = np.random.random((input_dim, 
output_dim)).astype(np.float32) - w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32) - np_mask = np.random.randint(2, size=(num_samples, timesteps)) - - def rnn_step_fn(): - w_i = backend.variable(w_i_val) - w_o = backend.variable(w_o_val) - - def step_function(x, states): - assert len(states) == 2 - prev_output = states[0] - output = backend.dot(x, w_i) + backend.dot(prev_output, w_o) - return output, [output, backend.concatenate([output, output], axis=-1)] - - return step_function - - # test default setup - last_output_list = [[], [], [], [], [], []] - outputs_list = [[], [], [], [], [], []] - state_list = [[], [], [], [], [], []] - additional_state_list = [[], [], [], [], [], []] - - rnn_fn = rnn_step_fn() - inputs = backend.variable(input_val) - initial_states = [ - backend.variable(init_state_val), - ops.convert_to_tensor_v2( - np.concatenate([init_state_val, init_state_val], axis=-1)) - ] - mask = backend.variable(np_mask) - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': False, 'mask': None, 'unroll': True}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': True, 'mask': None, 'unroll': True}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': False, 'mask': mask, 'unroll': True}, - ] - for i, kwargs in enumerate(kwargs_list): - last_output, outputs, new_states = backend.rnn(rnn_fn, inputs, - initial_states, **kwargs) - # check static shape inference - self.assertEqual(last_output.shape.as_list(), [num_samples, output_dim]) - self.assertEqual(outputs.shape.as_list(), - [num_samples, timesteps, output_dim]) - # for state in new_states: - # self.assertEqual(state.shape.as_list(), - # [num_samples, output_dim]) - self.assertEqual(new_states[0].shape.as_list(), [num_samples, output_dim]) - self.assertEqual(new_states[1].shape.as_list(), - [num_samples, 2 * output_dim]) - - last_output_list[i].append(backend.eval(last_output)) - outputs_list[i].append(backend.eval(outputs)) - self.assertLen(new_states, 2) - state_list[i].append(backend.eval(new_states[0])) - additional_state_list[i].append(backend.eval(new_states[1])) - - def assert_list_pairwise(z_list, atol=1e-05): - for (z1, z2) in zip(z_list[1:], z_list[:-1]): - self.assertAllClose(z1, z2, atol=atol) - - assert_list_pairwise(last_output_list[0], atol=1e-04) - assert_list_pairwise(outputs_list[0], atol=1e-04) - assert_list_pairwise(state_list[0], atol=1e-04) - assert_list_pairwise(additional_state_list[0], atol=1e-04) - assert_list_pairwise(last_output_list[2], atol=1e-04) - assert_list_pairwise(outputs_list[2], atol=1e-04) - assert_list_pairwise(state_list[2], atol=1e-04) - assert_list_pairwise(additional_state_list[2], atol=1e-04) - - for l, u_l in zip(last_output_list[0], last_output_list[1]): - self.assertAllClose(l, u_l, atol=1e-04) - - for o, u_o in zip(outputs_list[0], outputs_list[1]): - self.assertAllClose(o, u_o, atol=1e-04) - - for s, u_s in zip(state_list[0], state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for s, u_s in zip(additional_state_list[0], additional_state_list[1]): - self.assertAllClose(s, u_s, atol=1e-04) - - for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]): - self.assertAllClose(b_l, b_u_l, atol=1e-04) - - for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]): - self.assertAllClose(b_o, b_u_o, atol=1e-04) - - for b_s, b_u_s in zip(state_list[2], state_list[3]): - self.assertAllClose(b_s, b_u_s, atol=1e-04) - - for s, u_s in zip(additional_state_list[2], additional_state_list[3]): - self.assertAllClose(s, u_s, 
atol=1e-04) - - def test_rnn_output_and_state_masking_independent(self): - num_samples = 2 - num_timesteps = 4 - state_and_io_size = 2 - mask_last_num_timesteps = 2 # for second sample only - - # a step function that just outputs inputs, - # but increments states +1 per timestep - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random((num_samples, num_timesteps, - state_and_io_size)) - initial_state_vals = np.random.random((num_samples, state_and_io_size)) - # masking of two last timesteps for second sample only - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[1, -mask_last_num_timesteps:] = 0 - - # outputs expected to be same as inputs for the first sample - expected_outputs = inputs_vals.copy() - # but for the second sample all outputs in masked region should be the same - # as last output before masked region - expected_outputs[1, -mask_last_num_timesteps:] = \ - expected_outputs[1, -(mask_last_num_timesteps + 1)] - - expected_last_state = initial_state_vals.copy() - # first state should be incremented for every timestep (no masking) - expected_last_state[0] += num_timesteps - # second state should not be incremented for last two timesteps - expected_last_state[1] += (num_timesteps - mask_last_num_timesteps) - - # verify same expected output for `unroll=true/false` - inputs = backend.variable(inputs_vals) - initial_states = [backend.variable(initial_state_vals)] - mask = backend.variable(mask_vals) - for unroll in [True, False]: - _, outputs, last_states = backend.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - self.assertAllClose(backend.eval(outputs), expected_outputs) - self.assertAllClose(backend.eval(last_states[0]), expected_last_state) - - def test_rnn_output_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - num_features = 5 - - def step_function(inputs, states): - outputs = backend.tile(backend.expand_dims(inputs), [1, 1, 2]) - return outputs, [backend.identity(s) for s in states] - # Note: cannot just return states (which can be a problem) -> - # tensorflow/python/ops/resource_variable_ops.py", line 824, in set_shape - # NotImplementedError: ResourceVariable does not implement set_shape() - - inputs_vals = np.random.random((num_samples, num_timesteps, num_features)) - initial_state_vals = np.random.random((num_samples, 6)) - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[-1, -1] = 0 # final timestep masked for last sample - - expected_outputs = np.repeat(inputs_vals[..., None], repeats=2, axis=-1) - # for the last sample, the final timestep (in masked region) should be the - # same as the second to final output (before masked region) - expected_outputs[-1, -1] = expected_outputs[-1, -2] - - inputs = backend.variable(inputs_vals) - initial_states = [backend.variable(initial_state_vals)] - mask = backend.variable(mask_vals) - for unroll in [True, False]: - _, outputs, _ = backend.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - self.assertAllClose(backend.eval(outputs), expected_outputs) - - def test_rnn_state_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random((num_samples, num_timesteps, 5)) - initial_state_vals = np.random.random((num_samples, 6, 7)) - mask_vals 
= np.ones((num_samples, num_timesteps))
-    mask_vals[0, -2:] = 0  # final two timesteps masked for first sample
-
-    expected_last_state = initial_state_vals.copy()
-    expected_last_state[0] += (num_timesteps - 2)
-    expected_last_state[1:] += num_timesteps
-
-    inputs = backend.variable(inputs_vals)
-    initial_states = [backend.variable(initial_state_vals)]
-    mask = backend.variable(mask_vals)
-    for unroll in [True, False]:
-      _, _, last_states = backend.rnn(
-          step_function,
-          inputs,
-          initial_states,
-          mask=mask,
-          unroll=unroll,
-          input_length=num_timesteps if unroll else None)
-
-      self.assertAllClose(backend.eval(last_states[0]), expected_last_state)
-
-  def test_batch_normalization(self):
-    g_val = np.random.random((3,))
-    b_val = np.random.random((3,))
-    gamma = backend.variable(g_val)
-    beta = backend.variable(b_val)
-
-    # 3D NHC case
-    val = np.random.random((10, 5, 3))
-    x = backend.variable(val)
-    mean, var = nn.moments(x, (0, 1), None, None, False)
-    normed = backend.batch_normalization(
-        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 5, 3])
-
-    # 4D NHWC case
-    val = np.random.random((10, 5, 5, 3))
-    x = backend.variable(val)
-    mean, var = nn.moments(x, (0, 1, 2), None, None, False)
-    normed = backend.batch_normalization(
-        x, mean, var, beta, gamma, axis=-1, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 5, 5, 3])
-
-    # 4D NCHW case
-    if not context.executing_eagerly():
-      # Eager CPU kernel for NCHW does not exist.
-      val = np.random.random((10, 3, 5, 5))
-      x = backend.variable(val)
-      mean, var = nn.moments(x, (0, 2, 3), None, None, False)
-      normed = backend.batch_normalization(
-          x, mean, var, beta, gamma, axis=1, epsilon=1e-3)
-      self.assertEqual(normed.shape.as_list(), [10, 3, 5, 5])
-
-  def test_normalize_batch_in_training(self):
-    val = np.random.random((10, 3, 10, 10))
-    x = backend.variable(val)
-    reduction_axes = (0, 2, 3)
-
-    g_val = np.random.random((3,))
-    b_val = np.random.random((3,))
-    gamma = backend.variable(g_val)
-    beta = backend.variable(b_val)
-    normed, mean, var = backend.normalize_batch_in_training(
-        x, gamma, beta, reduction_axes, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10])
-    self.assertEqual(mean.shape.as_list(), [3])
-    self.assertEqual(var.shape.as_list(), [3])
-
-    # case: gamma=None
-    gamma = None
-    normed, mean, var = backend.normalize_batch_in_training(
-        x, gamma, beta, reduction_axes, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10])
-    self.assertEqual(mean.shape.as_list(), [3])
-    self.assertEqual(var.shape.as_list(), [3])
-
-    # case: beta=None
-    beta = None
-    normed, mean, var = backend.normalize_batch_in_training(
-        x, gamma, beta, reduction_axes, epsilon=1e-3)
-    self.assertEqual(normed.shape.as_list(), [10, 3, 10, 10])
-    self.assertEqual(mean.shape.as_list(), [3])
-    self.assertEqual(var.shape.as_list(), [3])
-
-  def test_dropout(self):
-    inputs = array_ops.ones((200, 200))
-    outputs = backend.dropout(inputs, 0.2)
-    outputs_val = backend.eval(outputs)
-    self.assertEqual(np.min(outputs_val), 0)
-    self.assertAllClose(np.count_nonzero(outputs_val), 32000, atol=1000)
-    # Test noise shape: with noise_shape=(200, 1) a single keep/drop flag is
-    # drawn per row and broadcast across axis 1, so any two columns must be
-    # identical (comparing two rows would be nondeterministic).
-    outputs = backend.dropout(inputs, 0.2, noise_shape=(200, 1))
-    outputs_val = backend.eval(outputs)
-    self.assertAllClose(outputs_val[:, 2], outputs_val[:, 3], atol=1e-5)
-
-
-class BackendCrossEntropyLossesTest(test.TestCase, parameterized.TestCase):
-
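The expected values in the first test below follow directly from the binary cross-entropy definition `-(t * log(p) + (1 - t) * log(1 - p))` with `p = sigmoid(logits)`; a quick NumPy reproduction of the `[8., 0.313, 1.313]` triple (illustrative, not part of the deleted file):

```
import numpy as np

t = np.array([0., 1., 0.])
logits = np.array([8., 1., 1.])
p = 1. / (1. + np.exp(-logits))  # sigmoid
loss = -(t * np.log(p) + (1. - t) * np.log(1. - p))
print(np.round(loss, 3))  # -> approximately [8. 0.313 1.313]
```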
@combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_binary_crossentropy_with_sigmoid(self): - t = backend.constant([[0, 1, 0]]) - logits = backend.constant([[8., 1., 1.]]) - p = backend.sigmoid(logits) - p = array_ops.identity(array_ops.identity(p)) - result = self.evaluate(backend.binary_crossentropy(t, p)) - self.assertArrayNear(result[0], [8., 0.313, 1.313], 1e-3) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss(self): - t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - - p = backend.constant([[.9, .05, .05], [.05, .89, .06], [.05, .01, .94]]) - result = backend.categorical_crossentropy(t, p) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[.9, .05, .05], [.05, .89, .01], [.05, .06, .94]]) - result = backend.categorical_crossentropy(t, p, axis=0) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - result = backend.categorical_crossentropy(t, p, from_logits=True), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - p = backend.constant([[8., 0., 2.], [1., 9., 3.], [1., 1., 5.]]) - result = backend.categorical_crossentropy(t, p, from_logits=True, axis=0), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - t = backend.placeholder() - p = backend.placeholder() - o = backend.categorical_crossentropy(t, p) - - t_val = ops.convert_to_tensor_v2([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]) - p_val = ops.convert_to_tensor_v2([[.9, .05, .05], [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # With axis set - o = backend.categorical_crossentropy(t, p, axis=0) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .065, .111], 1e-3) - - # from logits - p_val = ops.convert_to_tensor_v2([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - o = backend.categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - # from logits and axis set - o = backend.categorical_crossentropy(t, p, from_logits=True, axis=0) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, .003, .036], 1e-3) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_categorical_crossentropy_with_softmax(self): - t = backend.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = backend.softmax(logits) - p = array_ops.identity(array_ops.identity(p)) - result = self.evaluate(backend.categorical_crossentropy(t, p)) - self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_loss(self): - t = backend.constant([0, 1, 2]) - - p = backend.constant([[.9, .05, .05], [.05, .89, .06], [.05, .01, .94]]) - result = backend.sparse_categorical_crossentropy(t, p) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[.9, .05, .05], [.05, .89, .01], [.05, .06, 
.94]]) - result = backend.sparse_categorical_crossentropy(t, p, axis=0) - self.assertArrayNear(self.evaluate(result), [.105, .116, .062], 1e-3) - - p = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - result = backend.sparse_categorical_crossentropy(t, p, from_logits=True), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - p = backend.constant([[8., 0., 2.], [1., 9., 3.], [1., 1., 5.]]) - result = backend.sparse_categorical_crossentropy( - t, p, from_logits=True, axis=0), - self.assertArrayNear(self.evaluate(result)[0], [.002, 0, .17], 1e-3) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_loss_with_unknown_rank_tensor(self): - t = backend.placeholder() - p = backend.placeholder() - o = backend.sparse_categorical_crossentropy(t, p) - - t_val = ops.convert_to_tensor_v2([0, 1, 2]) - p_val = ops.convert_to_tensor_v2([[.9, .05, .05], [.05, .89, .06], - [.05, .01, .94]]) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.105, .116, .062], 1e-3) - - # With axis set - with self.assertRaisesRegex( - ValueError, - 'Cannot compute sparse categorical crossentropy with `axis=0`'): - o = backend.sparse_categorical_crossentropy(t, p, axis=0) - f = backend.function([t, p], o) - - _ = f([t_val, p_val]) - - # from logits - p_val = ops.convert_to_tensor_v2([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - o = backend.sparse_categorical_crossentropy(t, p, from_logits=True) - f = backend.function([t, p], o) - - result = f([t_val, p_val]) - self.assertArrayNear(result, [.002, 0, .17], 1e-3) - - # from logits and axis set - with self.assertRaisesRegex( - ValueError, - 'Cannot compute sparse categorical crossentropy with `axis=0`'): - o = backend.sparse_categorical_crossentropy( - t, p, from_logits=True, axis=0) - f = backend.function([t, p], o) - - _ = f([t_val, p_val]) - - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) - def test_sparse_categorical_crossentropy_with_softmax(self): - t = backend.constant([0, 1, 2]) - logits = backend.constant([[8., 1., 1.], [0., 9., 1.], [2., 3., 5.]]) - p = backend.softmax(logits) - p = array_ops.identity(array_ops.identity(p)) - result = self.evaluate(backend.sparse_categorical_crossentropy(t, p)) - self.assertArrayNear(result, [0.002, 0.0005, 0.17], 1e-3) - - -@test_util.with_control_flow_v2 -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class TestCTC(test.TestCase): - - def test_ctc_decode(self): - depth = 6 - seq_len_0 = 5 - input_prob_matrix_0 = np.asarray( - [[0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], - [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517], - [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], - [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], - [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], - # Random entry added in at time=5 - [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671]], - dtype=np.float32) - - # len max_time_steps array of batch_size x depth matrices - inputs = ([input_prob_matrix_0[t, :][np.newaxis, :] - for t in range(seq_len_0)] + # Pad to max_time_steps = 8 - 2 * [np.zeros((1, depth), dtype=np.float32)]) - - inputs = backend.variable(np.asarray(inputs).transpose((1, 0, 2))) - - # batch_size length vector of sequence_lengths - input_length = backend.variable(np.array([seq_len_0], dtype=np.int32)) - # batch_size length vector of negative log probabilities - 
log_prob_truth = np.array([ - -3.5821197, # output beam 0 - -3.777835 # output beam 1 - ], np.float32)[np.newaxis, :] - - decode_truth = [np.array([1, 0]), np.array([0, 1, 0])] - beam_width = 2 - top_paths = 2 - - decode_pred_tf, log_prob_pred_tf = backend.ctc_decode( - inputs, - input_length, - greedy=False, - beam_width=beam_width, - top_paths=top_paths) - - self.assertEqual(len(decode_pred_tf), top_paths) - log_prob_pred = backend.eval(log_prob_pred_tf) - for i in range(top_paths): - self.assertTrue( - np.alltrue(decode_truth[i] == backend.eval(decode_pred_tf[i]))) - self.assertAllClose(log_prob_truth, log_prob_pred) - - def test_ctc_batch_cost(self): - with self.cached_session(): - label_lens = np.expand_dims(np.asarray([5, 4]), 1) - input_lens = np.expand_dims(np.asarray([5, 5]), 1) # number of timesteps - loss_log_probs = [3.34211, 5.42262] - - # dimensions are batch x time x categories - labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], - [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], - [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]], - [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508], - [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549], - [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456], - [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345], - [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]]], - dtype=np.float32) - - labels = backend.variable(labels, dtype='int32') - inputs = backend.variable(inputs, dtype='float32') - input_lens = backend.variable(input_lens, dtype='int32') - label_lens = backend.variable(label_lens, dtype='int32') - res = backend.eval( - backend.ctc_batch_cost(labels, inputs, input_lens, label_lens)) - self.assertAllClose(res[:, 0], loss_log_probs, atol=1e-05) - - # test when batch_size = 1, that is, one sample only - ref = [3.34211] - input_lens = np.expand_dims(np.asarray([5]), 1) - label_lens = np.expand_dims(np.asarray([5]), 1) - - labels = np.asarray([[0, 1, 2, 1, 0]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], [ - 0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436 - ], [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]] - ], - dtype=np.float32) - - k_labels = backend.variable(labels, dtype='int32') - k_inputs = backend.variable(inputs, dtype='float32') - k_input_lens = backend.variable(input_lens, dtype='int32') - k_label_lens = backend.variable(label_lens, dtype='int32') - res = backend.eval( - backend.ctc_batch_cost(k_labels, k_inputs, k_input_lens, - k_label_lens)) - self.assertAllClose(res[:, 0], ref, atol=1e-05) - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class TestRandomOps(test.TestCase): - - def test_random_normal(self): - np.random.seed(123) - x = backend.random_normal((500, 500)) - val = backend.eval(x) - self.assertAllClose(np.mean(val), 0., atol=0.01) - self.assertAllClose(np.std(val), 1., atol=0.01) - - def test_random_uniform(self): - np.random.seed(123) - x = backend.random_uniform((500, 500)) - val = backend.eval(x) - self.assertAllClose(np.mean(val), 0.5, 
atol=0.01)
-    self.assertAllClose(np.max(val), 1., atol=0.01)
-    self.assertAllClose(np.min(val), 0., atol=0.01)
-
-  def test_random_binomial(self):
-    np.random.seed(123)
-    x = backend.random_binomial((500, 500), p=0.5)
-    self.assertAllClose(np.mean(backend.eval(x)), 0.5, atol=0.01)
-
-  def test_truncated_normal(self):
-    np.random.seed(123)
-    x = backend.truncated_normal((1000, 1000), mean=0.0, stddev=1.0)
-    y = backend.eval(x)
-    self.assertAllClose(np.mean(y), 0., atol=0.01)
-    self.assertAllClose(np.std(y), 0.88, atol=0.01)
-    self.assertAllClose(np.max(y), 2., atol=0.01)
-    self.assertAllClose(np.min(y), -2., atol=0.01)
-
-
-@combinations.generate(combinations.combine(mode=['graph', 'eager']))
-class FunctionTest(test.TestCase):
-
-  def test_function_basics(self):
-    x1 = backend.placeholder(shape=(), dtype='float32')
-    x2 = backend.placeholder(shape=(), dtype='int32')
-    v = backend.variable(10.)
-
-    y1 = x1 + backend.cast(x2, 'float32') + v
-    y2 = x1 * backend.cast(x2, 'float32')
-
-    with ops.control_dependencies([y1]):
-      u = backend.update(v, x1)
-
-    f = backend.function([x1, x2], [y1, y2], updates=[u])
-    output_values = f([2, 3])
-    self.assertEqual(output_values, [15., 6.])
-    self.assertEqual(backend.eval(v), 2.)
-
-  def test_function_dict_outputs(self):
-    x_ph = backend.placeholder(shape=(), name='x')
-    y_ph = backend.placeholder(shape=(), name='y')
-    outputs = {'x*y': y_ph * x_ph, 'x*x': x_ph * x_ph}
-
-    f = backend.function(inputs=[x_ph, y_ph], outputs=outputs)
-    x, y = 2., 5.
-    results = f([x, y])
-
-    self.assertEqual(results['x*y'], 10.)
-    self.assertEqual(results['x*x'], 4.)
-
-  def test_function_dict_inputs(self):
-    placeholders = {
-        'x': backend.placeholder(shape=()),
-        'y': backend.placeholder(shape=())
-    }
-    outputs = [placeholders['x'] * placeholders['y']]
-
-    f = backend.function(inputs=placeholders, outputs=outputs)
-    results = f({'x': 2., 'y': 3.})
-    self.assertEqual(results[0], 6.)
-
-  def test_function_single_input_output(self):
-    x_ph = backend.placeholder(shape=(), name='x')
-    output = x_ph * x_ph
-    f = backend.function(x_ph, output)
-    result = f(2.)
-    self.assertEqual(result, 4.)
-
-  def test_tuple_updates(self):
-    x_ph = backend.placeholder(ndim=2)
-    v = backend.variable(np.ones((4, 2)))
-    output = x_ph ** 2 + v
-    new_v = v + x_ph
-    f = backend.function(x_ph, output, updates=[(v, new_v)])
-    input_val = np.random.random((4, 2))
-    result = f(input_val)
-    self.assertAllClose(result, input_val ** 2 + 1)
-    self.assertAllClose(backend.get_value(v), np.ones((4, 2)) + input_val)
-
-
-class BackendGraphTests(test.TestCase, parameterized.TestCase):
-
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
-  def test_function_placeholder_with_default(self):
-    with backend.get_graph().as_default():
-      x1 = array_ops.placeholder_with_default(
-          np.array(2., dtype='float32'), shape=())
-      x2 = array_ops.placeholder_with_default(
-          np.array(3, dtype='int32'), shape=())
-      y1 = x1 + backend.cast(x2, 'float32')
-      y2 = x1 * backend.cast(x2, 'float32')
-      f = backend.function([x1, x2], [y1, y2])
-      output_values = f([4, 5])
-      self.assertEqual(output_values, [9., 20.])
-      output_values = f([None, None])
-      self.assertEqual(output_values, [5., 6.])
-
-  def test_function_tf_feed_symbols(self):
-    # Test Keras backend functions with TF tensor inputs.
-    with ops.Graph().as_default(), self.cached_session():
-      # Test feeding a resource variable to `function`.
- x1 = backend.placeholder(shape=()) - x2 = backend.placeholder(shape=()) - lr = backend.learning_phase() # Include a placeholder_with_default. - - y1 = backend.variable(10.) - y2 = 3 - - f = backend.function( - inputs=[x1, x2, lr], - outputs=[x1 + 1, backend.in_train_phase(x2 + 2, x2 - 1)]) - outs = f([y1, y2, None]) # Use default learning_phase value. - self.assertEqual(outs, [11., 2.]) - outs = f([y1, y2, 1]) # Set learning phase value. - self.assertEqual(outs, [11., 5.]) - - # Test triggering a callable refresh by changing the input. - y3 = backend.constant(20.) # Test with tensor - outs = f([y3, y2, None]) - self.assertEqual(outs, [21., 2.]) - - y4 = 4 # Test with non-symbol - outs = f([y4, y2, None]) - self.assertEqual(outs, [5., 2.]) - - # Test with a different dtype - y5 = backend.constant(10., dtype='float64') - outs = f([y5, y2, None]) - self.assertEqual(outs, [11., 2.]) - - def test_function_tf_fetches(self): - # Additional operations can be passed to tf.compat.v1.Session().run() via - # its `fetches` arguments. In contrast to `updates` argument of - # backend.function() these do not have control dependency on `outputs` - # so they can run in parallel. Also they should not contribute to output of - # backend.function(). - with ops.Graph().as_default(), self.cached_session(): - x = backend.variable(0.) - y = backend.variable(0.) - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - updates=[(x, x_placeholder + 1.)], - fetches=[backend.update(y, 5.)]) - output = f([10., 20.]) - self.assertEqual(output, [30.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [11., 5.]) - - def test_function_tf_feed_dict(self): - # Additional substitutions can be passed to `tf.compat.v1.Session().run()` - # via its `feed_dict` arguments. Note that the feed_dict is passed once in - # the constructor but we can modify the values in the dictionary. Through - # this feed_dict we can provide additional substitutions besides Keras - # inputs. - with ops.Graph().as_default(), self.cached_session(): - x = backend.variable(0.) - y = backend.variable(0.) - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - feed_dict = {y_placeholder: 3.} - fetches = [backend.update(y, y_placeholder * 10.)] - f = backend.function( - inputs=[x_placeholder], - outputs=[x_placeholder + 1.], - updates=[(x, x_placeholder + 10.)], - feed_dict=feed_dict, - fetches=fetches) - output = f([10.]) - self.assertEqual(output, [11.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [20., 30.]) - - # updated value in feed_dict will be modified within the K.function() - feed_dict[y_placeholder] = 4. - output = f([20.]) - self.assertEqual(output, [21.]) - self.assertEqual(backend.get_session().run(fetches=[x, y]), [30., 40.]) - - def test_function_tf_run_options_with_run_metadata(self): - with ops.Graph().as_default(), self.cached_session(): - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - run_options = config_pb2.RunOptions(output_partition_graphs=True) - run_metadata = config_pb2.RunMetadata() - # enable run_options. 
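`RunOptions(output_partition_graphs=True)` asks the TF1 session to record the per-device partition graphs in the supplied `RunMetadata`, which is exactly what the assertions below verify. A minimal self-contained sketch of the underlying session facility (using the public `tf.compat.v1` aliases rather than `config_pb2`):

```
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
run_options = tf.RunOptions(output_partition_graphs=True)
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
  y = tf.constant(1.) + tf.constant(2.)
  sess.run(y, options=run_options, run_metadata=run_metadata)
print(len(run_metadata.partition_graphs))  # one GraphDef per device used
```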
- f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - options=run_options, - run_metadata=run_metadata) - output = f([10., 20.]) - self.assertEqual(output, [30.]) - self.assertNotEmpty(run_metadata.partition_graphs) - # disable run_options. - f1 = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - run_metadata=run_metadata) - output1 = f1([10., 20.]) - self.assertEqual(output1, [30.]) - self.assertEmpty(run_metadata.partition_graphs) - - def test_function_fetch_callbacks(self): - - class CallbackStub(object): - - def __init__(self): - self.times_called = 0 - self.callback_result = 0 - - def _fetch_callback(self, result): - self.times_called += 1 - self.callback_result = result - - with ops.Graph().as_default(), self.cached_session(): - callback = CallbackStub() - x_placeholder = backend.placeholder(shape=()) - y_placeholder = backend.placeholder(shape=()) - - callback_op = x_placeholder * y_placeholder - - f = backend.function( - inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder]) - f.fetches.append(callback_op) - f.fetch_callbacks[callback_op] = callback._fetch_callback - - _ = f([10., 20.]) - - self.assertEqual(callback.times_called, 1) - self.assertEqual(callback.callback_result, 200) - - def test_get_session_different_graphs(self): - with ops.Graph().as_default(): - x = backend.constant(1) - session = backend.get_session() - self.assertIs(session, backend.get_session((x,))) - self.assertIs(session, backend.get_session()) - with ops.Graph().as_default(): - self.assertIs(session, backend.get_session((x,))) - self.assertIsNot(session, backend.get_session()) - - -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) -class ControlOpsTests(test.TestCase): - - def test_function_switch_basics(self): - x = array_ops.constant(2.0) - y = array_ops.constant(3.0) - - def xpowy(): - return backend.pow(x, y) - - def ypowx(): - return backend.pow(y, x) - - tensor = backend.switch(backend.less(x, y), xpowy, ypowx) - self.assertEqual(backend.eval(tensor), [8.0]) - - tensor = backend.switch(backend.greater(x, y), xpowy, ypowx) - self.assertEqual(backend.eval(tensor), [9.0]) - - def test_unequal_rank(self): - x = ops.convert_to_tensor_v2( - np.array([[1, 2, 3], [4, 5, 6]]), dtype='float32') - y = ops.convert_to_tensor_v2(np.array([1, 2, 3]), dtype='float32') - - def true_func(): - return x - - def false_func(): - return y - - with self.assertRaisesRegexp(ValueError, - 'Rank of `condition` should be less than'): - backend.switch(backend.equal(x, x), false_func, true_func) - - -class ContextValueCacheTest(test.TestCase): - - def test_cache(self): - cache = backend.ContextValueCache(list) - graph1 = ops.Graph() - graph2 = ops.Graph() - - cache[graph1].append(1) - with graph1.as_default(): - cache[None].append(2) - - with graph2.as_default(): - cache[None].append(3) - cache[graph2].append(4) - - self.assertAllEqual(cache[graph1], [1, 2]) - self.assertAllEqual(cache[graph2], [3, 4]) - - with context.eager_mode(): - cache[None].append(5) - cache[None].append(6) - self.assertAllEqual(cache[None], [5, 6]) - - self.assertLen(cache, 3) - - del graph1 - gc.collect() - self.assertLen(cache, 2) - - def test_cache_in_parent_graph(self): - cache = backend.ContextValueCache(int) - cache.setdefault(None, backend.constant(5)) - - with ops.Graph().as_default() as g: - # g is not a child graph of the default test context, so the recursive - # lookup will create a new 
default value.
-      self.assertAllEqual(cache[g], 0)
-
-    @def_function.function
-    def fn():
-      # The function graph is a child of the default test context, so
-      # __getitem__ will return the previously saved value.
-      return cache[ops.get_default_graph()]
-
-    self.assertEqual(self.evaluate(fn()), 5)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/frozen_keras/constraints.py b/tensorflow/python/frozen_keras/constraints.py
deleted file mode 100644
index b7716a5eae3..00000000000
--- a/tensorflow/python/frozen_keras/constraints.py
+++ /dev/null
@@ -1,282 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# pylint: disable=invalid-name
-# pylint: disable=g-classes-have-attributes
-"""Constraints: functions that impose constraints on weight values."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import six
-
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.frozen_keras import backend as K
-from tensorflow.python.frozen_keras.utils.generic_utils import deserialize_keras_object
-from tensorflow.python.frozen_keras.utils.generic_utils import serialize_keras_object
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-
-
-class Constraint(object):
-
-  def __call__(self, w):
-    return w
-
-  def get_config(self):
-    return {}
-
-
-class MaxNorm(Constraint):
-  """MaxNorm weight constraint.
-
-  Constrains the weights incident to each hidden unit
-  to have a norm less than or equal to a desired value.
-
-  Arguments:
-    max_value: the maximum norm for the incoming weights.
-    axis: integer, axis along which to calculate weight norms.
-      For instance, in a `Dense` layer the weight matrix
-      has shape `(input_dim, output_dim)`,
-      set `axis` to `0` to constrain each weight vector
-      of length `(input_dim,)`.
-      In a `Conv2D` layer with `data_format="channels_last"`,
-      the weight tensor has shape
-      `(rows, cols, input_depth, output_depth)`,
-      set `axis` to `[0, 1, 2]`
-      to constrain the weights of each filter tensor of size
-      `(rows, cols, input_depth)`.
-  """
-
-  def __init__(self, max_value=2, axis=0):
-    self.max_value = max_value
-    self.axis = axis
-
-  def __call__(self, w):
-    norms = K.sqrt(
-        math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
-    desired = K.clip(norms, 0, self.max_value)
-    return w * (desired / (K.epsilon() + norms))
-
-  def get_config(self):
-    return {'max_value': self.max_value, 'axis': self.axis}
-
-
-class NonNeg(Constraint):
-  """Constrains the weights to be non-negative."""
-
-  def __call__(self, w):
-    return w * math_ops.cast(math_ops.greater_equal(w, 0.), K.floatx())
-
-
-class UnitNorm(Constraint):
-  """Constrains the weights incident to each hidden unit to have unit norm.
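For example, a minimal sketch of the effect (assumes this module's imports plus `import numpy as np`; illustrative only):

```
import numpy as np

w = K.variable(np.random.random((3, 2)))  # a Dense-style kernel
w = UnitNorm(axis=0)(w)
# Each length-3 column of `w` now has (approximately) unit L2 norm:
K.eval(K.sqrt(math_ops.reduce_sum(math_ops.square(w), axis=0)))
# -> array close to [1., 1.]
```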
- - Arguments: - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, axis=0): - self.axis = axis - - def __call__(self, w): - return w / ( - K.epsilon() + K.sqrt( - math_ops.reduce_sum( - math_ops.square(w), axis=self.axis, keepdims=True))) - - def get_config(self): - return {'axis': self.axis} - - -class MinMaxNorm(Constraint): - """MinMaxNorm weight constraint. - - Constrains the weights incident to each hidden unit - to have the norm between a lower bound and an upper bound. - - Arguments: - min_value: the minimum norm for the incoming weights. - max_value: the maximum norm for the incoming weights. - rate: rate for enforcing the constraint: weights will be - rescaled to yield - `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. - Effectively, this means that rate=1.0 stands for strict - enforcement of the constraint, while rate<1.0 means that - weights will be rescaled at each step to slowly move - towards a value inside the desired interval. - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, min_value=0.0, max_value=1.0, rate=1.0, axis=0): - self.min_value = min_value - self.max_value = max_value - self.rate = rate - self.axis = axis - - def __call__(self, w): - norms = K.sqrt( - math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True)) - desired = ( - self.rate * K.clip(norms, self.min_value, self.max_value) + - (1 - self.rate) * norms) - return w * (desired / (K.epsilon() + norms)) - - def get_config(self): - return { - 'min_value': self.min_value, - 'max_value': self.max_value, - 'rate': self.rate, - 'axis': self.axis - } - - -class RadialConstraint(Constraint): - """Constrains `Conv2D` kernel weights to be the same for each radius. - - For example, the desired output for the following 4-by-4 kernel:: - - ``` - kernel = [[v_00, v_01, v_02, v_03], - [v_10, v_11, v_12, v_13], - [v_20, v_21, v_22, v_23], - [v_30, v_31, v_32, v_33]] - ``` - - is this:: - - ``` - kernel = [[v_11, v_11, v_11, v_11], - [v_11, v_33, v_33, v_11], - [v_11, v_33, v_33, v_11], - [v_11, v_11, v_11, v_11]] - ``` - - This constraint can be applied to any `Conv2D` layer version, including - `Conv2DTranspose` and `SeparableConv2D`, and with either `"channels_last"` or - `"channels_first"` data format. The method assumes the weight tensor is of - shape `(rows, cols, input_depth, output_depth)`. 
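A minimal usage sketch (a hypothetical layer configuration; assumes the public `tf.keras` API):

```
layer = tf.keras.layers.Conv2D(
    filters=8, kernel_size=(4, 4),
    kernel_constraint=RadialConstraint())
```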
- """ - - def __call__(self, w): - w_shape = w.shape - if w_shape.rank is None or w_shape.rank != 4: - raise ValueError( - 'The weight tensor must be of rank 4, but is of shape: %s' % w_shape) - - height, width, channels, kernels = w_shape - w = K.reshape(w, (height, width, channels * kernels)) - # TODO(cpeter): Switch map_fn for a faster tf.vectorized_map once K.switch - # is supported. - w = K.map_fn( - self._kernel_constraint, - K.stack(array_ops.unstack(w, axis=-1), axis=0)) - return K.reshape(K.stack(array_ops.unstack(w, axis=0), axis=-1), - (height, width, channels, kernels)) - - def _kernel_constraint(self, kernel): - """Radially constraints a kernel with shape (height, width, channels).""" - padding = K.constant([[1, 1], [1, 1]], dtype='int32') - - kernel_shape = K.shape(kernel)[0] - start = K.cast(kernel_shape / 2, 'int32') - - kernel_new = K.switch( - K.cast(math_ops.floormod(kernel_shape, 2), 'bool'), - lambda: kernel[start - 1:start, start - 1:start], - lambda: kernel[start - 1:start, start - 1:start] + K.zeros( # pylint: disable=g-long-lambda - (2, 2), dtype=kernel.dtype)) - index = K.switch( - K.cast(math_ops.floormod(kernel_shape, 2), 'bool'), - lambda: K.constant(0, dtype='int32'), - lambda: K.constant(1, dtype='int32')) - while_condition = lambda index, *args: K.less(index, start) - - def body_fn(i, array): - return i + 1, array_ops.pad( - array, - padding, - constant_values=kernel[start + i, start + i]) - - _, kernel_new = control_flow_ops.while_loop( - while_condition, - body_fn, - [index, kernel_new], - shape_invariants=[index.get_shape(), - tensor_shape.TensorShape([None, None])]) - return kernel_new - - -# Aliases. - -max_norm = MaxNorm -non_neg = NonNeg -unit_norm = UnitNorm -min_max_norm = MinMaxNorm -radial_constraint = RadialConstraint - -# Legacy aliases. 
-maxnorm = max_norm -nonneg = non_neg -unitnorm = unit_norm - - -def serialize(constraint): - return serialize_keras_object(constraint) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object( - config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='constraint') - - -def get(identifier): - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret constraint identifier: ' + - str(identifier)) diff --git a/tensorflow/python/frozen_keras/engine/BUILD b/tensorflow/python/frozen_keras/engine/BUILD deleted file mode 100644 index 337e27079de..00000000000 --- a/tensorflow/python/frozen_keras/engine/BUILD +++ /dev/null @@ -1,151 +0,0 @@ -load("//tensorflow:tensorflow.bzl", "tf_py_test") - -package( - default_visibility = ["//tensorflow:__subpackages__"], - licenses = ["notice"], # Apache 2.0 -) - -#TODO(scottzhu): Cleanup all the deps to python/keras - -py_library( - name = "legacy_base_layer", - srcs = ["legacy_base_layer.py"], - deps = [ - ":base_layer_utils", - ":input_spec", - ":node", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:auto_control_deps", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:func_graph", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:tensor_spec", - "//tensorflow/python:tensor_util", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/autograph/core", - "//tensorflow/python/autograph/impl", - "//tensorflow/python/distribute:distribute_lib", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:execute", - "//tensorflow/python/eager:function", - "//tensorflow/python/frozen_keras:backend", - "//tensorflow/python/frozen_keras:constraint", - "//tensorflow/python/frozen_keras:initializers", - "//tensorflow/python/frozen_keras:regularizers", - "//tensorflow/python/frozen_keras/utils:generic_utils", - "//tensorflow/python/frozen_keras/utils:layer_utils", - "//tensorflow/python/frozen_keras/utils:tf_utils", - "//tensorflow/python/keras:metrics", - "//tensorflow/python/module", - "//tensorflow/python/ops/ragged:ragged_tensor", - "//tensorflow/python/training/tracking", - "//tensorflow/python/training/tracking:base", - "//tensorflow/python/training/tracking:data_structures", - "//tensorflow/python/training/tracking:layer_utils", - "//tensorflow/tools/docs:doc_controls", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - -py_library( - name = "base_layer_utils", - srcs = ["base_layer_utils.py"], - deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_util", - "//tensorflow/python:control_flow_v2_func_graphs", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:init_ops_v2", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:tensor_util", - "//tensorflow/python:tf2", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/distribute:distribute_lib", - "//tensorflow/python/eager:context", - 
"//tensorflow/python/frozen_keras:backend", - "//tensorflow/python/training/tracking:base", - ], -) - -py_library( - name = "input_spec", - srcs = ["input_spec.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:dtypes", - "//tensorflow/python:lib", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:tensor_spec", - "//tensorflow/python/frozen_keras:backend", - "@six_archive//:six", - ], -) - -py_library( - name = "node", - srcs = ["node.py"], - deps = [ - ":base_layer_utils", - "//tensorflow/python:framework_ops", - "//tensorflow/python:util", - "//tensorflow/python/frozen_keras:backend", - ], -) - -tf_py_test( - name = "legacy_base_layer_test", - size = "medium", - srcs = ["legacy_base_layer_test.py"], - python_version = "PY3", - shard_count = 8, - tags = [ - "no_rocm", - "nomac", # TODO(mihaimaruseac): b/127695564 - ], - deps = [ - ":legacy_base_layer", - "//tensorflow/python:client_testlib", - "//tensorflow/python/keras", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - -tf_py_test( - name = "base_layer_utils_test", - srcs = ["base_layer_utils_test.py"], - python_version = "PY3", - tags = [ - "nomac", # TODO(mihaimaruseac): b/127695564 - ], - deps = [ - ":base_layer_utils", - "//tensorflow/python:client_testlib", - "@absl_py//absl/testing:parameterized", - ], -) - -tf_py_test( - name = "input_spec_test", - size = "small", - srcs = ["input_spec_test.py"], - python_version = "PY3", - tags = [ - "nomac", # TODO(mihaimaruseac): b/127695564 - ], - deps = [ - ":input_spec", - "//tensorflow/python:client_testlib", - "@absl_py//absl/testing:parameterized", - ], -) diff --git a/tensorflow/python/frozen_keras/engine/base_layer_utils.py b/tensorflow/python/frozen_keras/engine/base_layer_utils.py deleted file mode 100644 index 897e4bedb4c..00000000000 --- a/tensorflow/python/frozen_keras/engine/base_layer_utils.py +++ /dev/null @@ -1,781 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ==============================================================================
-"""Contains private utilities used mainly by the base Layer class."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import threading
-
-from tensorflow.python import tf2
-from tensorflow.python.distribute import distribution_strategy_context
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.frozen_keras import backend
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_util
-from tensorflow.python.ops import control_flow_v2_func_graphs
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import init_ops_v2
-from tensorflow.python.ops import variables as tf_variables
-from tensorflow.python.training.tracking import base as tracking
-from tensorflow.python.util import nest
-from tensorflow.python.util import tf_contextlib
-
-_call_context = threading.local()
-
-
-def make_variable(name,
-                  shape=None,
-                  dtype=dtypes.float32,
-                  initializer=None,
-                  trainable=None,
-                  caching_device=None,
-                  validate_shape=True,
-                  constraint=None,
-                  use_resource=None,
-                  collections=None,
-                  synchronization=tf_variables.VariableSynchronization.AUTO,
-                  aggregation=tf_variables.VariableAggregation.NONE,
-                  partitioner=None):  # pylint: disable=unused-argument
-  """Temporary util to create a variable (relies on `variable_scope.variable`).
-
-  Some reuse-related technicalities prevent us from using
-  `variable_scope.get_variable()` directly, so we use a subcomponent
-  that has fewer constraints (`variable_scope.variable()`).
-
-  In the longer term, it seems like a similar "default variable creator" method
-  should exist in `Trackable` instead. When this happens, we can get
-  rid of this temporary solution.
-
-  TODO(fchollet): remove this method when no longer needed.
-
-  Arguments:
-    name: Variable name.
-    shape: Variable shape.
-    dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
-    initializer: Initializer instance (callable).
-    trainable: Whether the variable should be part of the layer's
-      "trainable_variables" (e.g. variables, biases)
-      or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
-      Note that if the current variable scope is marked as non-trainable
-      then this parameter is ignored and any added variables are also
-      marked as non-trainable. `trainable` defaults to `True` unless
-      `synchronization` is set to `ON_READ`.
-    caching_device: Passed to `tf.Variable`.
-    validate_shape: Passed to `tf.Variable`.
-    constraint: Constraint instance (callable).
-    use_resource: Whether to use a `ResourceVariable`.
-    collections: List of graph collections keys. The new variable is added to
-      these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
-    synchronization: Indicates when a distributed variable will be
-      aggregated. Accepted values are constants defined in the class
-      `tf.VariableSynchronization`. By default the synchronization is set to
-      `AUTO` and the current `DistributionStrategy` chooses
-      when to synchronize. If `synchronization` is set to `ON_READ`,
-      `trainable` must not be set to `True`.
-    aggregation: Indicates how a distributed variable will be aggregated.
-      Accepted values are constants defined in the class
-      `tf.VariableAggregation`.
- partitioner: Not handled at this time. - - Returns: - Variable instance. - """ - initializing_from_value = False - if initializer is not None and not callable(initializer): - initializing_from_value = True - - if initializing_from_value: - init_val = initializer - variable_dtype = None - else: - # Instantiate initializer if provided initializer is a type object. - if isinstance( - initializer, - (type(init_ops.Initializer), type(init_ops_v2.Initializer))): - initializer = initializer() - init_val = lambda: initializer(shape, dtype=dtype) - variable_dtype = dtype.base_dtype - if use_resource is None: - use_resource = True - - # TODO(apassos,rohanj) figure out how to remove collections from here so we - # can remove the V1. - variable_shape = tensor_shape.TensorShape(shape) - return tf_variables.VariableV1( - initial_value=init_val, - name=name, - trainable=trainable, - caching_device=caching_device, - dtype=variable_dtype, - validate_shape=validate_shape, - constraint=constraint, - use_resource=use_resource, - collections=collections, - synchronization=synchronization, - aggregation=aggregation, - shape=variable_shape if variable_shape else None) - - -def collect_previous_mask(input_tensors): - """Retrieves the output mask(s) of the previous node. - - Arguments: - input_tensors: An arbitrary structure of Tensors. - - Returns: - A mask tensor or list of mask tensors. - """ - - def _collect_previous_mask(x): - return getattr(x, '_keras_mask', None) - - return nest.map_structure(_collect_previous_mask, input_tensors) - - -def have_all_keras_metadata(tensors): - return all(hasattr(x, '_keras_history') for x in nest.flatten(tensors)) - - -def generate_placeholders_from_shape(shape): - return array_ops.placeholder(shape=shape, dtype=backend.floatx()) - - -def create_keras_history(tensors): - """Wraps TensorFlow Operations for compatibility with the Functional API. - - This method checks to see if a Tensor in `tensors` is missing Keras metadata - and has its origin in a Keras `Input` Layer. If so, this method will replace - the raw TensorFlow Operations that created this tensor with - `TensorFlowOpLayer` instances that create identical operations. - - Any Tensors not originating from a Keras `Input` Layer will be treated as - constants when constructing `TensorFlowOpLayer` instances. - - Arguments: - tensors: A structure of Tensors, some of which come from raw TensorFlow - operations and need to have Keras metadata assigned to them. - - Returns: - created_layers: List. The `TensorFlowOpLayer` instances created to wrap - the raw Tensorflow operations. - """ - _, created_layers = _create_keras_history_helper(tensors, set(), []) - return created_layers - - -def _create_keras_history_helper(tensors, processed_ops, created_layers): - """Helper method for `create_keras_history`. - - Arguments: - tensors: A structure of Tensors for which to create Keras metadata. - processed_ops: Set. TensorFlow operations that have already been wrapped in - `TensorFlowOpLayer` instances. - created_layers: List. The `TensorFlowOpLayer` instances created. - - Returns: - Tuple. First element is the updated set of TensorFlow Operations that - have been wrapped in `TensorFlowOpLayer` instances. Second element is - a list of the `TensorFlowOpLayer` instances created. - """ - # Import of `base_layer` needed in order to create `TensorFlowOpLayer`. - # Cannot be imported at top because of circular dependencies. - # TODO(omalleyt): Resolve circular dependency. 
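-  # Deferring the import to call time is the usual workaround for such a
-  # cycle: `legacy_base_layer` imports this module at its own top level, so
-  # the import below can only run once both modules are fully initialized.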
- from tensorflow.python.frozen_keras.engine import legacy_base_layer as base_layer # pylint: disable=g-import-not-at-top - tensor_list = nest.flatten(tensors) - for tensor in tensor_list: - if getattr(tensor, '_keras_history', None) is not None: - continue - op = tensor.op # The Op that created this Tensor. - if op not in processed_ops: - if op.type.startswith('Sparse'): - lambda_example = """ - weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) - output = tf.keras.layers.Lambda(weights_mult)(input) - """ - raise ValueError( - 'Sparse ops are not supported with functional models with built-in ' - 'layer wrapping. Please wrap the sparse ops in a Lambda layer like' - ': \n{lambda_example}\n'.format(lambda_example=lambda_example)) - - # Recursively set `_keras_history`. - op_inputs = list(op.inputs) - constants = {} - layer_inputs = [] - for i, op_input in enumerate(op_inputs): - if uses_keras_history(op_input): - layer_inputs.append(op_input) - else: - # Treat any value not originating from a `keras.Input` as - # a constant. Variables cannot be supported. - ds_with_session = ( - distribution_strategy_context.in_cross_replica_context() and - not ops.executing_eagerly_outside_functions()) - using_xla = control_flow_util.GraphOrParentsInXlaContext( - ops.get_default_graph()) - if ds_with_session or using_xla: - # In Legacy Graph mode, evaluating here makes Session be - # configured improperly. The downside of this is that saving - # via `get_config` breaks, but SavedModel still works. - constants[i] = op_input - else: - with ops.init_scope(): - constants[i] = backend.function([], op_input)([]) - layer_inputs = unnest_if_single_tensor(layer_inputs) - processed_ops, created_layers = _create_keras_history_helper( - layer_inputs, processed_ops, created_layers) - name = op.name - node_def = op.node_def.SerializeToString() - op_layer = base_layer.TensorFlowOpLayer( - node_def, constants=constants, name=name) - created_layers.append(op_layer) - op_layer._add_inbound_node( # pylint: disable=protected-access - layer_inputs, op.outputs) - processed_ops.update([op]) - return processed_ops, created_layers - - -def unnest_if_single_tensor(input_tensors): - # Preserve compatibility with older configs - flat_input_tensors = nest.flatten(input_tensors) - # If this is a single element but not a dict, unwrap. If this is a dict, - # assume the first layer expects a dict (as is the case with a - # DenseFeatures layer); pass through. - if not isinstance(input_tensors, dict) and len(flat_input_tensors) == 1: - input_tensors = flat_input_tensors[0] - return input_tensors - - -def needs_keras_history(tensors, ignore_call_context=False): - """Check if any Tensors need to be wrapped in TensorFlowOpLayers. - - This will never return True inside a sublayer, because sublayers - do not need to create Keras History. Otherwise, this returns True - if one or more of `tensors` originates from a `keras.Input` and - does not have `_keras_history` set. - - Arguments: - tensors: An arbitrary nested structure of Tensors. - ignore_call_context: Whether to ignore the check of if currently - outside of a `call` context. This is `True` when creating - KerasHistory inside `Node`, where we always know that Tensors - are being used with the Functional API. - - Returns: - Bool, whether at least one Tensor needs to be wrapped. 
- """ - input_tensors = nest.flatten(tensors) - if call_context().in_call and not ignore_call_context: - return False - if all( - getattr(tensor, '_keras_history', None) is not None - for tensor in input_tensors): - # KerasHistory already set. - return False - return uses_keras_history(tensors) - - -def is_in_keras_graph(): - """Returns if currently executing inside of a Keras graph.""" - return call_context().in_keras_graph - - -def is_in_eager_or_tf_function(): - """Returns if in eager mode or inside of a tf.function.""" - return context.executing_eagerly() or is_in_tf_function() - - -def is_in_tf_function(): - """Returns if inside of a tf.function.""" - # Check if running in V1 graph mode. - if not ops.executing_eagerly_outside_functions(): - return False - if not ops.inside_function(): - return False - # Check if inside Keras FuncGraph. - if is_in_keras_graph(): - return False - # Check for a v1 `wrap_function` FuncGraph. - graph = ops.get_default_graph() - if (getattr(graph, 'name', False) and - graph.name.startswith('wrapped_function')): - return False - return True - - -def uses_keras_history(tensors): - """Check if at least one Tensor originates from a `keras.Input`. - - This is `True` if at least one Tensor has its origin in a `keras.Input`. - Any Tensor that originates from a `keras.Input` will have a dependency - Tensor with a `_keras_history` attribute attached. Tensors that have - already been checked to not originate from a `keras.Input` - are marked as `_keras_history_checked`. - - Arguments: - tensors: An arbitrary nested structure of Tensors. - - Returns: - Bool, whether at least one Tensor originates from a `keras.Input`. - """ - checked_tensors = set() - tensors_to_check = nest.flatten(tensors) - - while tensors_to_check: - new_tensors_to_check = [] - for tensor in tensors_to_check: - if id(tensor) in checked_tensors: - continue - - checked_tensors.add(id(tensor)) - - if getattr(tensor, '_keras_history_checked', None) is not None: - continue - if getattr(tensor, '_keras_history', None) is not None: - return True - - try: - new_tensors_to_check.extend(tensor.op.inputs) - except AttributeError: - # In case `tensor` is a Variable created in an Eager context. - pass - - tensors_to_check = new_tensors_to_check - - # Mark that these Tensors have been checked once for `_keras_history`, - # and should not be checked again for performance reasons. - mark_checked(tensors) - return False - - -def mark_checked(tensors): - """Marks that these Tensors should not be tracked. - - This prevents Layers from attempting to create TensorFlowOpLayers - for these Tensors. - - Arguments: - tensors: An arbitrary structure of Tensors. - """ - - def _mark_checked(tensor): - tensor._keras_history_checked = True # pylint: disable=protected-access - - nest.map_structure(_mark_checked, tensors) - - -def call_context(): - """Returns currently active `CallContext`.""" - if getattr(_call_context, 'call_context', None) is None: - _call_context.call_context = CallContext() - return _call_context.call_context - - -class CallContext(object): - """Keeps track of properties currently inside a Layer/Model's `call`. - - Attributes: - layer: The `Layer` whose `call` is currently active. - inputs: The inputs to the currently active `Layer`. - frozen: Whether currently executing inside a `Layer` with `trainable` set to - `False`. - in_call: Whether currently inside the `call` of a Layer. - training: Whether currently executing in training or inference mode. 
- in_keras_graph: Whether executing inside the Keras Graph. - saving: Whether currently saving to SavedModel. - """ - - def __init__(self): - self.layer = None - self.inputs = None - self.frozen = False - self.in_call = False - self.training = None - self._in_keras_graph = False - self.saving = False - - @tf_contextlib.contextmanager - def enter(self, layer, inputs, build_graph, training, saving=None): - """Push a Layer and its inputs and state onto the current call context.""" - prev_layer = self.layer - prev_inputs = self.inputs - prev_frozen = self.frozen - prev_in_call = self.in_call - prev_training = self.training - prev_in_keras_graph = self._in_keras_graph - prev_saving = self.saving - - self.layer = layer - self.inputs = inputs - self.frozen = self.frozen or not layer.trainable - self.in_call = True - self.training = training - self._in_keras_graph = ( - self._in_keras_graph or - (build_graph and - getattr(backend.get_graph(), 'name', None) == 'keras_graph')) - self.saving = prev_saving if saving is None else saving - - try: - yield - finally: - self.layer = prev_layer - self.inputs = prev_inputs - self.frozen = prev_frozen - self.in_call = prev_in_call - self.training = prev_training - self._in_keras_graph = prev_in_keras_graph - self.saving = prev_saving - - @property - def in_keras_graph(self): - # Returns True even if in a subgraph of the Keras graph, such as those - # created by control flow ops. - if context.executing_eagerly(): - return False - return (self._in_keras_graph or - getattr(backend.get_graph(), 'name', None) == 'keras_graph') - - -def training_arg_passed_to_call(argspec, args, kwargs): - """Returns whether a user passed the `training` argument in `__call__`.""" - # `argspec.args` starts with ['self', 'inputs'] - full_args = dict(zip(argspec.args[2:], args)) - full_args.update(kwargs) - return 'training' in full_args and full_args['training'] is not None - - -def autocast_context_manager(dtype): - """Returns a context manager to autocast AutoCastVariables. - - Under this context manager, AutoCastVariables will be casted to `dtype` if - `dtype` is floating-point. Otherwise, AutoCastVariables will not be casted. - - Args: - dtype: The dtype to cast AutoCastVariables to, or None. - - Returns: - A context manager to automatically cast AutoCastVariables. - """ - if dtype and not dtypes.as_dtype(dtype).is_floating: - dtype = None - return ops.get_default_graph()._enable_auto_casting_variables(dtype) # pylint: disable=protected-access - - -def is_subclassed(layer): - """Returns True if the object is a subclassed layer or subclassed model.""" - return (layer.__module__.find('keras.engine') == -1 and - layer.__module__.find('keras.layers') == -1) - - -def from_saved_model(layer): - """Returns whether the layer is loaded from a SavedModel.""" - return layer.__module__.find('keras.saving.saved_model') != -1 - - -def check_graph_consistency(tensor=None, method='add_loss', force_raise=False): - """Checks that tensors passed to `add_*` method match the Keras graph. - - When one of the `add_*` method is called inside a V2 conditional branch, - the underlying tensor gets created in a FuncGraph managed by control_flow_v2. - We need to raise clear error messages in such cases. - - Arguments: - tensor: Tensor to check, or `False` if it is known that an error - should be raised. - method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}. - force_raise: If an error should be raised regardless of `tensor`. - - Raises: - RuntimeError: In case of an out-of-graph tensor. 
- """ - if (force_raise or - (ops.executing_eagerly_outside_functions() and - hasattr(tensor, 'graph') and - isinstance(tensor.graph, - (control_flow_v2_func_graphs.CondBranchFuncGraph, - control_flow_v2_func_graphs.WhileCondFuncGraph, - control_flow_v2_func_graphs.WhileBodyFuncGraph)))): - if method == 'activity_regularizer': - bad_example = """ - class TestModel(tf.keras.Model): - - def __init__(self): - super(TestModel, self).__init__(name='test_model') - self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2') - - def call(self, x, training=None): - if training: - return self.dense(x) - else: - return self.dense(x) - """ - correct_example = """ - class TestModel(tf.keras.Model): - - def __init__(self): - super(TestModel, self).__init__(name='test_model') - self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2') - - def call(self, x, training=None): - return self.dense(x) - """ - raise RuntimeError( - 'You are using a layer with `activity_regularizer` in a control flow ' - 'branch, e.g.:\n{bad_example}\nThis is currently not supported. ' - 'Please move your call to the layer with `activity_regularizer` out ' - 'of the control flow branch, e.g.:\n{correct_example}\n' - 'You can also resolve this by marking your outer model/layer dynamic' - ' (eager-only) by passing `dynamic=True` to the layer constructor. ' - 'Any kind of control flow is supported with dynamic layers. ' - 'Note that using `dynamic=True` requires you to implement static ' - 'shape inference in the `compute_output_shape(input_shape)` ' - 'method.'.format( - bad_example=bad_example, correct_example=correct_example)) - - if method == 'add_metric': - bad_example = """ - def call(self, inputs, training=None): - if training: - metric = compute_metric(inputs) - self.add_metric(metric, name='my_metric', aggregation='mean') - return inputs - """ - correct_example = """ - def call(self, inputs, training=None): - if training: - metric = compute_metric(inputs) - else: - metric = 0. - self.add_metric(metric, name='my_metric', aggregation='mean') - return inputs - """ - elif method == 'add_loss': - bad_example = """ - def call(self, inputs, training=None): - if training: - loss = compute_loss(inputs) - self.add_loss(loss) - return inputs - """ - correct_example = """ - def call(self, inputs, training=None): - if training: - loss = compute_loss(inputs) - else: - loss = 0. - self.add_loss(loss) - return inputs - """ - else: - bad_example = """ - def call(self, inputs, training=None): - if training: - self.add_update(self.w.assign_add(1)) - return inputs - """ - correct_example = """ - def call(self, inputs, training=None): - if training: - increment = 1 - else: - increment = 0 - self.add_update(self.w.assign_add(increment)) - return inputs - """ - raise RuntimeError( - 'You are using the method `{method}` in a control flow branch ' - 'in your layer, e.g.:\n{bad_example}\n' - 'This is not currently supported. ' - 'Please move your call to {method} out of the control flow branch, ' - 'e.g.:\n{correct_example}\n' - 'You can also resolve this by marking your layer ' - 'as dynamic (eager-only) by passing ' - '`dynamic=True` to the layer constructor. ' - 'Any kind of control flow is supported with dynamic layers. 
' - 'Note that using `dynamic=True` requires you ' - 'to implement static shape inference ' - 'in the `compute_output_shape(input_shape)` method.'.format( - method=method, - bad_example=bad_example, - correct_example=correct_example)) - - -def mark_as_return(outputs, acd): - """Marks `outputs` as the return values for automatic control deps.""" - - def _mark_as_return(tensor): - """Marks `tensor` as the return value for automatic control deps.""" - if not tensor_util.is_tensor(tensor): - return tensor - - # pylint: disable=protected-access - return_tensor = acd.mark_as_return(tensor) - if getattr(tensor, '_keras_mask', None) is not None: - return_tensor._keras_mask = acd.mark_as_return(tensor._keras_mask) - else: - return_tensor._keras_mask = None - - # Handle TensorFlow Probability attached metadata. - # TODO(b/132076537): Remove this once TFP uses `CompositeTensor`. - if getattr(tensor, '_tfp_distribution', None) is not None: - return_tensor._tfp_distribution = tensor._tfp_distribution - - return return_tensor - # pylint: enable=protected-access - - return nest.map_structure(_mark_as_return, outputs) - - -V2_DTYPE_BEHAVIOR = None - - -# These two functions are not exported because we plan on removing them in the -# future. -def enable_v2_dtype_behavior(): - """Enable the V2 dtype behavior for Keras layers. - - By default, the V2 dtype behavior is enabled in TensorFlow 2. - - When enabled, the dtype of Keras layers defaults to floatx (which is typically - float32) instead of None. In addition, layers will automatically cast - floating-point inputs to the layer's dtype. - - For example, once enabled, the following block will run a Conv2D layer - in float32: - - ```python - x = tf.ones((4, 4, 4, 4), dtype='float64') - layer = tf.keras.layers.Conv2D(filters=4, kernel_size=2) - print(layer.dtype) # Float32 when enabled. None when disabled. - # When enabled, will cast inputs to the layer's dtype, which is float32. When - # disabled, will do no casting, so the layer is done in float64. - y = layer(x) - ``` - - A layer author can opt-out their layer from the automatic input casting by - passing `autocast=False` to the base Layer's constructor. This disables the - autocasting part of the V2 behavior for that layer, but not the defaulting to - floatx part of the V2 behavior. - - When a global `tf.keras.mixed_precision.experimental.Policy` is set, the - layer's dtype will default to the global policy instead of floatx. Layers - will automatically cast inputs to the policy's compute_dtype. - """ - global V2_DTYPE_BEHAVIOR - V2_DTYPE_BEHAVIOR = True - - -def disable_v2_dtype_behavior(): - """Disables the V2 dtype behavior for Keras layers. - - See `enable_v2_dtype_behavior`. - - This function will be removed in the future. - """ - global V2_DTYPE_BEHAVIOR - V2_DTYPE_BEHAVIOR = False - - -def v2_dtype_behavior_enabled(): - """Returns True if the V2 dtype behavior is enabled.""" - if V2_DTYPE_BEHAVIOR is None: - return tf2.enabled() - return V2_DTYPE_BEHAVIOR - - -class TrackableWeightHandler(object): - """Keras wrapper for handling tracking.Trackable object saving and restoring. - - This class handles Trackables in both V1 and V2 modes, ensuring that they can - be saved and restored with the correct data and without adding additional ops - on every save. - - Attributes: - trackable: The trackable to wrap. - num_tensors: The number of tensors that this trackable requires for saving. 
- """ - - def __init__(self, trackable): - if not isinstance(trackable, tracking.Trackable): - raise ValueError('%s is not a Trackable object.' % (trackable,)) - self._trackable = trackable - - # TODO(b/141682913): Figure out why this is private and fix it. - saveables = trackable._gather_saveables_for_checkpoint().values() # pylint: disable=protected-access - if len(saveables) != 1: - raise ValueError('Only Trackables with one Saveable are supported.') - saveable = list(saveables)[0] - - if ops.executing_eagerly_outside_functions(): - # If we're in eager mode, we need to defer calling the Trackable's - # saveable() callable until data export time. - # However, it is safe to call the saveable as many times as we want, so - # we will call it now to figure out how many tensors this Trackable will - # produce. - self._saveable = saveable - self._num_tensors = len(self._saveable().specs) - self._setter = lambda weights: self._saveable().restore(weights, None) - self._getter = lambda: [spec.tensor for spec in self._saveable().specs] - else: - # If we're in Graph mode, we need to evaluate the Saveable only once and - # cache the resulting restore graph. Failing to do this will result in - # new assignment ops being added to the graph each time set_weights() is - # called. - self._placeholder_tensors = [] - self._saveable = saveable() - self._num_tensors = len(self._saveable.specs) - for spec in self._saveable.specs: - tensor = spec.tensor - self._placeholder_tensors.append( - array_ops.placeholder(tensor.dtype, tensor.shape)) - self._assign_op = self._saveable.restore(self._placeholder_tensors, None) - self._setter = self._set_weights_v1 - self._getter = lambda: [spec.tensor for spec in self._saveable.specs] - - @property - def num_tensors(self): - return self._num_tensors - - def set_weights(self, weights): - if len(weights) != self._num_tensors: - raise ValueError( - ('Weight handler for trackable %s received the wrong number of ' + - 'weights: expected %s, got %s.') % - (self._trackable, self._num_tensors, len(weights))) - self._setter(weights) - - def get_tensors(self): - return self._getter() - - def _set_weights_v1(self, weights): - feed_dict = {} - for idx, tensor in enumerate(weights): - feed_dict[self._placeholder_tensors[idx]] = tensor - backend.get_session().run(self._assign_op, feed_dict) - - -# TODO(kathywu): This is a temporary hack. When a network of layers is revived -# from SavedModel, only the top-level layer will have losses. This causes issues -# in eager mode because the child layers may have graph losses -# (thus model.losses returns a mix of Eager and graph tensors). To fix this, -# whenever eager losses are added to one layer, add eager losses to all -# child layers. This causes `.losses` to only return eager losses. -REVIVED_LOSS_PLACEHOLDER = ( - 'This layer\'s losses have been added to the parent layer.') diff --git a/tensorflow/python/frozen_keras/engine/base_layer_utils_test.py b/tensorflow/python/frozen_keras/engine/base_layer_utils_test.py deleted file mode 100644 index 8ddfc0b823e..00000000000 --- a/tensorflow/python/frozen_keras/engine/base_layer_utils_test.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.frozen_keras import backend -from tensorflow.python.frozen_keras.engine import base_layer_utils -from tensorflow.python.ops import lookup_ops -from tensorflow.python.platform import test - - -@test_util.run_all_in_graph_and_eager_modes -class TrackableWeightHandlerTest(test.TestCase, parameterized.TestCase): - - def get_table_handler(self): - # Note: There is some repetition in these tests' setup. However, Tensorflow - # does not play nicely with a separate setUp() call (causing errors related - # to graph building), so we have to use a called setup instead of a setUp() - # call. - table = lookup_ops.MutableHashTable( - key_dtype=dtypes.string, value_dtype=dtypes.int32, default_value=0) - return base_layer_utils.TrackableWeightHandler(table) - - def test_get_num_tensors(self): - table_handler = self.get_table_handler() - self.assertEqual(2, table_handler.num_tensors) - - def test_get_and_set_weights(self): - table_handler = self.get_table_handler() - - table_data = {b"a": 1, b"b": 2, b"c": 3} - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - weights = backend.batch_get_value(table_handler.get_tensors()) - weight_data = {key: value for key, value in zip(weights[0], weights[1])} - self.assertDictEqual(table_data, weight_data) - - def test_get_and_set_weights_does_not_add_ops(self): - table_handler = self.get_table_handler() - table_data = {b"a": 1, b"b": 2, b"c": 3} - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - _ = backend.batch_get_value(table_handler.get_tensors()) - backend.get_session().graph.finalize() - table_handler.set_weights( - [list(table_data.keys()), - list(table_data.values())]) - _ = backend.batch_get_value(table_handler.get_tensors()) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/python/frozen_keras/engine/input_spec.py b/tensorflow/python/frozen_keras/engine/input_spec.py deleted file mode 100644 index c46dde0ba6f..00000000000 --- a/tensorflow/python/frozen_keras/engine/input_spec.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-classes-have-attributes -"""Contains the InputSpec class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import zip # pylint: disable=redefined-builtin - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_spec -from tensorflow.python.frozen_keras import backend -from tensorflow.python.util import nest - - -class InputSpec(object): - """Specifies the rank, dtype and shape of every input to a layer. - - Layers can expose (if appropriate) an `input_spec` attribute: - an instance of `InputSpec`, or a nested structure of `InputSpec` instances - (one per input tensor). These objects enable the layer to run input - compatibility checks for input structure, input rank, input shape, and - input dtype. - - A None entry in a shape is compatible with any dimension, - a None shape is compatible with any shape. - - Arguments: - dtype: Expected DataType of the input. - shape: Shape tuple, expected shape of the input - (may include None for unchecked axes). - ndim: Integer, expected rank of the input. - max_ndim: Integer, maximum rank of the input. - min_ndim: Integer, minimum rank of the input. - axes: Dictionary mapping integer axes to - a specific dimension value. - """ - - def __init__(self, - dtype=None, - shape=None, - ndim=None, - max_ndim=None, - min_ndim=None, - axes=None): - self.dtype = dtypes.as_dtype(dtype).name if dtype is not None else None - if shape is not None: - self.ndim = len(shape) - self.shape = shape - else: - self.ndim = ndim - self.shape = None - self.max_ndim = max_ndim - self.min_ndim = min_ndim - try: - axes = axes or {} - self.axes = {int(k): axes[k] for k in axes} - except (ValueError, TypeError): - raise TypeError('The keys in axes must be integers.') - - if self.axes and (self.ndim is not None or self.max_ndim is not None): - max_dim = (self.ndim if self.ndim else self.max_ndim) - 1 - max_axis = max(self.axes) - if max_axis > max_dim: - raise ValueError('Axis {} is greater than the maximum allowed value: {}' - .format(max_axis, max_dim)) - - def __repr__(self): - spec = [('dtype=' + str(self.dtype)) if self.dtype else '', - ('shape=' + str(self.shape)) if self.shape else '', - ('ndim=' + str(self.ndim)) if self.ndim else '', - ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', - ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', - ('axes=' + str(self.axes)) if self.axes else ''] - return 'InputSpec(%s)' % ', '.join(x for x in spec if x) - - def get_config(self): - return { - 'dtype': self.dtype, - 'shape': self.shape, - 'ndim': self.ndim, - 'max_ndim': self.max_ndim, - 'min_ndim': self.min_ndim, - 'axes': self.axes} - - @classmethod - def from_config(cls, config): - return cls(**config) - - -def to_tensor_shape(spec): - """Returns a tf.TensorShape object that matches the shape specifications. - - If the InputSpec's shape or ndim is defined, this method will return a fully - or partially-known shape. Otherwise, the returned TensorShape is None. - - Args: - spec: an InputSpec object. 
-
-  Returns:
-    a tf.TensorShape object
-  """
-  if spec.ndim is None and spec.shape is None:
-    return tensor_shape.TensorShape(None)
-  elif spec.shape is not None:
-    return tensor_shape.TensorShape(spec.shape)
-  else:
-    shape = [None] * spec.ndim
-    for a in spec.axes:
-      shape[a] = spec.axes[a]  # Assume that axes is defined
-    return tensor_shape.TensorShape(shape)
-
-
-def assert_input_compatibility(input_spec, inputs, layer_name):
-  """Checks compatibility between the layer and provided inputs.
-
-  This checks that the tensor(s) `inputs` satisfy the input assumptions
-  of a layer (if any). If not, a clear and actionable exception gets raised.
-
-  Arguments:
-    input_spec: An InputSpec instance, list of InputSpec instances, a nested
-      structure of InputSpec instances, or None.
-    inputs: Input tensor, list of input tensors, or a nested structure of
-      input tensors.
-    layer_name: String, name of the layer (for error message formatting).
-
-  Raises:
-    ValueError: in case of mismatch between
-      the provided inputs and the expectations of the layer.
-  """
-  if not input_spec:
-    return
-
-  inputs = nest.flatten(inputs)
-  input_spec = nest.flatten(input_spec)
-  if len(inputs) != len(input_spec):
-    raise ValueError('Layer ' + layer_name + ' expects ' +
-                     str(len(input_spec)) + ' inputs, '
-                     'but it received ' + str(len(inputs)) +
-                     ' input tensors. Inputs received: ' + str(inputs))
-  for input_index, (x, spec) in enumerate(zip(inputs, input_spec)):
-    if spec is None:
-      continue
-
-    if (spec.ndim is not None or
-        spec.min_ndim is not None or
-        spec.max_ndim is not None):
-      if x.shape.ndims is None:
-        raise ValueError('Input ' + str(input_index) + ' of layer ' +
-                         layer_name + ' is incompatible with the layer: '
-                         'its rank is undefined, but the layer requires a '
-                         'defined rank.')
-
-    # Check ndim.
-    if spec.ndim is not None:
-      ndim = x.shape.ndims
-      if ndim != spec.ndim:
-        raise ValueError('Input ' + str(input_index) + ' of layer ' +
-                         layer_name + ' is incompatible with the layer: '
-                         'expected ndim=' + str(spec.ndim) + ', found ndim=' +
-                         str(ndim) + '. Full shape received: ' +
-                         str(x.shape.as_list()))
-    if spec.max_ndim is not None:
-      ndim = x.shape.ndims
-      if ndim is not None and ndim > spec.max_ndim:
-        raise ValueError('Input ' + str(input_index) + ' of layer ' +
-                         layer_name + ' is incompatible with the layer: '
-                         'expected max_ndim=' + str(spec.max_ndim) +
-                         ', found ndim=' + str(ndim))
-    if spec.min_ndim is not None:
-      ndim = x.shape.ndims
-      if ndim is not None and ndim < spec.min_ndim:
-        raise ValueError('Input ' + str(input_index) + ' of layer ' +
-                         layer_name + ' is incompatible with the layer: '
-                         'expected min_ndim=' + str(spec.min_ndim) +
-                         ', found ndim=' + str(ndim) +
-                         '. Full shape received: ' +
-                         str(x.shape.as_list()))
-    # Check dtype.
-    if spec.dtype is not None:
-      if x.dtype != spec.dtype:
-        raise ValueError('Input ' + str(input_index) + ' of layer ' +
-                         layer_name + ' is incompatible with the layer: '
-                         'expected dtype=' + str(spec.dtype) +
-                         ', found dtype=' + str(x.dtype))
-    # Check specific shape axes.
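-    # For example, a spec with axes={-1: 64} accepts an input of shape
-    # (None, 10, 64) but rejects (None, 10, 32): each axis listed in
-    # `spec.axes` must match the given value or be unknown (None).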
- if spec.axes: - shape = x.shape.as_list() - if shape is not None: - for axis, value in spec.axes.items(): - if hasattr(value, 'value'): - value = value.value - if value is not None and shape[int(axis)] not in {value, None}: - raise ValueError( - 'Input ' + str(input_index) + ' of layer ' + layer_name + ' is' - ' incompatible with the layer: expected axis ' + str(axis) + - ' of input shape to have value ' + str(value) + - ' but received input with shape ' + str(shape)) - # Check shape. - if spec.shape is not None: - shape = x.shape.as_list() - if shape is not None: - for spec_dim, dim in zip(spec.shape, shape): - if spec_dim is not None and dim is not None: - if spec_dim != dim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + layer_name + - ': expected shape=' + str(spec.shape) + - ', found shape=' + str(shape)) - - -def to_tensor_spec(input_spec, default_dtype=None): - """Converts a Keras InputSpec object to a TensorSpec.""" - default_dtype = default_dtype or backend.floatx() - if isinstance(input_spec, InputSpec): - dtype = input_spec.dtype or default_dtype - return tensor_spec.TensorSpec(to_tensor_shape(input_spec), dtype) - return tensor_spec.TensorSpec(None, default_dtype) diff --git a/tensorflow/python/frozen_keras/engine/input_spec_test.py b/tensorflow/python/frozen_keras/engine/input_spec_test.py deleted file mode 100644 index 17423a14886..00000000000 --- a/tensorflow/python/frozen_keras/engine/input_spec_test.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ==============================================================================
-"""InputSpec tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.frozen_keras.engine import input_spec
-from tensorflow.python.platform import test
-
-
-class InputSpecTest(test.TestCase):
-
-  def test_axes_initialization(self):
-    input_spec.InputSpec(shape=[1, None, 2, 3], axes={3: 5, '2': 2})
-    with self.assertRaisesRegexp(ValueError, 'Axis 4 is greater than'):
-      input_spec.InputSpec(shape=[1, None, 2, 3], axes={4: 5})
-    with self.assertRaisesRegexp(TypeError, 'keys in axes must be integers'):
-      input_spec.InputSpec(shape=[1, None, 2, 3], axes={'string': 5})
-
-
-class InputSpecToTensorShapeTest(test.TestCase):
-
-  def test_defined_shape(self):
-    spec = input_spec.InputSpec(shape=[1, None, 2, 3])
-    self.assertAllEqual(
-        [1, None, 2, 3], input_spec.to_tensor_shape(spec).as_list())
-
-  def test_defined_ndims(self):
-    spec = input_spec.InputSpec(ndim=5)
-    self.assertAllEqual(
-        [None] * 5, input_spec.to_tensor_shape(spec).as_list())
-
-    spec = input_spec.InputSpec(ndim=0)
-    self.assertAllEqual(
-        [], input_spec.to_tensor_shape(spec).as_list())
-
-    spec = input_spec.InputSpec(ndim=3, axes={1: 3, -1: 2})
-    self.assertAllEqual(
-        [None, 3, 2], input_spec.to_tensor_shape(spec).as_list())
-
-  def test_undefined_shapes(self):
-    spec = input_spec.InputSpec(max_ndim=5)
-    with self.assertRaisesRegexp(ValueError, 'unknown TensorShape'):
-      input_spec.to_tensor_shape(spec).as_list()
-
-    spec = input_spec.InputSpec(min_ndim=5, max_ndim=5)
-    with self.assertRaisesRegexp(ValueError, 'unknown TensorShape'):
-      input_spec.to_tensor_shape(spec).as_list()
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/frozen_keras/engine/legacy_base_layer.py b/tensorflow/python/frozen_keras/engine/legacy_base_layer.py
deleted file mode 100644
index 478976b6b9f..00000000000
--- a/tensorflow/python/frozen_keras/engine/legacy_base_layer.py
+++ /dev/null
@@ -1,2784 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# pylint: disable=protected-access
-"""Contains the legacy base Layer class.
-
-This is intended to be used by legacy TF v1 layers that are deprecated but
-still use the Keras Layer as their base class. This copy of the Layer code
-will stay unchanged, which ensures that the stability of the v1 functionality
-is not affected by the active development of the Keras Layer.
-""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import functools -import itertools -import threading -import weakref - -import numpy as np -import six -from six.moves import zip # pylint: disable=redefined-builtin - -from google.protobuf import json_format -from tensorflow.core.framework import node_def_pb2 -from tensorflow.python.autograph.core import ag_ctx -from tensorflow.python.autograph.impl import api as autograph -from tensorflow.python.distribute import distribution_strategy_context as ds_context -from tensorflow.python.eager import context -from tensorflow.python.eager import execute -from tensorflow.python.eager import function -from tensorflow.python.framework import auto_control_deps -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import func_graph -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import tensor_util -from tensorflow.python.frozen_keras import backend -from tensorflow.python.frozen_keras import constraints -from tensorflow.python.frozen_keras import initializers -from tensorflow.python.frozen_keras import regularizers -from tensorflow.python.frozen_keras.engine import base_layer_utils -from tensorflow.python.frozen_keras.engine import input_spec -from tensorflow.python.frozen_keras.engine import node as node_module -from tensorflow.python.frozen_keras.utils import generic_utils -from tensorflow.python.frozen_keras.utils import layer_utils -from tensorflow.python.frozen_keras.utils import tf_utils -# A module that only depends on `keras.layers` import these from here. -from tensorflow.python.frozen_keras.utils.generic_utils import to_snake_case # pylint: disable=unused-import -from tensorflow.python.frozen_keras.utils.tf_utils import is_tensor_or_tensor_list # pylint: disable=unused-import -from tensorflow.python.module import module -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.training.tracking import base as trackable -from tensorflow.python.training.tracking import data_structures -from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils -from tensorflow.python.training.tracking import tracking -from tensorflow.python.util import compat -from tensorflow.python.util import deprecation -from tensorflow.python.util import nest -from tensorflow.python.util import object_identity -from tensorflow.python.util import tf_inspect -from tensorflow.tools.docs import doc_controls - -# Prefix that is added to the TF op layer names. -_TF_OP_LAYER_NAME_PREFIX = 'tf_op_layer_' - - -class LegacyBaseLayer(module.Module): - """This is the class from which all layers inherit. - - A layer is a callable object that takes as input one or more tensors and - that outputs one or more tensors. It involves *computation*, defined - in the `call()` method, and a *state* (weight variables), defined - either in the constructor `__init__()` or in the `build()` method. - - Users will just instantiate a layer and then treat it as a callable. 
- - We recommend that descendants of `Layer` implement the following methods: - - * `__init__()`: Defines custom layer attributes, and creates layer state - variables that do not depend on input shapes, using `add_weight()`. - * `build(self, input_shape)`: This method can be used to create weights that - depend on the shape(s) of the input(s), using `add_weight()`. `__call__()` - will automatically build the layer (if it has not been built yet) by - calling `build()`. - * `call(self, *args, **kwargs)`: Called in `__call__` after making sure - `build()` has been called. `call()` performs the logic of applying the - layer to the input tensors (which should be passed in as argument). - Two reserved keyword arguments you can optionally use in `call()` are: - - `training` (boolean, whether the call is in - inference mode or training mode) - - `mask` (boolean tensor encoding masked timesteps in the input, used - in RNN layers) - * `get_config(self)`: Returns a dictionary containing the configuration used - to initialize this layer. If the keys differ from the arguments - in `__init__`, then override `from_config(self)` as well. - This method is used when saving - the layer or a model that contains this layer. - - Examples: - - Here's a basic example: a layer with two variables, `w` and `b`, - that returns `y = w . x + b`. - It shows how to implement `build()` and `call()`. - Variables set as attributes of a layer are tracked as weights - of the layers (in `layer.weights`). - - ```python - class SimpleDense(Layer): - - def __init__(self, units=32): - super(SimpleDense, self).__init__() - self.units = units - - def build(self, input_shape): # Create the state of the layer (weights) - w_init = tf.random_normal_initializer() - self.w = tf.Variable( - initial_value=w_init(shape=(input_shape[-1], self.units), - dtype='float32'), - trainable=True) - b_init = tf.zeros_initializer() - self.b = tf.Variable( - initial_value=b_init(shape=(self.units,), dtype='float32'), - trainable=True) - - def call(self, inputs): # Defines the computation from inputs to outputs - return tf.matmul(inputs, self.w) + self.b - - # Instantiates the layer. - linear_layer = SimpleDense(4) - - # This will also call `build(input_shape)` and create the weights. - y = linear_layer(tf.ones((2, 2))) - assert len(linear_layer.weights) == 2 - - # These weights are trainable, so they're listed in `trainable_weights`: - assert len(linear_layer.trainable_weights) == 2 - ``` - - Note that the method `add_weight()` offers a shortcut to create weights: - - ```python - class SimpleDense(Layer): - - def __init__(self, units=32): - super(SimpleDense, self).__init__() - self.units = units - - def build(self, input_shape): - self.w = self.add_weight(shape=(input_shape[-1], self.units), - initializer='random_normal', - trainable=True) - self.b = self.add_weight(shape=(self.units,), - initializer='random_normal', - trainable=True) - - def call(self, inputs): - return tf.matmul(inputs, self.w) + self.b - ``` - - Besides trainable weights, updated via backpropagation during training, - layers can also have non-trainable weights. These weights are meant to - be updated manually during `call()`. Here's a example layer that computes - the running sum of its inputs: - - ```python - class ComputeSum(Layer): - - def __init__(self, input_dim): - super(ComputeSum, self).__init__() - # Create a non-trainable weight. 
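-      # (`trainable=False` keeps this variable out of gradient updates, but
-      # it is still tracked and saved as part of the layer's weights.)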
- self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), - trainable=False) - - def call(self, inputs): - self.total.assign_add(tf.reduce_sum(inputs, axis=0)) - return self.total - - my_sum = ComputeSum(2) - x = tf.ones((2, 2)) - - y = my_sum(x) - print(y.numpy()) # [2. 2.] - - y = my_sum(x) - print(y.numpy()) # [4. 4.] - - assert my_sum.weights == [my_sum.total] - assert my_sum.non_trainable_weights == [my_sum.total] - assert my_sum.trainable_weights == [] - ``` - - For more information about creating layers, see the guide - [Writing custom layers and models with Keras]( - https://www.tensorflow.org/guide/keras/custom_layers_and_models) - - Arguments: - trainable: Boolean, whether the layer's variables should be trainable. - name: String name of the layer. - dtype: The dtype of the layer's computations and weights (default of - `None` means use `tf.keras.backend.floatx` in TensorFlow 2, or the type - of the first input in TensorFlow 1). - dynamic: Set this to `True` if your layer should only be run eagerly, and - should not be used to generate a static computation graph. - This would be the case for a Tree-RNN or a recursive network, - for example, or generally for any layer that manipulates tensors - using Python control flow. If `False`, we assume that the layer can - safely be used to generate a static computation graph. - - Attributes: - name: The name of the layer (string). - dtype: The dtype of the layer's computations and weights. - updates: List of update ops of this layer. - losses: List of losses added by this layer. - trainable_weights: List of variables to be included in backprop. - non_trainable_weights: List of variables that should not be - included in backprop. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - trainable: Whether the layer should be trained (boolean). - input_spec: Optional (list of) `InputSpec` object(s) specifying the - constraints on inputs that can be accepted by the layer. - - Each layer has a dtype, which is typically the dtype of the layer's - computations and variables. A layer's dtype can be queried via the - `Layer.dtype` property. The dtype is specified with the `dtype` constructor - argument. In TensorFlow 2, the dtype defaults to `tf.keras.backend.floatx()` - if no dtype is passed. `floatx()` itself defaults to "float32". - """ - - # See tf.Module for the usage of this property. - # The key for _obj_reference_counts_dict is a Trackable, which could be a - # variable or layer etc. tf.Module._flatten will fail to flatten the key - # since it is trying to convert Trackable to a string. This attribute can be - # ignored even after the fix of nest lib, since the trackable object should - # already been available as individual attributes. _obj_reference_counts_dict - # just contains a copy of them. - _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain( - ('_obj_reference_counts_dict',), - module.Module._TF_MODULE_IGNORED_PROPERTIES - )) - - @trackable.no_automatic_dependency_tracking - def __init__(self, trainable=True, name=None, dtype=None, dynamic=False, - **kwargs): - # These properties should be set by the user via keyword arguments. - # note that 'dtype', 'input_shape' and 'batch_input_shape' - # are only applicable to input layers: do not pass these keywords - # to non-input layers. - allowed_kwargs = { - 'input_shape', - 'batch_input_shape', - 'batch_size', - 'weights', - 'activity_regularizer', - } - # Validate optional keyword arguments. 
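-    # Any keyword argument outside `allowed_kwargs` raises a TypeError below,
-    # so a typo such as `input_shapes=...` fails loudly instead of being
-    # silently ignored.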
- generic_utils.validate_kwargs(kwargs, allowed_kwargs) - - # Mutable properties - # Indicates whether the layer's weights are updated during training - # and whether the layer's updates are run during training. - self._trainable = trainable - # A stateful layer is a layer whose updates are run during inference too, - # for instance stateful RNNs. - self._stateful = False - # Indicates whether `build` needs to be called upon layer call, to create - # the layer's weights. - self.built = False - # Record the build input shape for loading purposes. - # TODO(kathywu): Move this to Layer._set_save_spec once cl/290121460 is - # submitted. - self._build_input_shape = None - # Provides information about which inputs are compatible with the layer. - self._input_spec = None - self.supports_masking = False - self._supports_ragged_inputs = False - - self._init_set_name(name) - self._activity_regularizer = kwargs.pop('activity_regularizer', None) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - self._updates = [] - # Object to store all thread local layer properties. - self._thread_local = threading.local() - # A list of zero-argument lambdas which return Tensors, used for variable - # regularizers. - self._callable_losses = [] - # A list of symbolic Tensors containing activity regularizers and losses - # manually added through `add_loss` in graph-building mode. - self._losses = [] - # A list of metric instances corresponding to the symbolic metric tensors - # added using the `add_metric` API. - self._metrics = [] - # Ensures the same metric is not added multiple times in `MirroredStrategy`. - self._metrics_lock = threading.Lock() - self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - - # Dependencies tracked via attribute assignment. - # All layers in order of horizontal graph traversal. - # Entries are unique. For models includes input and output layers. - self._maybe_create_attribute('_layers', []) - - # These lists will be filled via successive calls - # to self._add_inbound_node(). - # Used in symbolic mode only, only in conjunction with graph-networks - self._inbound_nodes = [] - self._outbound_nodes = [] - - self._init_call_fn_args() - - # Whether the `call` method can be used to build a TF graph without issues. - # This attribute has no effect if the model is created using the Functional - # API. Instead, `model.dynamic` is determined based on the internal layers. - self._dynamic = dynamic - - # Manage input shape information if passed. - if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: - # In this case we will later create an input layer - # to insert before the current layer - if 'batch_input_shape' in kwargs: - batch_input_shape = tuple(kwargs['batch_input_shape']) - elif 'input_shape' in kwargs: - if 'batch_size' in kwargs: - batch_size = kwargs['batch_size'] - else: - batch_size = None - batch_input_shape = (batch_size,) + tuple(kwargs['input_shape']) - self._batch_input_shape = batch_input_shape - - # Manage initial weight values if passed. - self._initial_weights = kwargs.get('weights', None) - - # Whether the layer will track any layers that is set as attribute on itself - # as sub-layers, the weights from the sub-layers will be included in the - # parent layer's variables() as well. - # Default to True, which means auto tracking is turned on. Certain subclass - # might want to turn it off, like Sequential model. 
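-    # For example, with auto tracking on, assigning `self.dense = Dense(4)`
-    # inside a parent layer makes the sub-layer's weights show up in the
-    # parent's `weights` and `trainable_weights` collections automatically.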
- self._auto_track_sub_layers = True
-
- @trackable.no_automatic_dependency_tracking
- @generic_utils.default
- def build(self, input_shape):
- """Creates the variables of the layer (optional, for subclass implementers).
-
- This is a method that implementers of subclasses of `Layer` or `Model`
- can override if they need a state-creation step in-between
- layer instantiation and layer call.
-
- This is typically used to create the weights of `Layer` subclasses.
-
- Arguments:
- input_shape: Instance of `TensorShape`, or list of instances of
- `TensorShape` if the layer expects a list of inputs
- (one instance per input).
- """
- # Only record the build input shapes of overridden build methods.
- if not hasattr(self.build, '_is_default'):
- self._build_input_shape = input_shape
- self.built = True
-
- @doc_controls.for_subclass_implementers
- def call(self, inputs, **kwargs): # pylint: disable=unused-argument
- """This is where the layer's logic lives.
-
- Arguments:
- inputs: Input tensor, or list/tuple of input tensors.
- **kwargs: Additional keyword arguments.
-
- Returns:
- A tensor or list/tuple of tensors.
- """
- return inputs
-
- @doc_controls.for_subclass_implementers
- def _add_trackable(self, trackable_object, trainable):
- """Adds a Trackable object to this layer's state.
-
- Arguments:
- trackable_object: The tf.tracking.Trackable object to add.
- trainable: Boolean, whether the variable should be part of the layer's
- "trainable_variables" (e.g. variables, biases) or
- "non_trainable_variables" (e.g. BatchNorm mean and variance).
-
- Returns:
- The TrackableWeightHandler used to track this object.
- """
- handler = base_layer_utils.TrackableWeightHandler(trackable_object)
- if trainable:
- self._trainable_weights.append(handler)
- else:
- self._non_trainable_weights.append(handler)
- return handler
-
- @doc_controls.for_subclass_implementers
- def add_weight(self,
- name=None,
- shape=None,
- dtype=None,
- initializer=None,
- regularizer=None,
- trainable=None,
- constraint=None,
- partitioner=None,
- use_resource=None,
- synchronization=tf_variables.VariableSynchronization.AUTO,
- aggregation=tf_variables.VariableAggregation.NONE,
- **kwargs):
- """Adds a new variable to the layer.
-
- Arguments:
- name: Variable name.
- shape: Variable shape. Defaults to scalar if unspecified.
- dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
- initializer: Initializer instance (callable).
- regularizer: Regularizer instance (callable).
- trainable: Boolean, whether the variable should be part of the layer's
- "trainable_variables" (e.g. variables, biases)
- or "non_trainable_variables" (e.g. BatchNorm mean and variance).
- Note that `trainable` cannot be `True` if `synchronization`
- is set to `ON_READ`.
- constraint: Constraint instance (callable).
- partitioner: Partitioner to be passed to the `Trackable` API.
- use_resource: Whether to use `ResourceVariable`.
- synchronization: Indicates when a distributed variable will be
- aggregated. Accepted values are constants defined in the class
- `tf.VariableSynchronization`. By default the synchronization is set to
- `AUTO` and the current `DistributionStrategy` chooses
- when to synchronize. If `synchronization` is set to `ON_READ`,
- `trainable` must not be set to `True`.
- aggregation: Indicates how a distributed variable will be aggregated.
- Accepted values are constants defined in the class
- `tf.VariableAggregation`.
- **kwargs: Additional keyword arguments.
Accepted values are `getter`, - `collections` and `caching_device`. - - Returns: - The created variable. Usually either a `Variable` or `ResourceVariable` - instance. If `partitioner` is not `None`, a `PartitionedVariable` - instance is returned. - - Raises: - RuntimeError: If called with partitioned variable regularization and - eager execution is enabled. - ValueError: When giving unsupported dtype and no initializer or when - trainable has been set to True with synchronization set as `ON_READ`. - """ - if shape is None: - shape = () - # Validate optional keyword arguments. - for kwarg in kwargs: - if kwarg not in ['getter', 'collections', 'caching_device']: - raise TypeError('Unknown keyword argument:', kwarg) - getter = kwargs.pop('getter', base_layer_utils.make_variable) - collections_arg = kwargs.pop('collections', None) - # See the docstring for tf.Variable about the details for caching_device. - caching_device = kwargs.pop('caching_device', None) - - if dtype is None: - dtype = self.dtype or backend.floatx() - dtype = dtypes.as_dtype(dtype) - initializer = initializers.get(initializer) - regularizer = regularizers.get(regularizer) - constraint = constraints.get(constraint) - - if synchronization == tf_variables.VariableSynchronization.ON_READ: - if trainable: - raise ValueError( - 'Synchronization value can be set to ' - 'VariableSynchronization.ON_READ only for non-trainable variables. ' - 'You have specified trainable=True and ' - 'synchronization=VariableSynchronization.ON_READ.') - else: - # Set trainable to be false when variable is to be synced on read. - trainable = False - elif trainable is None: - trainable = True - - # Initialize variable when no initializer provided - if initializer is None: - # If dtype is DT_FLOAT, provide a uniform unit scaling initializer - if dtype.is_floating: - initializer = initializers.glorot_uniform() - # If dtype is DT_INT/DT_UINT, provide a default value `zero` - # If dtype is DT_BOOL, provide a default value `FALSE` - elif dtype.is_integer or dtype.is_unsigned or dtype.is_bool: - initializer = initializers.zeros() - # NOTES:Do we need to support for handling DT_STRING and DT_COMPLEX here? - else: - raise ValueError('An initializer for variable %s of type %s is required' - ' for layer %s' % (name, dtype.base_dtype, self.name)) - - variable = self._add_variable_with_custom_getter( - name=name, - shape=shape, - # TODO(allenl): a `make_variable` equivalent should be added as a - # `Trackable` method. - getter=getter, - # Manage errors in Layer rather than Trackable. - overwrite=True, - initializer=initializer, - dtype=dtype, - constraint=constraint, - trainable=trainable, - partitioner=partitioner, - use_resource=use_resource, - collections=collections_arg, - synchronization=synchronization, - aggregation=aggregation, - caching_device=caching_device) - if regularizer is not None: - # TODO(fchollet): in the future, this should be handled at the - # level of variable creation, and weight regularization losses - # should be variable attributes. 
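- # Illustrative call site (hypothetical; name, shape and factor chosen
- # freely):
- #   self.add_weight('kernel', shape=(4, 4),
- #                   regularizer=tf.keras.regularizers.l2(1e-4))
- # ends up here and registers a zero-argument callable that recomputes
- # the L2 penalty every time `layer.losses` is read.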
- name_in_scope = variable.name[:variable.name.find(':')]
- self._handle_weight_regularization(name_in_scope,
- variable,
- regularizer)
- if isinstance(variable, tf_variables.PartitionedVariable):
- for v in variable:
- backend.track_variable(v)
- if trainable:
- self._trainable_weights.append(v)
- else:
- self._non_trainable_weights.append(v)
- else:
- backend.track_variable(variable)
- if trainable:
- self._trainable_weights.append(variable)
- else:
- self._non_trainable_weights.append(variable)
- return variable
-
- @generic_utils.default
- def get_config(self):
- """Returns the config of the layer.
-
- A layer config is a Python dictionary (serializable)
- containing the configuration of a layer.
- The same layer can be reinstantiated later
- (without its trained weights) from this configuration.
-
- The config of a layer does not include connectivity
- information, nor the layer class name. These are handled
- by `Network` (one layer of abstraction above).
-
- Returns:
- Python dictionary.
- """
- all_args = tf_inspect.getfullargspec(self.__init__).args
- config = {'name': self.name, 'trainable': self.trainable}
- if hasattr(self, '_batch_input_shape'):
- config['batch_input_shape'] = self._batch_input_shape
- config['dtype'] = self.dtype
- if hasattr(self, 'dynamic'):
- # Only include `dynamic` in the `config` if it is `True`
- if self.dynamic:
- config['dynamic'] = self.dynamic
- elif 'dynamic' in all_args:
- all_args.remove('dynamic')
- expected_args = config.keys()
- # Finds all arguments in the `__init__` that are not in the config:
- extra_args = [arg for arg in all_args if arg not in expected_args]
- # Check that either the only argument in the `__init__` is `self`,
- # or that `get_config` has been overridden:
- if len(extra_args) > 1 and hasattr(self.get_config, '_is_default'):
- raise NotImplementedError('Layer %s has arguments in `__init__` and '
- 'therefore must override `get_config`.' %
- self.__class__.__name__)
- return config
-
- @classmethod
- def from_config(cls, config):
- """Creates a layer from its config.
-
- This method is the reverse of `get_config`,
- capable of instantiating the same layer from the config
- dictionary. It does not handle layer connectivity
- (handled by Network), nor weights (handled by `set_weights`).
-
- Arguments:
- config: A Python dictionary, typically the
- output of get_config.
-
- Returns:
- A layer instance.
- """
- return cls(**config)
-
- def compute_output_shape(self, input_shape):
- """Computes the output shape of the layer.
-
- If the layer has not been built, this method will call `build` on the
- layer. This assumes that the layer will later be used with inputs that
- match the input shape provided here.
-
- Arguments:
- input_shape: Shape tuple (tuple of integers)
- or list of shape tuples (one per output tensor of the layer).
- Shape tuples can include None for free dimensions,
- instead of an integer.
-
- Returns:
- An output shape tuple.
- """
- if context.executing_eagerly():
- # In this case we build the model first in order to do shape inference.
- # This is acceptable because the framework only calls
- # `compute_output_shape` on shape values that the layer would later be
- # built for. It would however cause issues in case a user attempts to
- # use `compute_output_shape` manually with shapes that are incompatible
- # with the shape the Layer will be called on (these users will have to
- # implement `compute_output_shape` themselves).
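- # Rough usage sketch for this eager branch (the shape is hypothetical):
- #   layer.compute_output_shape(tf.TensorShape([None, 16]))
- # builds the layer, calls it on a matching placeholder inside a scratch
- # FuncGraph below, and reads the static shape off the symbolic outputs.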
- self._maybe_build(input_shape)
- with func_graph.FuncGraph('graph').as_default():
- input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
- def _make_placeholder_like(shape):
- ph = backend.placeholder(shape=shape, dtype=self.dtype)
- ph._keras_mask = None
- return ph
- inputs = nest.map_structure(_make_placeholder_like, input_shape)
- try:
- outputs = self(inputs, training=False)
- except TypeError as e:
- six.raise_from(
- NotImplementedError(
- 'We could not automatically infer the static shape of the '
- 'layer\'s output. Please implement the '
- '`compute_output_shape` method on your layer (%s).' %
- self.__class__.__name__), e)
- return nest.map_structure(lambda t: t.shape, outputs)
- raise NotImplementedError
-
- @doc_controls.for_subclass_implementers
- def compute_output_signature(self, input_signature):
- """Compute the output tensor signature of the layer based on the inputs.
-
- Unlike a TensorShape object, a TensorSpec object contains both shape
- and dtype information for a tensor. This method allows layers to provide
- output dtype information if it is different from the input dtype.
- For any layer that doesn't implement this function,
- the framework will fall back to using `compute_output_shape`, and will
- assume that the output dtype matches the input dtype.
-
- Args:
- input_signature: Single TensorSpec or nested structure of TensorSpec
- objects, describing a candidate input for the layer.
-
- Returns:
- Single TensorSpec or nested structure of TensorSpec objects, describing
- how the layer would transform the provided input.
-
- Raises:
- TypeError: If input_signature contains a non-TensorSpec object.
- """
- def check_type_return_shape(s):
- if not isinstance(s, tensor_spec.TensorSpec):
- raise TypeError(
- 'Only TensorSpec signature types are supported, '
- 'but saw signature entry: {}.'.format(s))
- return s.shape
- input_shape = nest.map_structure(check_type_return_shape, input_signature)
- output_shape = self.compute_output_shape(input_shape)
- dtype = self._compute_dtype
- if dtype is None:
- input_dtypes = [s.dtype for s in nest.flatten(input_signature)]
- # Default behavior when self.dtype is None is to use the first input's
- # dtype.
- dtype = input_dtypes[0]
- return nest.map_structure(
- lambda s: tensor_spec.TensorSpec(dtype=dtype, shape=s),
- output_shape)
-
- @generic_utils.default
- def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument
- """Computes an output mask tensor.
-
- Arguments:
- inputs: Tensor or list of tensors.
- mask: Tensor or list of tensors.
-
- Returns:
- None or a tensor (or list of tensors,
- one per output tensor of the layer).
- """
- if not self.supports_masking:
- if any(m is not None for m in nest.flatten(mask)):
- raise TypeError('Layer ' + self.name + ' does not support masking, '
- 'but was passed an input_mask: ' + str(mask))
- # masking not explicitly supported: return None as mask.
- return None
- # if masking is explicitly supported, by default
- # carry over the input mask
- return mask
-
- def __call__(self, *args, **kwargs):
- """Wraps `call`, applying pre- and post-processing steps.
-
- Arguments:
- *args: Positional arguments to be passed to `self.call`.
- **kwargs: Keyword arguments to be passed to `self.call`.
-
- Returns:
- Output tensor(s).
-
- Note:
-
- The following optional keyword arguments are reserved for specific uses:
- * `training`: Boolean scalar tensor or Python boolean indicating
- whether the `call` is meant for training or inference.
- * `mask`: Boolean input mask.
-
- If the layer's `call` method takes a `mask` argument (as some Keras
- layers do), its default value will be set to the mask generated
- for `inputs` by the previous layer (if `inputs` came from
- a layer that generated a corresponding mask, i.e. if it came from
- a Keras layer with masking support).
-
- Raises:
- ValueError: if the layer's `call` method returns None (an invalid value).
- RuntimeError: if `super().__init__()` was not called in the constructor.
- """
- if not hasattr(self, '_thread_local'):
- raise RuntimeError(
- 'You must call `super().__init__()` in the layer constructor.')
-
- # Grab the first positional or keyword argument.
- if args:
- inputs = args[0]
- args = args[1:]
- elif self._call_fn_args[0] in kwargs:
- inputs = kwargs.pop(self._call_fn_args[0])
- else:
- raise ValueError(
- 'The first argument to `Layer.call` must always be passed.')
-
- call_context = base_layer_utils.call_context()
- input_list = nest.flatten(inputs)
-
- # We will attempt to build a TF graph if & only if all inputs are symbolic.
- # This is always the case in graph mode. It can also be the case in eager
- # mode when all inputs can be traced back to `keras.Input()` (when building
- # models using the functional API).
- build_graph = tf_utils.are_all_symbolic_tensors(input_list)
-
- # Accept NumPy and scalar inputs by converting to Tensors.
- if any(isinstance(x, (np.ndarray, float, int)) for x in input_list):
- def _convert_non_tensor(x):
- # Don't call `ops.convert_to_tensor_v2` on all `inputs` because
- # `SparseTensors` can't be converted to `Tensor`.
- if isinstance(x, (np.ndarray, float, int)):
- return ops.convert_to_tensor_v2(x)
- return x
- inputs = nest.map_structure(_convert_non_tensor, inputs)
- input_list = nest.flatten(inputs)
-
- # Handle `mask` propagation from previous layer to current layer. Masks can
- # be propagated explicitly via the `mask` argument, or implicitly via
- # setting the `_keras_mask` attribute on the inputs to a Layer. Masks passed
- # explicitly take priority.
- mask_arg_passed_by_framework = False
- input_masks = self._collect_input_masks(inputs, args, kwargs)
- if (self._expects_mask_arg and input_masks is not None and
- not self._call_arg_was_passed('mask', args, kwargs)):
- mask_arg_passed_by_framework = True
- kwargs['mask'] = input_masks
-
- # If `training` argument was not explicitly passed, propagate `training`
- # value from this layer's calling layer.
- training_arg_passed_by_framework = False
- # Priority 1: `training` was explicitly passed.
- if self._call_arg_was_passed('training', args, kwargs):
- training_value = self._get_call_arg_value('training', args, kwargs)
- if not self._expects_training_arg:
- kwargs.pop('training')
- else:
- training_value = None
- # Priority 2: `training` was passed to a parent layer.
- if call_context.training is not None:
- training_value = call_context.training
- # Priority 3a: `learning_phase()` has been set.
- elif backend.global_learning_phase_is_set():
- training_value = backend.learning_phase()
- # Priority 3b: Pass the `learning_phase()` if in the Keras FuncGraph.
- elif build_graph:
- with backend.get_graph().as_default():
- if base_layer_utils.is_in_keras_graph():
- training_value = backend.learning_phase()
-
- if self._expects_training_arg and training_value is not None:
- # Force the training_value to be bool type, which matches the contract
- # for layer/model call args.
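- # E.g. a (hypothetical) `model(x, training=tf.constant(True))` reaches
- # this point with a Tensor, which is cast below; a plain `training=1`
- # is coerced with `bool()` instead.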
- if tensor_util.is_tensor(training_value):
- training_value = math_ops.cast(training_value, dtypes.bool)
- else:
- training_value = bool(training_value)
- kwargs['training'] = training_value
- training_arg_passed_by_framework = True
-
- # Only create Keras history if at least one tensor originates from a
- # `keras.Input`. Otherwise this Layer may be being used outside the Keras
- # framework.
- if build_graph and base_layer_utils.needs_keras_history(inputs):
- base_layer_utils.create_keras_history(inputs)
-
- # Clear eager losses on top level model call.
- # We are clearing the losses only on the top level model call and not on
- # every layer/model call because layer/model may be reused.
- if (base_layer_utils.is_in_eager_or_tf_function() and
- not call_context.in_call):
- self._clear_losses()
-
- with call_context.enter(self, inputs, build_graph, training_value):
- # Check input assumptions set after layer building, e.g. input shape.
- if build_graph:
- # Symbolic execution on symbolic tensors. We will attempt to build
- # the corresponding TF subgraph inside `backend.get_graph()`
- # TODO(reedwm): We should assert input compatibility after the inputs
- # are cast, not before.
- input_spec.assert_input_compatibility(self.input_spec, inputs,
- self.name)
- if (any(isinstance(x, ragged_tensor.RaggedTensor) for x in input_list)
- and self._supports_ragged_inputs is False): # pylint: disable=g-bool-id-comparison
- raise ValueError('Layer %s does not support RaggedTensors as input. '
- 'Inputs received: %s. You can try converting your '
- 'input to a uniform tensor.' % (self.name, inputs))
-
- graph = backend.get_graph()
- with graph.as_default(), backend.name_scope(self._name_scope()):
- # Build layer if applicable (if the `build` method has been
- # overridden).
- self._maybe_build(inputs)
-
- if not self.dynamic:
- # Wrapping `call` function in autograph to allow for dynamic control
- # flow and control dependencies in call. We are limiting this to
- # subclassed layers as autograph is strictly needed only for
- # subclassed layers and models.
- # tf_convert will respect the value of autograph setting in the
- # enclosing tf.function, if any.
- if (base_layer_utils.is_subclassed(self) and
- not base_layer_utils.from_saved_model(self)):
- call_fn = autograph.tf_convert(
- self.call, ag_ctx.control_status_ctx())
- else:
- call_fn = self.call
-
- try:
- # Add auto_control_deps in V2 when they are not already added by
- # a `tf.function`.
- if (ops.executing_eagerly_outside_functions() and
- not base_layer_utils.is_in_eager_or_tf_function()):
- with auto_control_deps.AutomaticControlDependencies() as acd:
- outputs = call_fn(inputs, *args, **kwargs)
- # Wrap Tensors in `outputs` in `tf.identity` to avoid
- # circular dependencies.
- outputs = base_layer_utils.mark_as_return(outputs, acd)
- else:
- outputs = call_fn(inputs, *args, **kwargs)
-
- except errors.OperatorNotAllowedInGraphError as e:
- raise TypeError('You are attempting to use Python control '
- 'flow in a layer that was not declared to be '
- 'dynamic. Pass `dynamic=True` to the class '
- 'constructor.\nEncountered error:\n"""\n' +
- str(e) + '\n"""')
- else:
- # We will use static shape inference to return symbolic tensors
- # matching the specifications of the layer outputs.
- # Since `self.dynamic` is True, we will never attempt to
- # run the underlying TF graph (which is disconnected).
- # TODO(fchollet): consider py_func as an alternative, which
- # would enable us to run the underlying graph if needed.
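- # Sketch of the dynamic case: a layer declared with (hypothetical)
- # `MyTreeRNN(dynamic=True)` skips tracing its `call` here and only
- # produces placeholders shaped by `compute_output_shape`.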
- outputs = self._symbolic_call(inputs)
-
- if outputs is None:
- raise ValueError('A layer\'s `call` method should return a '
- 'Tensor or a list of Tensors, not None '
- '(layer: ' + self.name + ').')
- if base_layer_utils.have_all_keras_metadata(inputs):
- if training_arg_passed_by_framework:
- kwargs.pop('training')
- if mask_arg_passed_by_framework:
- kwargs.pop('mask')
- inputs, outputs = self._set_connectivity_metadata_(
- inputs, outputs, args, kwargs)
- self._handle_activity_regularization(inputs, outputs)
- self._set_mask_metadata(inputs, outputs, input_masks)
- if hasattr(self, '_set_inputs') and not self.inputs:
- # Subclassed network: explicitly set metadata normally set by
- # a call to self._set_inputs().
- self._set_inputs(inputs, outputs)
- else:
- # Eager execution on data tensors.
- with backend.name_scope(self._name_scope()):
- self._maybe_build(inputs)
- outputs = self.call(inputs, *args, **kwargs)
- self._handle_activity_regularization(inputs, outputs)
- self._set_mask_metadata(inputs, outputs, input_masks)
- if hasattr(self, '_set_save_spec'):
- self._set_save_spec(inputs)
-
- return outputs
-
- @property
- def dtype(self):
- """Dtype used by the weights of the layer, set in the constructor."""
- return self._dtype
-
- @property
- def name(self):
- """Name of the layer (string), set in the constructor."""
- return self._name
-
- @property
- @trackable_layer_utils.cache_recursive_attribute('dynamic')
- def dynamic(self):
- """Whether the layer is dynamic (eager-only); set in the constructor."""
- # NOTE(taylorrobie): Currently self._dynamic is read-only. If that changes
- # then this cache logic must be updated.
- return self._dynamic
-
- @property
- @doc_controls.do_not_doc_inheritable
- @trackable_layer_utils.cache_recursive_attribute('stateful')
- def stateful(self):
- return self._stateful
-
- @stateful.setter
- @trackable_layer_utils.invalidate_recursive_cache('stateful')
- def stateful(self, value):
- self._stateful = value
-
- @property
- def trainable(self):
- return self._trainable
-
- @trainable.setter
- def trainable(self, value):
- self._trainable = value
- for layer in getattr(self, '_layers', []):
- layer.trainable = value
-
- @property
- def activity_regularizer(self):
- """Optional regularizer function for the output of this layer."""
- return self._activity_regularizer
-
- @activity_regularizer.setter
- def activity_regularizer(self, regularizer):
- """Optional regularizer function for the output of this layer."""
- self._activity_regularizer = regularizer
-
- @property
- def input_spec(self):
- """`InputSpec` instance(s) describing the input format for this layer.
-
- When you create a layer subclass, you can set `self.input_spec` to enable
- the layer to run input compatibility checks when it is called.
- Consider a `Conv2D` layer: it can only be called on a single input tensor
- of rank 4. As such, you can set, in `__init__()`:
-
- ```python
- self.input_spec = tf.keras.layers.InputSpec(ndim=4)
- ```
-
- Now, if you try to call the layer on an input that isn't rank 4
- (for instance, an input of shape `(2,)`), it will raise a nicely-formatted
- error:
-
- ```
- ValueError: Input 0 of layer conv2d is incompatible with the layer:
- expected ndim=4, found ndim=1. Full shape received: [2]
- ```
-
- Input checks that can be specified via `input_spec` include:
- - Structure (e.g. a single input, a list of 2 inputs, etc.)
- - Shape
- - Rank (ndim)
- - Dtype
-
- For more information, see `tf.keras.layers.InputSpec`.
- - Returns: - A `tf.keras.layers.InputSpec` instance, or nested structure thereof. - """ - return self._input_spec - - @input_spec.setter - # Must be decorated to prevent tracking, since the input_spec can be nested - # InputSpec objects. - @trackable.no_automatic_dependency_tracking - def input_spec(self, value): - for v in nest.flatten(value): - if v is not None and not isinstance(v, input_spec.InputSpec): - raise TypeError('Layer input_spec must be an instance of InputSpec. ' - 'Got: {}'.format(v)) - self._input_spec = value - - @property - def trainable_weights(self): - """List of all trainable weights tracked by this layer. - - Trainable weights are updated via gradient descent during training. - - Returns: - A list of trainable variables. - """ - if self.trainable: - children_weights = self._gather_children_attribute('trainable_weights') - return self._dedup_weights(self._trainable_weights + children_weights) - else: - return [] - - @property - def non_trainable_weights(self): - """List of all non-trainable weights tracked by this layer. - - Non-trainable weights are *not* updated during training. They are expected - to be updated manually in `call()`. - - Returns: - A list of non-trainable variables. - """ - if self.trainable: - children_weights = self._gather_children_attribute( - 'non_trainable_weights') - non_trainable_weights = self._non_trainable_weights + children_weights - else: - children_weights = self._gather_children_attribute('weights') - non_trainable_weights = ( - self._trainable_weights + self._non_trainable_weights + - children_weights) - return self._dedup_weights(non_trainable_weights) - - @property - def weights(self): - """Returns the list of all layer variables/weights. - - Returns: - A list of variables. - """ - return self.trainable_weights + self.non_trainable_weights - - @property - @doc_controls.do_not_doc_inheritable - def updates(self): - collected_updates = [] - all_layers = self._gather_unique_layers() - with backend.get_graph().as_default(): - for layer in all_layers: - if not layer.trainable and not layer.stateful: - continue - for u in layer._updates: - if callable(u): - try: - u = u() - except errors.InaccessibleTensorError: - base_layer_utils.check_graph_consistency( - method='add_update', force_raise=True) - raise # check_graph_consistency may not always raise. - base_layer_utils.check_graph_consistency(u, method='add_update') - collected_updates.append(u) - return collected_updates - - @property - def losses(self): - """Losses which are associated with this `Layer`. - - Variable regularization tensors are created when this property is accessed, - so it is eager safe: accessing `losses` under a `tf.GradientTape` will - propagate gradients back to the corresponding variables. - - Returns: - A list of tensors. - """ - collected_losses = [] - all_layers = self._gather_unique_layers() - for layer in all_layers: - # If any eager losses are present, we assume the model to be part of an - # eager training loop (either a custom one or the one used when - # `run_eagerly=True`) and so we always return just the eager losses. - if layer._eager_losses: - # Filter placeholder losses that may have been added by revived layers. - # (see base_layer_utils for details). 
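- # Hedged usage note: accessing `layer.losses` (for example inside a
- # `tf.GradientTape` block in a hypothetical custom training loop)
- # re-evaluates the callable regularizers collected further below, which
- # is what makes this property safe to use for gradient computation.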
- if (layer._eager_losses[0] is
- not base_layer_utils.REVIVED_LOSS_PLACEHOLDER):
- collected_losses.extend(layer._eager_losses)
- else:
- collected_losses.extend(layer._losses)
- for regularizer in layer._callable_losses:
- loss_tensor = regularizer()
- if loss_tensor is not None:
- collected_losses.append(loss_tensor)
- return collected_losses
-
- def add_loss(self, losses, inputs=None):
- """Add loss tensor(s), potentially dependent on layer inputs.
-
- Some losses (for instance, activity regularization losses) may be dependent
- on the inputs passed when calling a layer. Hence, when reusing the same
- layer on different inputs `a` and `b`, some entries in `layer.losses` may
- be dependent on `a` and some on `b`. This method automatically keeps track
- of dependencies.
-
- This method can be used inside a subclassed layer or model's `call`
- function, in which case `losses` should be a Tensor or list of Tensors.
-
- Example:
-
- ```python
- class MyLayer(tf.keras.layers.Layer):
- def call(self, inputs):
- self.add_loss(tf.abs(tf.reduce_mean(inputs)), inputs=True)
- return inputs
- ```
-
- This method can also be called directly on a Functional Model during
- construction. In this case, any loss Tensors passed to this Model must
- be symbolic and be able to be traced back to the model's `Input`s. These
- losses become part of the model's topology and are tracked in `get_config`.
-
- Example:
-
- ```python
- inputs = tf.keras.Input(shape=(10,))
- x = tf.keras.layers.Dense(10)(inputs)
- outputs = tf.keras.layers.Dense(1)(x)
- model = tf.keras.Model(inputs, outputs)
- # Activity regularization.
- model.add_loss(tf.abs(tf.reduce_mean(x)))
- ```
-
- If this is not the case for your loss (if, for example, your loss references
- a `Variable` of one of the model's layers), you can wrap your loss in a
- zero-argument lambda. These losses are not tracked as part of the model's
- topology since they can't be serialized.
-
- Example:
-
- ```python
- inputs = tf.keras.Input(shape=(10,))
- x = tf.keras.layers.Dense(10)(inputs)
- outputs = tf.keras.layers.Dense(1)(x)
- model = tf.keras.Model(inputs, outputs)
- # Weight regularization.
- model.add_loss(lambda: tf.reduce_mean(x.kernel))
- ```
-
- The `get_losses_for` method allows you to retrieve the losses relevant to a
- specific set of inputs.
-
- Arguments:
- losses: Loss tensor, or list/tuple of tensors. Rather than tensors, losses
- may also be zero-argument callables which create a loss tensor.
- inputs: Ignored when executing eagerly. If anything other than None is
- passed, it signals the losses are conditional on some of the layer's
- inputs, and thus they should only be run where these inputs are
- available. This is the case for activity regularization losses, for
- instance. If `None` is passed, the losses are assumed
- to be unconditional, and will apply across all dataflows of the layer
- (e.g. weight regularization losses).
- """
- def _tag_unconditional(loss):
- """Process the loss and tag it by setting loss._unconditional_loss."""
- if callable(loss):
- # We run the loss without autocasting, as regularizers are often
- # numerically unstable in float16.
- with base_layer_utils.autocast_context_manager(None):
- loss = loss()
- if loss is None:
- return None # Will be filtered out when computing the .losses property
- if not tensor_util.is_tensor(loss):
- loss = ops.convert_to_tensor_v2(loss, dtype=backend.floatx())
- loss._unconditional_loss = (inputs is None) # pylint: disable=protected-access
- return loss
-
- losses = nest.flatten(losses)
-
- callable_losses = []
- eager_losses = []
- symbolic_losses = []
- for loss in losses:
- if callable(loss):
- callable_losses.append(functools.partial(_tag_unconditional, loss))
- continue
- if loss is None:
- continue
- if not tensor_util.is_tensor(loss):
- loss = ops.convert_to_tensor_v2(loss, dtype=backend.floatx())
- # TF Functions should take the eager path.
- if (tf_utils.is_symbolic_tensor(loss) and
- not base_layer_utils.is_in_tf_function()):
- symbolic_losses.append(_tag_unconditional(loss))
- base_layer_utils.check_graph_consistency(loss, method='add_loss')
- elif tensor_util.is_tensor(loss):
- eager_losses.append(_tag_unconditional(loss))
-
- self._callable_losses.extend(callable_losses)
-
- in_call_context = base_layer_utils.call_context().in_call
- if eager_losses and not in_call_context:
- raise ValueError(
- 'Expected a symbolic Tensor or a callable for the loss value. '
- 'Please wrap your loss computation in a zero argument `lambda`.')
-
- self._eager_losses.extend(eager_losses)
-
- if in_call_context:
- for symbolic_loss in symbolic_losses:
- self._losses.append(symbolic_loss)
- else:
- for symbolic_loss in symbolic_losses:
- if getattr(self, '_is_graph_network', False):
- self._graph_network_add_loss(symbolic_loss)
- else:
- # Possibly a loss was added in a Layer's `build`.
- self._losses.append(symbolic_loss)
-
- @trackable.no_automatic_dependency_tracking
- def _clear_losses(self):
- """Used every step in eager to reset losses."""
- self._eager_losses = []
- if hasattr(self, '_layers'):
- for layer in trackable_layer_utils.filter_empty_layer_containers(
- self._layers):
- layer._clear_losses()
-
- @property
- def metrics(self):
- """List of `tf.keras.metrics.Metric` instances tracked by the layer."""
- collected_metrics = []
- all_layers = self._gather_unique_layers()
- for layer in all_layers:
- with layer._metrics_lock:
- collected_metrics.extend(layer._metrics)
- return collected_metrics
-
- def add_metric(self, value, aggregation=None, name=None):
- """Adds metric tensor to the layer.
-
- Args:
- value: Metric tensor.
- aggregation: Sample-wise metric reduction function. If `aggregation=None`,
- it indicates that the metric tensor provided has been aggregated
- already. E.g., `bin_acc = BinaryAccuracy(name='acc')` followed by
- `model.add_metric(bin_acc(y_true, y_pred))`. If `aggregation='mean'`,
- the given metric tensor will be sample-wise reduced using the `mean`
- function. E.g., `model.add_metric(tf.reduce_sum(outputs),
- name='output_mean', aggregation='mean')`.
- name: String metric name.
-
- Raises:
- ValueError: If `aggregation` is anything other than None or `mean`.
- """
- if aggregation is not None and aggregation != 'mean':
- raise ValueError(
- 'We currently support only `mean` sample-wise metric aggregation. '
- 'You provided aggregation=`%s`' % aggregation)
-
- from_metric_obj = hasattr(value, '_metric_obj')
- is_symbolic = tf_utils.is_symbolic_tensor(value)
- in_call_context = base_layer_utils.call_context().in_call
-
- if name is None and not from_metric_obj:
- # E.g. `self.add_metric(math_ops.reduce_sum(x), aggregation='mean')`
- # In eager mode, we use the metric name to look up a metric. Without a
- # name, a new Mean metric wrapper will be created on every model/layer
- # call. So, we raise an error when no name is provided.
- # We will do the same for symbolic mode for consistency although a name
- # will be generated if no name is provided.
-
- # We will not raise this error in the following use case for the sake of
- # consistency, as the name is provided in the metric constructor.
- # mean = metrics.Mean(name='my_metric')
- # model.add_metric(mean(outputs))
- raise ValueError('Please provide a name for your metric like '
- '`self.add_metric(tf.reduce_sum(inputs), '
- 'name=\'mean_activation\', aggregation=\'mean\')`')
- elif from_metric_obj:
- name = value._metric_obj.name
-
- if in_call_context:
- # TF Function path should take the eager path.
- if is_symbolic and not base_layer_utils.is_in_tf_function():
- self._symbolic_add_metric(value, aggregation, name)
- else:
- self._eager_add_metric(value, aggregation, name)
- else:
- if not is_symbolic:
- raise ValueError('Expected a symbolic Tensor for the metric value, '
- 'received: ' + str(value))
-
- # Possibly a metric was added in a Layer's `build`.
- if not getattr(self, '_is_graph_network', False):
- with backend.get_graph().as_default():
- self._symbolic_add_metric(value, aggregation, name)
- return
-
- if from_metric_obj:
- raise ValueError('Using the result of calling a `Metric` object '
- 'when calling `add_metric` on a Functional '
- 'Model is not supported. Please pass the '
- 'Tensor to monitor directly.')
-
- # Insert layers into the Keras Graph Network.
- self._graph_network_add_metric(value, aggregation, name)
-
- @deprecation.deprecated_args(None, '`inputs` is now automatically inferred',
- 'inputs')
- @doc_controls.do_not_doc_inheritable
- def add_update(self, updates, inputs=None):
- """Add update op(s), potentially dependent on layer inputs.
-
- Weight updates (for instance, the updates of the moving mean and variance
- in a BatchNormalization layer) may be dependent on the inputs passed
- when calling a layer. Hence, when reusing the same layer on
- different inputs `a` and `b`, some entries in `layer.updates` may be
- dependent on `a` and some on `b`. This method automatically keeps track
- of dependencies.
-
- The `get_updates_for` method allows you to retrieve the updates relevant
- to a specific set of inputs.
-
- This call is ignored when eager execution is enabled (in that case, variable
- updates are run on the fly and thus do not need to be tracked for later
- execution).
-
- Arguments:
- updates: Update op, or list/tuple of update ops, or zero-arg callable
- that returns an update op. A zero-arg callable should be passed in
- order to disable running the updates by setting `trainable=False`
- on this Layer, when executing in Eager mode.
- inputs: Deprecated, will be automatically inferred.
- """
- call_context = base_layer_utils.call_context()
-
- if (ds_context.has_strategy() and
- ds_context.in_cross_replica_context() and
- # When saving the model, the distribution strategy context should be
- # ignored, following the default path for adding updates.
- not call_context.saving):
- # Updates don't need to be run in a cross-replica context.
- return
-
- updates = generic_utils.to_list(updates)
-
- # All updates can be run immediately in Eager or in a tf.function.
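- # E.g. a BatchNormalization-style layer calling (hypothetically)
- # `self.add_update(lambda: moving_mean.assign(new_mean))` has the
- # callable executed right away on the eager path below, unless the
- # enclosing call context is frozen (e.g. under `trainable=False`).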
- if base_layer_utils.is_in_eager_or_tf_function(): - if not call_context.frozen: - for update in updates: - if callable(update): - update() - return - - if call_context.in_call: - relevant_inputs = call_context.inputs - else: - inbound_nodes = getattr(self, '_inbound_nodes', []) - relevant_inputs = [node.input_tensors for node in inbound_nodes] - - def process_update(x): - """Standardize update ops. - - Arguments: - x: Tensor, op, or callable. - - Returns: - An update op. - """ - if callable(x): - update = lambda: process_update(x()) - if not ops.executing_eagerly_outside_functions(): - # In V1 mode, call the callable right away and process. This is needed - # for TPU strategy. - return update() - elif isinstance(x, ops.Operation): - update = x - elif hasattr(x, 'op'): - update = x.op - else: - update = ops.convert_to_tensor_v2(x) - - reachable = tf_utils.get_reachable_from_inputs(relevant_inputs, [update]) - update._unconditional_update = update not in reachable - return update - - updates = [process_update(x) for x in updates] - # Non-callable Updates are run automatically inside `call` in V2, so - # they do not need to be tracked later. - if ops.executing_eagerly_outside_functions() and call_context.in_call: - updates = [u for u in updates if callable(u)] - self._updates.extend(updates) - - def set_weights(self, weights): - """Sets the weights of the layer, from Numpy arrays. - - The weights of a layer represent the state of the layer. This function - sets the weight values from numpy arrays. The weight values should be - passed in the order they are created by the layer. Note that the layer's - weights must be instantiated before calling this function by calling - the layer. - - For example, a Dense layer returns a list of two values-- per-output - weights and the bias value. These can be used to set the weights of another - Dense layer: - - >>> a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> b = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> b.set_weights(a.get_weights()) - >>> b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Arguments: - weights: a list of Numpy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the layer (i.e. it should match the - output of `get_weights`). - - Raises: - ValueError: If the provided weights list does not match the - layer's specifications. - """ - params = self.weights - - expected_num_weights = 0 - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - expected_num_weights += param.num_tensors - else: - expected_num_weights += 1 - - if expected_num_weights != len(weights): - raise ValueError( - 'You called `set_weights(weights)` on layer "%s" ' - 'with a weight list of length %s, but the layer was ' - 'expecting %s weights. Provided weights: %s...' 
% - (self.name, len(weights), expected_num_weights, str(weights)[:50])) - - weight_index = 0 - weight_value_tuples = [] - for param in params: - if isinstance(param, base_layer_utils.TrackableWeightHandler): - num_tensors = param.num_tensors - tensors = weights[weight_index:weight_index + num_tensors] - param.set_weights(tensors) - weight_index += num_tensors - else: - weight = weights[weight_index] - ref_shape = param.shape - if not ref_shape.is_compatible_with(weight.shape): - raise ValueError( - 'Layer weight shape %s not compatible with provided weight ' - 'shape %s' % (ref_shape, weight.shape)) - weight_value_tuples.append((param, weight)) - weight_index += 1 - - backend.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current weights of the layer. - - The weights of a layer represent the state of the layer. This function - returns both trainable and non-trainable weight values associated with this - layer as a list of Numpy arrays, which can in turn be used to load state - into similarly parameterized layers. - - For example, a Dense layer returns a list of two values-- per-output - weights and the bias value. These can be used to set the weights of another - Dense layer: - - >>> a = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(1.)) - >>> a_out = a(tf.convert_to_tensor([[1., 2., 3.]])) - >>> a.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - >>> b = tf.keras.layers.Dense(1, - ... kernel_initializer=tf.constant_initializer(2.)) - >>> b_out = b(tf.convert_to_tensor([[10., 20., 30.]])) - >>> b.get_weights() - [array([[2.], - [2.], - [2.]], dtype=float32), array([0.], dtype=float32)] - >>> b.set_weights(a.get_weights()) - >>> b.get_weights() - [array([[1.], - [1.], - [1.]], dtype=float32), array([0.], dtype=float32)] - - Returns: - Weights values as a list of numpy arrays. - """ - weights = self.weights - output_weights = [] - for weight in weights: - if isinstance(weight, base_layer_utils.TrackableWeightHandler): - output_weights.extend(weight.get_tensors()) - else: - output_weights.append(weight) - return backend.batch_get_value(output_weights) - - @doc_controls.do_not_generate_docs - def get_updates_for(self, inputs): - """Retrieves updates relevant to a specific set of inputs. - - Arguments: - inputs: Input tensor or list/tuple of input tensors. - - Returns: - List of update ops of the layer that depend on `inputs`. - """ - if inputs is None: - # Requesting unconditional updates. - return [u for u in self.updates if u._unconditional_update] - - # Requesting input-conditional updates. - updates = [u for u in self.updates if not u._unconditional_update] - inputs = nest.flatten(inputs) - reachable = tf_utils.get_reachable_from_inputs(inputs, updates) - return [u for u in updates if u in reachable] - - @doc_controls.do_not_doc_inheritable - def get_losses_for(self, inputs): - """Retrieves losses relevant to a specific set of inputs. - - Arguments: - inputs: Input tensor or list/tuple of input tensors. - - Returns: - List of loss tensors of the layer that depend on `inputs`. - """ - if inputs is None: - # Requesting unconditional losses. - return [l for l in self.losses if l._unconditional_loss] - - # Requesting input-conditional losses. 
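- # Sketch (tensors hypothetical): `layer.get_losses_for([x])` narrows
- # `layer.losses` to those reachable from `x` in the graph, e.g. the
- # activity-regularization loss created by the call on `x`.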
- losses = [l for l in self.losses if not l._unconditional_loss] - inputs = nest.flatten(inputs) - reachable = tf_utils.get_reachable_from_inputs(inputs, losses) - return [l for l in losses if l in reachable] - - @doc_controls.do_not_doc_inheritable - def get_input_mask_at(self, node_index): - """Retrieves the input mask tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple inputs). - """ - inputs = self.get_input_at(node_index) - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @doc_controls.do_not_doc_inheritable - def get_output_mask_at(self, node_index): - """Retrieves the output mask tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple outputs). - """ - output = self.get_output_at(node_index) - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @property - @doc_controls.do_not_doc_inheritable - def input_mask(self): - """Retrieves the input mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Input mask tensor (potentially None) or list of input - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - inputs = self.input - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @property - @doc_controls.do_not_doc_inheritable - def output_mask(self): - """Retrieves the output mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Output mask tensor (potentially None) or list of output - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - output = self.output - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @doc_controls.do_not_doc_inheritable - def get_input_shape_at(self, node_index): - """Retrieves the input shape(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A shape tuple - (or list of shape tuples if the layer has multiple inputs). - - Raises: - RuntimeError: If called in Eager mode. - """ - return self._get_node_attribute_at_index(node_index, 'input_shapes', - 'input shape') - - @doc_controls.do_not_doc_inheritable - def get_output_shape_at(self, node_index): - """Retrieves the output shape(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. 
-
- Returns:
- A shape tuple
- (or list of shape tuples if the layer has multiple outputs).
-
- Raises:
- RuntimeError: If called in Eager mode.
- """
- return self._get_node_attribute_at_index(node_index, 'output_shapes',
- 'output shape')
-
- @doc_controls.do_not_doc_inheritable
- def get_input_at(self, node_index):
- """Retrieves the input tensor(s) of a layer at a given node.
-
- Arguments:
- node_index: Integer, index of the node
- from which to retrieve the attribute.
- E.g. `node_index=0` will correspond to the
- first time the layer was called.
-
- Returns:
- A tensor (or list of tensors if the layer has multiple inputs).
-
- Raises:
- RuntimeError: If called in Eager mode.
- """
- return self._get_node_attribute_at_index(node_index, 'input_tensors',
- 'input')
-
- @doc_controls.do_not_doc_inheritable
- def get_output_at(self, node_index):
- """Retrieves the output tensor(s) of a layer at a given node.
-
- Arguments:
- node_index: Integer, index of the node
- from which to retrieve the attribute.
- E.g. `node_index=0` will correspond to the
- first time the layer was called.
-
- Returns:
- A tensor (or list of tensors if the layer has multiple outputs).
-
- Raises:
- RuntimeError: If called in Eager mode.
- """
- return self._get_node_attribute_at_index(node_index, 'output_tensors',
- 'output')
-
- @property
- def input(self):
- """Retrieves the input tensor(s) of a layer.
-
- Only applicable if the layer has exactly one input,
- i.e. if it is connected to one incoming layer.
-
- Returns:
- Input tensor or list of input tensors.
-
- Raises:
- RuntimeError: If called in Eager mode.
- AttributeError: If no inbound nodes are found.
- """
- if not self._inbound_nodes:
- raise AttributeError('Layer ' + self.name +
- ' is not connected, no input to return.')
- return self._get_node_attribute_at_index(0, 'input_tensors', 'input')
-
- @property
- def output(self):
- """Retrieves the output tensor(s) of a layer.
-
- Only applicable if the layer has exactly one output,
- i.e. if it is connected to one incoming layer.
-
- Returns:
- Output tensor or list of output tensors.
-
- Raises:
- AttributeError: if the layer is connected to more than one incoming
- layer.
- RuntimeError: if called in Eager mode.
- """
- if not self._inbound_nodes:
- raise AttributeError('Layer ' + self.name + ' has no inbound nodes.')
- return self._get_node_attribute_at_index(0, 'output_tensors', 'output')
-
- @property
- @doc_controls.do_not_doc_inheritable
- def input_shape(self):
- """Retrieves the input shape(s) of a layer.
-
- Only applicable if the layer has exactly one input,
- i.e. if it is connected to one incoming layer, or if all inputs
- have the same shape.
-
- Returns:
- Input shape, as an integer shape tuple
- (or list of shape tuples, one tuple per input tensor).
-
- Raises:
- AttributeError: if the layer has no defined input_shape.
- RuntimeError: if called in Eager mode.
- """
- if not self._inbound_nodes:
- raise AttributeError('The layer has never been called '
- 'and thus has no defined input shape.')
- all_input_shapes = set(
- [str(node.input_shapes) for node in self._inbound_nodes])
- if len(all_input_shapes) == 1:
- return self._inbound_nodes[0].input_shapes
- else:
- raise AttributeError('The layer "' + str(self.name) +
- '" has multiple inbound nodes, '
- 'with different input shapes. Hence '
- 'the notion of "input shape" is '
- 'ill-defined for the layer.
' - 'Use `get_input_shape_at(node_index)` ' - 'instead.') - - def count_params(self): - """Count the total number of scalars composing the weights. - - Returns: - An integer count. - - Raises: - ValueError: if the layer isn't yet built - (in which case its weights aren't yet defined). - """ - if not self.built: - if getattr(self, '_is_graph_network', False): - with tf_utils.maybe_init_scope(self): - self._maybe_build(self.inputs) - else: - raise ValueError('You tried to call `count_params` on ' + self.name + - ', but the layer isn\'t built. ' - 'You can build it manually via: `' + self.name + - '.build(batch_input_shape)`.') - return layer_utils.count_params(self.weights) - - @property - @doc_controls.do_not_doc_inheritable - def output_shape(self): - """Retrieves the output shape(s) of a layer. - - Only applicable if the layer has one output, - or if all outputs have the same shape. - - Returns: - Output shape, as an integer shape tuple - (or list of shape tuples, one tuple per output tensor). - - Raises: - AttributeError: if the layer has no defined output shape. - RuntimeError: if called in Eager mode. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined output shape.') - all_output_shapes = set( - [str(node.output_shapes) for node in self._inbound_nodes]) - if len(all_output_shapes) == 1: - return self._inbound_nodes[0].output_shapes - else: - raise AttributeError('The layer "%s"' - ' has multiple inbound nodes, ' - 'with different output shapes. Hence ' - 'the notion of "output shape" is ' - 'ill-defined for the layer. ' - 'Use `get_output_shape_at(node_index)` ' - 'instead.' % self.name) - - @property - @doc_controls.do_not_doc_inheritable - def inbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._inbound_nodes - - @property - @doc_controls.do_not_doc_inheritable - def outbound_nodes(self): - """Deprecated, do NOT use! Only for compatibility with external Keras.""" - return self._outbound_nodes - - ############################################################################## - # Methods & attributes below are public aliases of other methods. # - ############################################################################## - - @deprecation.deprecated( - date=None, instructions='Please use `layer.__call__` method instead.') - @doc_controls.do_not_doc_inheritable - def apply(self, inputs, *args, **kwargs): - """Deprecated, do NOT use! - - This is an alias of `self.__call__`. - - Arguments: - inputs: Input tensor(s). - *args: additional positional arguments to be passed to `self.call`. - **kwargs: additional keyword arguments to be passed to `self.call`. - - Returns: - Output tensor(s). - """ - return self.__call__(inputs, *args, **kwargs) - - @deprecation.deprecated( - date=None, instructions='Please use `layer.add_weight` method instead.') - @doc_controls.do_not_doc_inheritable - def add_variable(self, *args, **kwargs): - """Deprecated, do NOT use! Alias for `add_weight`.""" - return self.add_weight(*args, **kwargs) - - @property - @doc_controls.do_not_generate_docs - def variables(self): - """Returns the list of all layer variables/weights. - - Alias of `self.weights`. - - Returns: - A list of variables. 
- """ - return self.weights - - @property - @doc_controls.do_not_generate_docs - def trainable_variables(self): - return self.trainable_weights - - @property - @doc_controls.do_not_generate_docs - def non_trainable_variables(self): - return self.non_trainable_weights - - ############################################################################## - # Methods & attributes below are all private and only used by the framework. # - ############################################################################## - - # TODO(scottzhu): Remove this? - @property - def _compute_dtype(self): - """The layer's compute dtype. - - Unless mixed-precision is used, this is the same as `Layer.dtype`. - - If self._autocast is True, layer's will cast floating-point inputs to this. - - Returns: - The layer's compute dtype. - """ - return self.dtype - - def _name_scope(self): - return self.name - - def _init_set_name(self, name, zero_based=True): - if not name: - self._name = backend.unique_object_name( - generic_utils.to_snake_case(self.__class__.__name__), - zero_based=zero_based) - else: - self._name = name - - def _get_existing_metric(self, name=None): - match = [m for m in self._metrics if m.name == name] - if not match: - return - if len(match) > 1: - raise ValueError( - 'Please provide different names for the metrics you have added. ' - 'We found {} metrics with the name: "{}"'.format(len(match), name)) - return match[0] - - def _eager_add_metric(self, value, aggregation=None, name=None): - # If the given metric is available in `metrics` list we just update state - # on it, otherwise we create a new metric instance and - # add it to the `metrics` list. - metric_obj = getattr(value, '_metric_obj', None) - # Tensors that come from a Metric object already updated the Metric state. - should_update_state = not metric_obj - name = metric_obj.name if metric_obj else name - - with self._metrics_lock: - match = self._get_existing_metric(name) - if match: - metric_obj = match - elif metric_obj: - self._metrics.append(metric_obj) - else: - from tensorflow.python.keras import metrics as metrics_mod # pylint:disable=g-import-not-at-top - if aggregation is None: - raise ValueError( - '`aggregation` must be specified when passing a `Tensor` ' - 'to `add_metric`.') - assert aggregation is not None - metric_obj = metrics_mod.Mean(name=name, dtype=value.dtype) - self._metrics.append(metric_obj) - - if should_update_state: - metric_obj(value) - return - - def _symbolic_add_metric(self, value, aggregation=None, name=None): - base_layer_utils.check_graph_consistency(value, method='add_metric') - match = self._get_existing_metric(name) - if aggregation is None: - # Iterate over the metrics and check if the given metric exists already. - # This can happen when a metric instance is created in subclassed model - # layer `__init__` and we have tracked that instance already in - # model.__setattr__. - if match: - result_tensor = value - metric_obj = match - elif hasattr(value, '_metric_obj'): - # We track the instance using the metadata on the result tensor. - result_tensor = value - metric_obj = result_tensor._metric_obj - self._metrics.append(metric_obj) - else: - raise ValueError( - 'We do not support adding an aggregated metric result tensor that ' - 'is not the output of a `tf.keras.metrics.Metric` metric instance. ' - 'Without having access to the metric instance we cannot reset the ' - 'state of a metric after every epoch during training. 
-
-  def _symbolic_add_metric(self, value, aggregation=None, name=None):
-    base_layer_utils.check_graph_consistency(value, method='add_metric')
-    match = self._get_existing_metric(name)
-    if aggregation is None:
-      # Iterate over the metrics and check if the given metric exists already.
-      # This can happen when a metric instance is created in subclassed model
-      # layer `__init__` and we have tracked that instance already in
-      # model.__setattr__.
-      if match:
-        result_tensor = value
-        metric_obj = match
-      elif hasattr(value, '_metric_obj'):
-        # We track the instance using the metadata on the result tensor.
-        result_tensor = value
-        metric_obj = result_tensor._metric_obj
-        self._metrics.append(metric_obj)
-      else:
-        raise ValueError(
-            'We do not support adding an aggregated metric result tensor that '
-            'is not the output of a `tf.keras.metrics.Metric` metric instance. '
-            'Without having access to the metric instance we cannot reset the '
-            'state of a metric after every epoch during training. You can '
-            'create a `tf.keras.metrics.Metric` instance and pass the result '
-            'here or pass an un-aggregated result with `aggregation` parameter '
-            'set as `mean`. For example: `self.add_metric(tf.reduce_sum(inputs)'
-            ', name=\'mean_activation\', aggregation=\'mean\')`')
-    else:
-      # If a non-aggregated tensor is given as input (i.e. `aggregation` is
-      # explicitly set to `mean`), we wrap the tensor in a `Mean` metric.
-      if match:
-        result_tensor = match(value)
-        metric_obj = match
-      else:
-        metric_obj, result_tensor = base_layer_utils.create_mean_metric(
-            value, name)
-      self._metrics.append(metric_obj)
-
-  def _handle_weight_regularization(self, name, variable, regularizer):
-    """Create lambdas which compute regularization losses."""
-
-    def _loss_for_variable(v):
-      """Creates a regularization loss `Tensor` for variable `v`."""
-      with backend.name_scope(name + '/Regularizer'):
-        regularization = regularizer(v)
-      return regularization
-
-    if isinstance(variable, tf_variables.PartitionedVariable):
-      for v in variable:
-        self.add_loss(functools.partial(_loss_for_variable, v))
-    else:
-      self.add_loss(functools.partial(_loss_for_variable, variable))
-
-  def _handle_activity_regularization(self, inputs, outputs):
-    # Apply activity regularization.
-    # Note that it should be applied every time the layer creates a new
-    # output, since it is output-specific.
-    if self._activity_regularizer:
-      output_list = nest.flatten(outputs)
-      with backend.name_scope('ActivityRegularizer'):
-        for output in output_list:
-          activity_loss = self._activity_regularizer(output)
-          batch_size = math_ops.cast(
-              array_ops.shape(output)[0], activity_loss.dtype)
-          # Make activity regularization strength batch-agnostic.
-          mean_activity_loss = activity_loss / batch_size
-          base_layer_utils.check_graph_consistency(
-              mean_activity_loss, method='activity_regularizer')
-          self.add_loss(mean_activity_loss, inputs=inputs)
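# A minimal illustrative sketch (public `tf.keras` API) of the batch-agnostic
# activity regularization implemented by `_handle_activity_regularization`
# above: the recorded loss has already been divided by the batch size.
import tensorflow as tf

reg_layer = tf.keras.layers.Dense(
    2, activity_regularizer=tf.keras.regularizers.l2(0.01))
_ = reg_layer(tf.ones((4, 3)))   # eager call records one activity loss
assert len(reg_layer.losses) == 1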
-
-  def _set_mask_metadata(self, inputs, outputs, previous_mask):
-    flat_outputs = nest.flatten(outputs)
-
-    mask_already_computed = (
-        getattr(self, '_compute_output_and_mask_jointly', False) or
-        all(getattr(x, '_keras_mask', None) is not None for x in flat_outputs))
-
-    # Only compute the mask if the Layer explicitly supports masking or has
-    # overridden `compute_mask`.
-    should_compute_mask = (
-        hasattr(self, 'compute_mask') and
-        (self.supports_masking or
-         not getattr(self.compute_mask, '_is_default', False)))
-
-    if mask_already_computed:
-      flat_masks = [getattr(x, '_keras_mask', None) for x in flat_outputs]
-    elif not should_compute_mask:
-      flat_masks = [None for _ in flat_outputs]
-    else:
-      output_masks = self.compute_mask(inputs, previous_mask)
-      # `compute_mask` can return a single `None` even when a Layer
-      # has multiple outputs.
-      if output_masks is None:
-        flat_masks = [None for _ in flat_outputs]
-      else:
-        flat_masks = nest.flatten(output_masks)
-
-    for output, mask in zip(flat_outputs, flat_masks):
-      try:
-        output._keras_mask = mask
-      except AttributeError:
-        # C type such as np.ndarray.
-        pass
-
-    if tf_utils.are_all_symbolic_tensors(flat_outputs):
-      for output in flat_outputs:
-        if getattr(output, '_keras_mask', None) is not None:
-          # Do not track masks for `TensorFlowOpLayer` construction.
-          output._keras_mask._keras_history_checked = True
-
-  def _collect_input_masks(self, inputs, args, kwargs):
-    """Checks if `mask` argument was passed, else gathers mask from inputs."""
-    if self._call_arg_was_passed('mask', args, kwargs):
-      return self._get_call_arg_value('mask', args, kwargs)
-
-    if not self._should_compute_mask:
-      return None
-
-    input_masks = nest.map_structure(lambda t: getattr(t, '_keras_mask', None),
-                                     inputs)
-    if generic_utils.is_all_none(input_masks):
-      return None
-    return input_masks
-
-  def _call_arg_was_passed(self, arg_name, args, kwargs, inputs_in_args=False):
-    if arg_name in kwargs:
-      return True
-    call_fn_args = self._call_fn_args
-    if not inputs_in_args:
-      # Ignore `inputs` arg.
-      call_fn_args = call_fn_args[1:]
-    if arg_name in dict(zip(call_fn_args, args)):
-      return True
-    return False
-
-  def _get_call_arg_value(self, arg_name, args, kwargs, inputs_in_args=False):
-    if arg_name in kwargs:
-      return kwargs[arg_name]
-    call_fn_args = self._call_fn_args
-    if not inputs_in_args:
-      # Ignore `inputs` arg.
-      call_fn_args = call_fn_args[1:]
-    args_dict = dict(zip(call_fn_args, args))
-    return args_dict[arg_name]
-
-  def _set_connectivity_metadata_(self, inputs, outputs, args, kwargs):
-
-    # If the layer returns tensors from its inputs, unmodified,
-    # we copy them to avoid loss of tensor metadata.
-    output_ls = nest.flatten(outputs)
-    inputs_ls = object_identity.ObjectIdentitySet(nest.flatten(inputs))
-    output_ls_copy = []
-    for x in output_ls:
-      if x in inputs_ls:
-        with backend.name_scope(self.name):
-          x = array_ops.identity(x)
-      output_ls_copy.append(x)
-    outputs = nest.pack_sequence_as(outputs, output_ls_copy)
-
-    # Ignore `inputs` arg.
-    arguments = dict(zip(self._call_fn_args[1:], args))
-    arguments.update(kwargs)
-
-    # Add an inbound node to the layer, so it can keep track of this call.
-    # This updates the layer history of the output tensor(s).
-    self._add_inbound_node(
-        input_tensors=inputs, output_tensors=outputs, arguments=arguments)
-    return inputs, outputs
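# A minimal illustrative sketch (public `tf.keras` API) of the connectivity
# metadata maintained below: each call on symbolic inputs appends one inbound
# node, and the per-node accessors index into that history.
import tensorflow as tf

in_a = tf.keras.Input(shape=(8,))
in_b = tf.keras.Input(shape=(8,))
shared = tf.keras.layers.Dense(4)
out_a = shared(in_a)  # records node 0
out_b = shared(in_b)  # records node 1
assert shared.get_input_at(0) is in_a
assert shared.get_output_at(1) is out_b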
-
-  def _add_inbound_node(self,
-                        input_tensors,
-                        output_tensors,
-                        arguments=None):
-    """Internal method to create an inbound node for the layer.
-
-    Arguments:
-      input_tensors: list of input tensors.
-      output_tensors: list of output tensors.
-      arguments: dictionary of keyword arguments that were passed to the
-        `call` method of the layer at the call that created the node.
-    """
-    inbound_layers = nest.map_structure(lambda t: t._keras_history.layer,
-                                        input_tensors)
-    node_indices = nest.map_structure(lambda t: t._keras_history.node_index,
-                                      input_tensors)
-    tensor_indices = nest.map_structure(lambda t: t._keras_history.tensor_index,
-                                        input_tensors)
-
-    # Create node, add it to inbound nodes.
-    node_module.Node(
-        self,
-        inbound_layers=inbound_layers,
-        node_indices=node_indices,
-        tensor_indices=tensor_indices,
-        input_tensors=input_tensors,
-        output_tensors=output_tensors,
-        arguments=arguments)
-
-    # Update tensor history metadata.
-    # The metadata attribute consists of
-    # 1) a layer instance
-    # 2) a node index for the layer
-    # 3) a tensor index for the node.
-    # This allows layer reuse (multiple nodes per layer) and multi-output
-    # or multi-input layers (e.g. a layer can return multiple tensors,
-    # and each can be sent to a different layer).
-    for i, tensor in enumerate(nest.flatten(output_tensors)):
-      tensor._keras_history = KerasHistory(self,
-                                           len(self._inbound_nodes) - 1, i)  # pylint: disable=protected-access
-
-  def _get_node_attribute_at_index(self, node_index, attr, attr_name):
-    """Private utility to retrieve an attribute (e.g. inputs) from a node.
-
-    This is used to implement the methods:
-        - get_input_shape_at
-        - get_output_shape_at
-        - get_input_at
-        etc...
-
-    Arguments:
-      node_index: Integer index of the node from which
-        to retrieve the attribute.
-      attr: Exact node attribute name.
-      attr_name: Human-readable attribute name, for error messages.
-
-    Returns:
-      The layer's attribute `attr` at the node of index `node_index`.
-
-    Raises:
-      RuntimeError: If the layer has no inbound nodes, or if called in Eager
-        mode.
-      ValueError: If the index provided does not match any node.
-    """
-    if not self._inbound_nodes:
-      raise RuntimeError('The layer has never been called '
-                         'and thus has no defined ' + attr_name + '.')
-    if not len(self._inbound_nodes) > node_index:
-      raise ValueError('Asked to get ' + attr_name + ' at node ' +
-                       str(node_index) + ', but the layer has only ' +
-                       str(len(self._inbound_nodes)) + ' inbound nodes.')
-    values = getattr(self._inbound_nodes[node_index], attr)
-    if isinstance(values, list) and len(values) == 1:
-      return values[0]
-    else:
-      return values
-
-  def _maybe_build(self, inputs):
-    # Check input assumptions set before layer building, e.g. input rank.
-    if not self.built:
-      input_spec.assert_input_compatibility(
-          self.input_spec, inputs, self.name)
-      input_list = nest.flatten(inputs)
-      if input_list and self._dtype is None:
-        try:
-          self._dtype = input_list[0].dtype.base_dtype.name
-        except AttributeError:
-          pass
-      input_shapes = None
-      if all(hasattr(x, 'shape') for x in input_list):
-        input_shapes = nest.map_structure(lambda x: x.shape, inputs)
-      # Only call `build` if the user has manually overridden the build method.
-      if not hasattr(self.build, '_is_default'):
-        # Any setup work performed only once should happen in an `init_scope`
-        # to avoid creating symbolic Tensors that will later pollute any eager
-        # operations.
-        with tf_utils.maybe_init_scope(self):
-          self.build(input_shapes)  # pylint:disable=not-callable
-      # We must also ensure that the layer is marked as built, and that the
-      # build shape is stored, since user-defined build functions may not be
-      # calling `super().build()`.
-      LegacyBaseLayer.build(self, input_shapes)
-
-    # Optionally load weight values specified at layer instantiation.
-    if self._initial_weights is not None:
-      if ops.executing_eagerly_outside_functions():
-        with ops.init_scope():
-          # Using `init_scope` since we want variable assignment in
-          # `set_weights` to be treated like variable initialization.
-          self.set_weights(self._initial_weights)
-      else:
-        self.set_weights(self._initial_weights)
-      self._initial_weights = None
-
-  def _symbolic_call(self, inputs):
-    input_shapes = nest.map_structure(lambda x: x.shape, inputs)
-    output_shapes = self.compute_output_shape(input_shapes)
-    # Convert to TensorShape so that nest.map_structure will not map into
-    # individual dim of the shape.
-    output_shapes = tf_utils.convert_shapes(output_shapes, to_tuples=False)
-
-    def _make_placeholder_like(shape):
-      ph = backend.placeholder(shape=shape, dtype=self.dtype)
-      ph._keras_mask = None
-      return ph
-    return nest.map_structure(_make_placeholder_like, output_shapes)
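# A minimal illustrative sketch (public `tf.keras` API) of the deferred-build
# behavior handled by `_maybe_build` above: weights are created on first call
# from the observed input shape, after which `count_params` can count them.
import tensorflow as tf

dense = tf.keras.layers.Dense(16)
assert not dense.built
_ = dense(tf.ones((2, 4)))  # first call triggers build with input shape (2, 4)
assert dense.built
assert dense.kernel.shape.as_list() == [4, 16]
assert dense.count_params() == 4 * 16 + 16  # kernel + bias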
-
-  def _get_trainable_state(self):
-    """Get the `trainable` state of each sublayer.
-
-    Returns:
-      A dict mapping all sublayers to their `trainable` value.
-    """
-    layers = trackable_layer_utils.filter_empty_layer_containers(self._layers)
-    # Keep track of each top-level layer's `trainable` state, as well as the
-    # state of all of its sublayers.
-    trainable_state = weakref.WeakKeyDictionary()
-    trainable_state[self] = self.trainable
-    for layer in layers:
-      trainable_state.update(layer._get_trainable_state())
-    return trainable_state
-
-  def _set_trainable_state(self, trainable_state):
-    """Set `trainable` state for each sublayer."""
-    layers = trackable_layer_utils.filter_empty_layer_containers(self._layers)
-    if self in trainable_state:
-      self.trainable = trainable_state[self]
-    for layer in layers:
-      layer._set_trainable_state(trainable_state)
-
-  @property
-  def _obj_reference_counts(self):
-    """A dictionary counting the number of attributes referencing an object."""
-    self._maybe_create_attribute('_obj_reference_counts_dict',
-                                 object_identity.ObjectIdentityDictionary())
-    return self._obj_reference_counts_dict
-
-  @trackable.no_automatic_dependency_tracking
-  def _maybe_create_attribute(self, name, default_value):
-    """Create the attribute with the default value if it hasn't been created.
-
-    This is useful for fields that are used for tracking purposes, such as
-    `_trainable_weights` or `_layers`. Note that a user could create a layer
-    subclass and assign an internal field before invoking `Layer.__init__()`,
-    so `__setattr__()` needs to create the tracking fields and `__init__()`
-    must not override them.
-
-    Args:
-      name: String, the name of the attribute.
-      default_value: Object, the default value of the attribute.
-    """
-    if not hasattr(self, name):
-      super(LegacyBaseLayer, self).__setattr__(name, default_value)
-
-  def __delattr__(self, name):
-    # For any super.__delattr__() call, we will directly use the implementation
-    # in Trackable and skip the behavior in AutoTrackable. The Layer originally
-    # used Trackable as its base class; the change to using Module as the base
-    # class forced us to have AutoTrackable in the class hierarchy. Skipping
-    # the __delattr__ and __setattr__ in AutoTrackable will keep the status quo.
-    existing_value = getattr(self, name, None)
-
-    # If this value is replacing an existing object assigned to an attribute, we
-    # should clean it out to avoid leaking memory. First we check if there are
-    # other attributes referencing it.
-    reference_counts = self._obj_reference_counts
-    if existing_value not in reference_counts:
-      super(tracking.AutoTrackable, self).__delattr__(name)
-      return
-
-    reference_count = reference_counts[existing_value]
-    if reference_count > 1:
-      # There are other remaining references. We can't remove this object from
-      # _layers etc.
-      reference_counts[existing_value] = reference_count - 1
-      super(tracking.AutoTrackable, self).__delattr__(name)
-      return
-    else:
-      # This is the last remaining reference.
- del reference_counts[existing_value] - - super(tracking.AutoTrackable, self).__delattr__(name) - - if (isinstance(existing_value, LegacyBaseLayer) - or trackable_layer_utils.has_weights(existing_value)): - super(tracking.AutoTrackable, self).__setattr__( - '_layers', - [l for l in self._layers if l is not existing_value]) - self._attribute_sentinel.invalidate_all() - if isinstance(existing_value, tf_variables.Variable): - super(tracking.AutoTrackable, self).__setattr__( - '_trainable_weights', - [w for w in self._trainable_weights if w is not existing_value]) - super(tracking.AutoTrackable, self).__setattr__( - '_non_trainable_weights', - [w for w in self._non_trainable_weights if w is not existing_value]) - - # Any time we change `_layers` (either by deleting the attribute or by - # reassigning it which will call __delattr__ from __setattr__) the topology - # of the subgraph of Layers may change. In that case we will need to - # recompute any attribute which depends on that subgraph. - if name == '_layers': - self._attribute_sentinel.invalidate_all() - - def __setattr__(self, name, value): - if (name == '_self_setattr_tracking' or - not getattr(self, '_self_setattr_tracking', True) or - # Exclude @property.setters from tracking - hasattr(self.__class__, name)): - try: - super(tracking.AutoTrackable, self).__setattr__(name, value) - except AttributeError: - raise AttributeError( - ('Can\'t set the attribute "{}", likely because it conflicts with ' - 'an existing read-only @property of the object. Please choose a ' - 'different name.').format(name)) - return - - # Keep track of trackable objects, for the needs of `Network.save_weights`. - value = data_structures.sticky_attribute_assignment( - trackable=self, value=value, name=name) - - reference_counts = self._obj_reference_counts - reference_counts[value] = reference_counts.get(value, 0) + 1 - - # Clean out the old attribute, which clears _layers and _trainable_weights - # if necessary. - try: - self.__delattr__(name) - except AttributeError: - pass - - # TODO(scottzhu): Need to track Module object as well for weight tracking. - # Be careful about metric if it becomes a Module in future. - # Append value to self._layers if relevant - if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, LegacyBaseLayer) or - trackable_layer_utils.has_weights(value))): - self._maybe_create_attribute('_layers', []) - # We need to check object identity to avoid de-duplicating empty - # container types which compare equal. - if not any((layer is value for layer in self._layers)): - self._layers.append(value) - if hasattr(value, '_attribute_sentinel'): - value._attribute_sentinel.add_parent(self._attribute_sentinel) - if hasattr(value, '_use_resource_variables'): - # Legacy layers (V1 tf.layers) must always use - # resource variables. - value._use_resource_variables = True - - # Append value to list of trainable / non-trainable weights if relevant - # TODO(b/125122625): This won't pick up on any variables added to a - # list/dict after creation. - for val in nest.flatten(value): - # TODO(b/126450014): Remove `_UnreadVariable` check here when assign ops - # no longer return True for isinstance Variable checks. 
- if not isinstance(val, tf_variables.Variable): - continue - if isinstance(val, resource_variable_ops._UnreadVariable): # pylint: disable=protected-access - continue - - # Users may add extra weights/variables - # simply by assigning them to attributes (invalid for graph networks) - self._maybe_create_attribute('_trainable_weights', []) - self._maybe_create_attribute('_non_trainable_weights', []) - if val.trainable: - if any(val is w for w in self._trainable_weights): - continue - self._trainable_weights.append(val) - else: - if any(val is w for w in self._non_trainable_weights): - continue - self._non_trainable_weights.append(val) - - backend.track_variable(val) - - # Skip the auto trackable from tf.Module to keep status quo. See the comment - # at __delattr__. - super(tracking.AutoTrackable, self).__setattr__(name, value) - - def _gather_children_attribute(self, attribute): - assert attribute in { - 'weights', 'trainable_weights', 'non_trainable_weights' - } - if hasattr(self, '_layers'): - nested_layers = trackable_layer_utils.filter_empty_layer_containers( - self._layers) - return list( - itertools.chain.from_iterable( - getattr(layer, attribute) for layer in nested_layers)) - return [] - - def _gather_unique_layers(self): - """Returns the current layer and all its children depth first deduped. - - We are deduping after getting the layers to maintain the order. - """ - all_layers = self._gather_layers() - unique_layers, seen_layers = [], object_identity.ObjectIdentitySet() - for layer in all_layers: - if layer not in seen_layers: - unique_layers.append(layer) - # Track the Variable's identity to avoid __eq__ issues. - seen_layers.add(layer) - return unique_layers - - def _gather_layers(self): - """Returns the current layer and all its children depth first.""" - all_layers = [self] - if hasattr(self, '_layers'): - child_layers = trackable_layer_utils.filter_empty_layer_containers( - self._layers) - for child_layer in child_layers: - all_layers.extend(child_layer._gather_layers()) - return all_layers - - @property - @tracking.cached_per_instance - def _attribute_sentinel(self): - return trackable_layer_utils.AttributeSentinel() - - # This is a hack so that the is_layer (within - # training/trackable/layer_utils.py) check doesn't get the weights attr. - # TODO(b/110718070): Remove when fixed. - def _is_layer(self): - return True - - def _init_call_fn_args(self): - # Clear cached call function arguments. - self.__class__._call_full_argspec.fget.cache.pop(self, None) - self.__class__._call_fn_args.fget.cache.pop(self, None) - self.__class__._call_accepts_kwargs.fget.cache.pop(self, None) - - call_fn_args = self._call_fn_args - self._expects_training_arg = ('training' in call_fn_args or - self._call_accepts_kwargs) - self._expects_mask_arg = ('mask' in call_fn_args or - self._call_accepts_kwargs) - - @property - @tracking.cached_per_instance - def _call_full_argspec(self): - # Argspec inspection is expensive and the call spec is used often, so it - # makes sense to cache the result. - return tf_inspect.getfullargspec(self.call) - - @property - @tracking.cached_per_instance - def _call_fn_args(self): - all_args = self._call_full_argspec.args - # Scrub `self` that appears if a decorator was applied. 
- if all_args and all_args[0] == 'self': - return all_args[1:] - return all_args - - @property - @tracking.cached_per_instance - def _call_accepts_kwargs(self): - return self._call_full_argspec.varkw is not None - - @property - @tracking.cached_per_instance - def _should_compute_mask(self): - return ('mask' in self._call_fn_args or - getattr(self, 'compute_mask', None) is not None) - - @property - def _eager_losses(self): - # A list of loss values containing activity regularizers and losses - # manually added through `add_loss` during eager execution. It is cleared - # after every batch. - # Because we plan on eventually allowing a same model instance to be trained - # in eager mode or graph mode alternatively, we need to keep track of - # eager losses and symbolic losses via separate attributes. - if not hasattr(self._thread_local, '_eager_losses'): - self._thread_local._eager_losses = [] - return self._thread_local._eager_losses - - @_eager_losses.setter - def _eager_losses(self, losses): - self._thread_local._eager_losses = losses - - def _dedup_weights(self, weights): - """Dedupe weights while maintaining order as much as possible.""" - output, seen_weights = [], object_identity.ObjectIdentitySet() - for w in weights: - if w not in seen_weights: - output.append(w) - # Track the Variable's identity to avoid __eq__ issues. - seen_weights.add(w) - return output - - # SavedModel properties. Please see keras/saving/saved_model for details. - - def __getstate__(self): - # Override to support `copy.deepcopy` and pickling. - # Thread-local objects cannot be copied in Python 3, so pop these. - # Thread-local objects are used to cache losses in MirroredStrategy, and - # so shouldn't be copied. - state = self.__dict__.copy() - state.pop('_thread_local', None) - state.pop('_metrics_lock', None) - return state - - def __setstate__(self, state): - state['_thread_local'] = threading.local() - state['_metrics_lock'] = threading.Lock() - # Bypass Trackable logic as `__dict__` already contains this info. - object.__setattr__(self, '__dict__', state) - - -class TensorFlowOpLayer(LegacyBaseLayer): - """Wraps a TensorFlow Operation in a Layer. - - This class is used internally by the Functional API. When a user - uses a raw TensorFlow Operation on symbolic tensors originating - from an `Input` Layer, the resultant operation will be wrapped - with this Layer object in order to make the operation compatible - with the Keras API. - - This Layer will create a new, identical operation (except for inputs - and outputs) every time it is called. If `run_eagerly` is `True`, - the op creation and calculation will happen inside an Eager function. - - Instances of this Layer are created when `autolambda` is called, which - is whenever a Layer's `__call__` encounters symbolic inputs that do - not have Keras metadata, or when a Network's `__init__` encounters - outputs that do not have Keras metadata. - - Attributes: - node_def: String, the serialized NodeDef of the Op this layer will wrap. - name: String, the name of the Layer. - constants: Dict of NumPy arrays, the values of any Tensors needed for this - Operation that do not originate from a Keras `Input` Layer. Since all - placeholders must come from Keras `Input` Layers, these Tensors must be - treated as constant in the Functional API. - trainable: Bool, whether this Layer is trainable. Currently Variables are - not supported, and so this parameter has no effect. - dtype: The default dtype of this Layer. 
Inherited from `Layer` and has no - effect on this class, however is used in `get_config`. - """ - - @trackable.no_automatic_dependency_tracking - def __init__(self, - node_def, - name, - constants=None, - trainable=True, - dtype=None): - # Pass autocast=False, as if inputs are cast, input types might not match - # Operation type. - super(TensorFlowOpLayer, self).__init__( - name=_TF_OP_LAYER_NAME_PREFIX + name, trainable=trainable, dtype=dtype, - autocast=False) - if isinstance(node_def, dict): - self.node_def = json_format.ParseDict(node_def, node_def_pb2.NodeDef()) - else: - if not isinstance(node_def, bytes): - node_def = node_def.encode('utf-8') - self.node_def = node_def_pb2.NodeDef.FromString(node_def) - # JSON serialization stringifies keys which are integer input indices. - self.constants = ({ - int(index): constant for index, constant in constants.items() - } if constants is not None else {}) - # Layer uses original op unless it is called on new inputs. - # This means `built` is not set in `__call__`. - self.built = True - - def call(self, inputs): - if context.executing_eagerly(): - return self._defun_call(inputs) - return self._make_op(inputs) - - def _make_node_def(self, graph): - node_def = node_def_pb2.NodeDef() - node_def.CopyFrom(self.node_def) - # Used in TPUReplicateContext to indicate whether this node has been cloned - # and to not add TPU attributes. - node_def.attr['_cloned'].b = True - node_def.name = graph.unique_name(node_def.name) - return node_def - - def _make_op(self, inputs): - inputs = nest.flatten(inputs) - graph = inputs[0].graph - node_def = self._make_node_def(graph) - with graph.as_default(): - for index, constant in self.constants.items(): - # Recreate constant in graph to add distribution context. - value = tensor_util.constant_value(constant) - if value is not None: - constant = constant_op.constant(value, name=node_def.input[index]) - inputs.insert(index, constant) - c_op = ops._create_c_op(graph, node_def, inputs, control_inputs=[]) - op = graph._create_op_from_tf_operation(c_op) - op._control_flow_post_processing() - - # Record the gradient because custom-made ops don't go through the - # code-gen'd eager call path - op_type = compat.as_str(op.op_def.name) - attr_names = [compat.as_str(attr.name) for attr in op.op_def.attr] - attrs = [] - for attr_name in attr_names: - attrs.append(attr_name) - attrs.append(op.get_attr(attr_name)) - attrs = tuple(attrs) - execute.record_gradient(op_type, op.inputs, attrs, op.outputs) - - if len(op.outputs) == 1: - return op.outputs[0] - return op.outputs - - @function.defun - def _defun_call(self, inputs): - """Wraps the op creation method in an Eager function for `run_eagerly`.""" - return self._make_op(inputs) - - def get_config(self): - config = super(TensorFlowOpLayer, self).get_config() - config.update({ - # `__init__` prefixes the name. Revert to the constructor argument. - 'name': config['name'][len(_TF_OP_LAYER_NAME_PREFIX):], - 'node_def': json_format.MessageToDict(self.node_def), - 'constants': { - i: backend.get_value(c) for i, c in self.constants.items() - } - }) - return config - - -class AddLoss(LegacyBaseLayer): - """Adds its inputs as a loss. - - Attributes: - unconditional: Whether or not the loss should be conditioned on the inputs. - """ - - def __init__(self, unconditional, **kwargs): - # Pass autocast=False, as there is no reason to cast loss to a different - # dtype. 
- kwargs['autocast'] = False - super(AddLoss, self).__init__(**kwargs) - self.unconditional = unconditional - - def call(self, inputs): - self.add_loss(inputs, inputs=(not self.unconditional)) - return inputs - - def get_config(self): - config = super(AddLoss, self).get_config() - config.update({'unconditional': self.unconditional}) - return config - - -class AddMetric(LegacyBaseLayer): - """Adds its inputs as a metric. - - Attributes: - aggregation: 'mean' or None. How the inputs should be aggregated. - metric_name: The name to use for this metric. - """ - - def __init__(self, aggregation=None, metric_name=None, **kwargs): - super(AddMetric, self).__init__(**kwargs) - self.aggregation = aggregation - self.metric_name = metric_name - - def call(self, inputs): - self.add_metric(inputs, self.aggregation, self.metric_name) - return inputs - - def get_config(self): - config = super(AddMetric, self).get_config() - config.update({ - 'aggregation': self.aggregation, - 'metric_name': self.metric_name - }) - return config - - -class KerasHistory( - collections.namedtuple('KerasHistory', - ['layer', 'node_index', 'tensor_index'])): - """Tracks the Layer call that created a Tensor, for Keras Graph Networks. - - During construction of Keras Graph Networks, this metadata is added to - each Tensor produced as the output of a Layer, starting with an - `InputLayer`. This allows Keras to track how each Tensor was produced, and - this information is later retraced by the `keras.engine.Network` class to - reconstruct the Keras Graph Network. - - Attributes: - layer: The Layer that produced the Tensor. - node_index: The specific call to the Layer that produced this Tensor. Layers - can be called multiple times in order to share weights. A new node is - created every time a Layer is called. - tensor_index: The output index for this Tensor. Always zero if the Layer - that produced this Tensor only has one output. Nested structures of - Tensors are deterministically assigned an index via `nest.flatten`. - """ - # Added to maintain memory and performance characteristics of `namedtuple` - # while subclassing. - __slots__ = () diff --git a/tensorflow/python/frozen_keras/engine/legacy_base_layer_test.py b/tensorflow/python/frozen_keras/engine/legacy_base_layer_test.py deleted file mode 100644 index 31883a5084e..00000000000 --- a/tensorflow/python/frozen_keras/engine/legacy_base_layer_test.py +++ /dev/null @@ -1,1274 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for TensorFlow 2.0 layer behavior.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -import os -import sys -import traceback - -import numpy as np - -# TODO(scottzhu): Move to use all frozen_keras code when other deps are moved. 
-from tensorflow.python import keras -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import test_util -from tensorflow.python.frozen_keras.engine import legacy_base_layer -from tensorflow.python.frozen_keras.utils import tf_utils -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.optimizer_v2 import rmsprop -from tensorflow.python.layers import core as legacy_core -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import summary_ops_v2 -from tensorflow.python.ops import tensor_array_ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.summary import summary_iterator - - -class DynamicLayer(legacy_base_layer.LegacyBaseLayer): - - def __init__(self, dynamic=False, **kwargs): - super(DynamicLayer, self).__init__(dynamic=dynamic, **kwargs) - - def call(self, inputs): - samples = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=array_ops.shape(inputs)[0]) - for idx, sample in enumerate(inputs): - samples = samples.write(idx, math_ops.square(sample)) - return samples.stack() - - def compute_output_shape(self, input_shape): - return input_shape - - -class InvalidLayer(legacy_base_layer.LegacyBaseLayer): - - def call(self, inputs): - raise ValueError('You did something wrong!') - - -class BaseLayerTest(keras_parameterized.TestCase): - - @keras_parameterized.run_with_all_model_types - def test_dynamic_layer(self): - model = testing_utils.get_model_from_layers([DynamicLayer(dynamic=True)], - input_shape=(3,)) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - @keras_parameterized.run_with_all_model_types - def test_dynamic_layer_error(self): - with self.assertRaisesRegexp(TypeError, - 'attempting to use Python control flow'): - model = testing_utils.get_model_from_layers([DynamicLayer()], - input_shape=(3,)) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - @keras_parameterized.run_with_all_model_types - def test_dynamic_layer_error_running_in_graph_mode(self): - with ops.get_default_graph().as_default(): - model = testing_utils.get_model_from_layers([DynamicLayer(dynamic=True)], - input_shape=(3,)) - self.assertEqual(model.dynamic, True) - # But then you cannot run the model since you're in a graph scope. 
- with self.assertRaisesRegexp( - ValueError, 'You must enable eager execution'): - model.compile(rmsprop.RMSprop(0.001), loss='mse') - - def test_manual_compute_output_shape(self): - class BuildCounter(keras.layers.Layer): - - def __init__(self, *args, **kwargs): # pylint: disable=redefined-outer-name - super(BuildCounter, self).__init__(*args, **kwargs) - self.build_counter = 0 - - def build(self, input_shape): - self.build_counter += 1 - - def call(self, inputs): - return inputs - - with context.eager_mode(): - layer = BuildCounter(dtype=dtypes.float64) - output_shape = layer.compute_output_shape((None, 10)) - self.assertEqual(layer.build_counter, 1) - self.assertEqual(output_shape.as_list(), [None, 10]) - output_signature = layer.compute_output_signature( - tensor_spec.TensorSpec(dtype=dtypes.float64, shape=[None, 10])) - self.assertEqual(layer.build_counter, 1) - self.assertEqual(output_signature.dtype, dtypes.float64) - self.assertEqual(output_signature.shape.as_list(), [None, 10]) - layer(np.ones((5, 10))) - self.assertEqual(layer.build_counter, 1) - - def test_eager_switch_case_input(self): - with context.eager_mode(): - task = keras.Input(shape=(), dtype=dtypes.int32) - control_flow_ops.switch_case( - task[0], [lambda: constant_op.constant(1.0) for _ in range(10)]) - - # TODO(scottzhu): Reenable this once sequential is moved to frozen_keras. - def DISABLED_test_dynamic_layer_with_deferred_sequential_model(self): - model = keras.Sequential( - [DynamicLayer(dynamic=True), - keras.layers.Dense(3)]) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - def test_nested_dynamic_layers_in_eager_mode(self): - inputs = keras.Input((3,)) - outputs = DynamicLayer(dynamic=True)(inputs) # pylint:disable=not-callable - inner_model = keras.Model(inputs, outputs) - self.assertEqual(inner_model.dynamic, True) - - inputs = keras.Input((3,)) - x = DynamicLayer(dynamic=True)(inputs) # pylint:disable=not-callable - outputs = inner_model(x) - - model = keras.Model(inputs, outputs) - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - - def test_dynamic_subclassed_model_no_shape_inference(self): - - class MyModel(keras.Model): - - def __init__(self): - super(MyModel, self).__init__(dynamic=True) - self.layer1 = keras.layers.Dense(3) - self.layer2 = keras.layers.Dense(3) - - def call(self, inputs): - if math_ops.reduce_sum(inputs) > 0: - return self.layer1(inputs) - else: - return self.layer2(inputs) - - model = MyModel() - self.assertEqual(model.dynamic, True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - self.assertEqual(model.run_eagerly, True) - model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3))) - self.assertEqual(model.outputs, None) - - def test_dynamic_subclassed_model_with_shape_inference(self): - - class MyModel(keras.Model): - - def __init__(self): - super(MyModel, self).__init__(dynamic=True) - self.layer1 = keras.layers.Dense(3) - self.layer2 = keras.layers.Dense(3) - - def call(self, inputs): - if math_ops.reduce_sum(inputs) > 0: - return self.layer1(inputs) - else: - return self.layer2(inputs) - - def compute_output_shape(self, input_shape): - return tuple(input_shape[:-1].as_list()) + (3,) - - model = MyModel() - self.assertEqual(model.dynamic, 
True) - model.compile(rmsprop.RMSprop(0.001), loss='mse') - x, y = np.random.random((2, 3)), np.random.random((2, 3)) - model.train_on_batch(x, y) - outputs = model(x) - self.assertEqual(outputs.shape.as_list(), [2, 3]) - - def test_deepcopy(self): - with context.eager_mode(): - bias_reg = lambda x: 1e-3 * math_ops.reduce_sum(x) - layer = keras.layers.Conv2D(32, (3, 3), bias_regularizer=bias_reg) - # Call the Layer on data to generate regularize losses. - layer(array_ops.ones((1, 10, 10, 3))) - self.assertLen(layer.losses, 1) - new_layer = copy.deepcopy(layer) - self.assertEqual(new_layer.bias_regularizer, bias_reg) - self.assertEqual(layer.get_config(), new_layer.get_config()) - - @test_util.run_in_graph_and_eager_modes - def test_invalid_forward_pass(self): - inputs = keras.Input((3,)) - with self.assertRaisesRegexp(ValueError, 'You did something wrong!'): - _ = InvalidLayer()(inputs) # pylint:disable=not-callable - - def test_no_legacy_model(self): - inputs = keras.Input((1,)) - legacy_dense_0 = legacy_core.Dense(1, name='legacy_dense_0') - legacy_dense_1 = legacy_core.Dense(1, name='legacy_dense_1') - - layer = legacy_dense_0(inputs) - layer = keras.layers.Dense(1)(layer) - layer = legacy_dense_1(layer) - - expected_regex = (r'The following are legacy tf\.layers\.Layers:\n ' - '{}\n {}'.format(legacy_dense_0, legacy_dense_1)) - - with self.assertRaisesRegexp(TypeError, expected_regex): - _ = keras.models.Model(inputs=[inputs], outputs=[layer]) - - model = keras.models.Model(inputs=[inputs], outputs=[inputs]) - with self.assertRaisesRegexp(TypeError, expected_regex): - model._insert_layers([legacy_dense_0, legacy_dense_1]) - - def test_no_legacy_sequential(self): - layers = [ - keras.layers.Dense(1), - legacy_core.Dense(1, name='legacy_dense_0') - ] - - expected_regex = r'legacy tf\.layers\.Layers:\n {}'.format(layers[1]) - with self.assertRaisesRegexp(TypeError, expected_regex): - _ = keras.models.Sequential(layers) - - with self.assertRaisesRegexp(TypeError, expected_regex): - _ = keras.models.Sequential([keras.layers.Input(shape=(4,))] + layers) - - model = keras.models.Sequential() - with self.assertRaisesRegexp(TypeError, expected_regex): - for l in layers: - model.add(l) - - @keras_parameterized.run_with_all_model_types - @test_util.run_in_graph_and_eager_modes - def test_build_with_numpy_data(self): - model_layers = [ - keras.layers.Dense(3, activation='relu', kernel_initializer='ones'), - keras.layers.Dense(1, activation='sigmoid', kernel_initializer='ones') - ] - model = testing_utils.get_model_from_layers(model_layers, input_shape=(4,)) - model(np.zeros((2, 4), dtype='float32')) - self.assertTrue(model.built) - - @test_util.run_in_graph_and_eager_modes - def test_default_add_weight(self): - - class TestLayer(keras.layers.Layer): - - def __init__(self): - super(TestLayer, self).__init__() - self.default_weight = self.add_weight() - self.weight_without_name = self.add_weight(shape=(3, 4)) - self.regularized_weight_without_name = self.add_weight( - shape=(3, 4), regularizer='l2') - - layer = TestLayer() - self.assertEqual(layer.default_weight.shape.as_list(), []) - self.assertEqual(layer.weight_without_name.shape.as_list(), [3, 4]) - self.assertEqual(layer.default_weight.dtype.name, 'float32') - self.assertEqual(layer.weight_without_name.dtype.name, 'float32') - self.assertEqual(len(layer.losses), 1) - if not context.executing_eagerly(): - # Cannot access tensor.name in eager execution. 
- self.assertTrue('Variable_2/Regularizer' in layer.losses[0].name) - - @keras_parameterized.run_all_keras_modes(always_skip_v1=True) - def test_learning_phase_freezing_for_layers(self): - class LearningPhaseLayer(keras.layers.Layer): - - def call(self, inputs): - return keras.backend.in_train_phase( - lambda: array_ops.ones_like(inputs), - lambda: array_ops.zeros_like(inputs)) - - def get_learning_phase_value(): - model = keras.models.Sequential([LearningPhaseLayer(input_shape=(1,))]) - model._run_eagerly = testing_utils.should_run_eagerly() - return np.sum(model(np.ones((1, 1)))) - - self.assertEqual(get_learning_phase_value(), 0) - - # Test scope. - with keras.backend.learning_phase_scope(1): - self.assertEqual(get_learning_phase_value(), 1) - - # The effects of the scope end after exiting it. - self.assertEqual(get_learning_phase_value(), 0) - - # Test setting. - keras.backend.set_learning_phase(1) - self.assertEqual(get_learning_phase_value(), 1) - keras.backend.set_learning_phase(0) - self.assertEqual(get_learning_phase_value(), 0) - - # Cannot be enabled with `run_eagerly=True`, see b/123904578 - @test_util.run_all_in_graph_and_eager_modes - def test_layer_can_return_variable(self): - - class ComputeSum(keras.layers.Layer): - - def __init__(self): - super(ComputeSum, self).__init__() - self.total = variables.Variable( - initial_value=array_ops.zeros((1, 1)), trainable=False) - if not context.executing_eagerly(): - keras.backend.get_session().run(self.total.initializer) - - def call(self, inputs): - self.total.assign_add(inputs) - return self.total - - inputs = keras.Input(shape=(1,)) - model = keras.Model(inputs, ComputeSum()(inputs)) - model.predict(np.ones((1, 1))) - - def _get_layer_with_training_arg(self): - - class TrainingLayer(keras.layers.Layer): - """A layer with a `training` argument in a defuned `call`.""" - - @def_function.function - def call(self, inputs, training=None): - if training is None: - training = keras.backend.learning_phase() - return tf_utils.smart_cond(training, - lambda: array_ops.ones_like(inputs), - lambda: array_ops.zeros_like(inputs)) - - return TrainingLayer() - - @keras_parameterized.run_with_all_model_types - # b/124459427: can't test with `run_eagerly=True` for now. - @test_util.run_in_graph_and_eager_modes - def test_training_arg_in_defun(self): - layer = self._get_layer_with_training_arg() - model = testing_utils.get_model_from_layers([layer], input_shape=(1,)) - model.compile(rmsprop.RMSprop(0.), - loss='mae') - history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(history.history['loss'][0], 1.) - loss = model.evaluate(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(loss, 0.) - - # Test that the argument injection performed in `call` is not active - # when the argument is passed explicitly. - layer = self._get_layer_with_training_arg() - inputs = keras.Input(shape=(1,)) - # Pass `training` by name - outputs = layer(inputs, training=False) - model = keras.Model(inputs, outputs) - model.compile(rmsprop.RMSprop(0.), - loss='mae') - history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) - self.assertEqual(history.history['loss'][0], 0.) - - @keras_parameterized.run_with_all_model_types - @keras_parameterized.run_all_keras_modes - def test_raw_variable_assignment(self): - - class RawVariableLayer(keras.layers.Layer): - - def __init__(self, **kwargs): - super(RawVariableLayer, self).__init__(**kwargs) - # Test variables in nested structure. 
- self.var_list = [variables.Variable(1.), {'a': variables.Variable(2.)}] - - def call(self, inputs): - return inputs * self.var_list[0] * self.var_list[1]['a'] - - model = testing_utils.get_model_from_layers([RawVariableLayer()], - input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - x, y = np.ones((10, 10)), np.ones((10, 10)) - # Checks that variables get initialized. - model.fit(x, y, batch_size=2, epochs=2) - - @test_util.run_in_graph_and_eager_modes - def test_layer_names(self): - inputs = keras.layers.Input(shape=[2]) - add1 = inputs + inputs - add2 = keras.layers.Add()([inputs, inputs]) - add3 = inputs + inputs - add4 = keras.layers.Add()([inputs, inputs]) - model = keras.models.Model( - inputs=[inputs], outputs=[add1, add2, add3, add4]) - actual_names = [l.name for l in model.layers] - graph_names = [ - 'input_1', 'tf_op_layer_AddV2', 'add', 'tf_op_layer_AddV2_1', 'add_1' - ] - eager_names = [ - 'input_1', 'tf_op_layer_add', 'add', 'tf_op_layer_add_2', 'add_1' - ] - for actual, eager, graph in zip(actual_names, graph_names, eager_names): - self.assertIn(actual, {eager, graph}) - - def test_add_trainable_weight_on_frozen_layer(self): - - class TestLayer(keras.layers.Layer): - - def build(self, input_shape): - self.w = self.add_weight(shape=(), trainable=True) - - def call(self, inputs): - return self.w * inputs - - layer = TestLayer() - layer.trainable = False - layer.build(None) - layer.trainable = True - self.assertListEqual(layer.trainable_weights, [layer.w]) - - @keras_parameterized.run_with_all_model_types - @keras_parameterized.run_all_keras_modes - def test_passing_initial_weights_values(self): - kernel_value = np.random.random((10, 2)) - layer_with_weights = keras.layers.Dense( - 2, use_bias=False, weights=[kernel_value]) - - model = testing_utils.get_model_from_layers([layer_with_weights], - input_shape=(10,)) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - inputs = np.random.random((3, 10)) - out = model.predict(inputs) - self.assertAllClose(model.layers[-1].get_weights()[0], kernel_value) - self.assertAllClose(out, np.dot(inputs, kernel_value)) - - @test_util.run_in_graph_and_eager_modes - def test_set_weights_and_get_weights(self): - layer = keras.layers.Dense(2) - layer.build((None, 10)) - kernel = np.random.random((10, 2)) - bias = np.random.random((2,)) - layer.set_weights([kernel, bias]) - weights = layer.get_weights() - self.assertEqual(len(weights), 2) - self.assertAllClose(weights[0], kernel) - self.assertAllClose(weights[1], bias) - with self.assertRaisesRegexp( - ValueError, 'but the layer was expecting 2 weights'): - layer.set_weights([1, 2, 3]) - with self.assertRaisesRegexp( - ValueError, 'not compatible with provided weight shape'): - layer.set_weights([kernel.T, bias]) - - def test_get_config_error(self): - - class MyLayer(keras.layers.Layer): - - def __init__(self, my_kwarg='default', **kwargs): - super(MyLayer, self).__init__(**kwargs) - self.my_kwarg = my_kwarg - - # `__init__` includes kwargs but `get_config` is not overridden, so - # an error should be thrown: - with self.assertRaisesRegexp(NotImplementedError, 'Layer MyLayer has'): - MyLayer('custom').get_config() - - class MyLayerNew(keras.layers.Layer): - - def __init__(self, my_kwarg='default', **kwargs): - super(MyLayerNew, self).__init__(**kwargs) - self.my_kwarg = my_kwarg - - def get_config(self): - config = super(MyLayerNew, self).get_config() - config['my_kwarg'] = self.my_kwarg - return config - - # 
Test to make sure that error is not raised if the method call is - # from an overridden `get_config`: - self.assertEqual(MyLayerNew('custom').get_config()['my_kwarg'], 'custom') - - class MyLayerNew2(keras.layers.Layer): - - def __init__(self, name='MyLayerName', dtype=None, **kwargs): # pylint:disable=redefined-outer-name - super(MyLayerNew2, self).__init__(name=name, dtype=dtype, **kwargs) - - # Check that if the kwargs in `__init__` are base layer constructor - # arguments, no error is thrown: - self.assertEqual(MyLayerNew2(name='New').get_config()['name'], 'New') - - @test_util.run_in_graph_and_eager_modes - def test_count_params(self): - dense = keras.layers.Dense(16) - dense.build((None, 4)) - self.assertEqual(dense.count_params(), 16 * 4 + 16) - - dense = keras.layers.Dense(16) - with self.assertRaisesRegexp(ValueError, 'call `count_params`'): - dense.count_params() - - model = keras.Sequential(keras.layers.Dense(16)) - with self.assertRaisesRegexp(ValueError, 'call `count_params`'): - model.count_params() - - dense = keras.layers.Dense(16, input_dim=4) - model = keras.Sequential(dense) - self.assertEqual(model.count_params(), 16 * 4 + 16) - - def test_super_not_called(self): - - class CustomLayerNotCallingSuper(keras.layers.Layer): - - def __init__(self): - pass - - layer = CustomLayerNotCallingSuper() - with self.assertRaisesRegexp(RuntimeError, 'You must call `super()'): - layer(np.random.random((10, 2))) - - @test_util.run_in_graph_and_eager_modes - def test_first_arg_not_called_inputs(self): - x, y = array_ops.ones((10, 1)), array_ops.ones((10, 1)) - - class ArgLayer(keras.layers.Layer): - - def call(self, x, y): - return x + y - - layer = ArgLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - class KwargLayer(keras.layers.Layer): - - def call(self, x=None, y=None): - return x + y - - layer = KwargLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - with self.assertRaisesRegexp(ValueError, 'must always be passed'): - layer(y=y) - - class TFFunctionLayer(keras.layers.Layer): - - @def_function.function - def call(self, x, y=None): - if y is None: - return x - return x + y - - layer = TFFunctionLayer() - out = self.evaluate(layer(x=x, y=y)) - self.assertAllClose(out, 2 * np.ones((10, 1))) - - def test_build_input_shape(self): - class CustomLayer(keras.layers.Layer): - - def build(self, input_shape): - self.add_weight('w', shape=input_shape[1:]) - super(CustomLayer, self).build(input_shape) - - layer = CustomLayer() - self.assertFalse(layer.built) - - layer.build([None, 1, 2, 3]) - self.assertTrue(layer.built) - self.assertEqual([None, 1, 2, 3], layer._build_input_shape) - - layer = CustomLayer() - layer(keras.Input((3,))) - self.assertTrue(layer.built) - self.assertEqual([None, 3], layer._build_input_shape.as_list()) - - -class SymbolicSupportTest(test.TestCase): - - def test_using_symbolic_tensors_with_tf_ops(self): - # Single-input. - x = keras.Input((3,)) - y = math_ops.square(x) - self.assertEqual(y.graph, keras.backend.get_graph()) - - # Multi-inputs. - x1, x2 = keras.Input((3,)), keras.Input((3,)) - y = array_ops.concat([x1, x2], axis=1) - self.assertEqual(y.graph, keras.backend.get_graph()) - - # Mixing Keras symbolic tensors and graph tensors from the same graph works. 
- with keras.backend.get_graph().as_default(): - x1 = keras.Input((3,)) - x2 = keras.Input((3,)) - y = math_ops.matmul(x1, x2) - self.assertEqual(y.graph, keras.backend.get_graph()) - - # Creating same op type (matmul) multiple times in the Keras graph works. - x1 = keras.Input((3,)) - x2 = keras.Input((3,)) - y = math_ops.matmul(x1, x2) - self.assertEqual(y.graph, keras.backend.get_graph()) - - def test_mixing_eager_and_graph_tensors(self): - with ops.Graph().as_default(): - x1 = array_ops.ones((3, 3)) - x2 = array_ops.ones((3, 3)) - self.assertIsInstance(x2, ops.EagerTensor) - with self.assertRaisesRegexp(TypeError, 'Graph tensors'): - math_ops.matmul(x1, x2) - - def test_mixing_numpy_arrays_and_graph_tensors(self): - with ops.Graph().as_default(): - x1 = array_ops.ones((3, 3)) - x2 = np.ones((3, 3), dtype='float32') - with self.assertRaisesRegexp(TypeError, 'Graph tensors'): - math_ops.matmul(x1, x2) - - @test_util.run_in_graph_and_eager_modes - def test_mixing_keras_symbolic_tensors_and_eager_tensors(self): - x1 = keras.Input((3,)) - x2 = array_ops.ones((3, 3)) - y = math_ops.matmul(x1, x2) - self.assertEqual(y.graph, keras.backend.get_graph()) - fn = keras.backend.function(inputs=[x1], outputs=[y]) - x_val = np.random.random((3, 3)) - y_val = np.ones((3, 3)) - self.assertAllClose(fn([x_val])[0], - np.matmul(x_val, y_val), - atol=1e-5) - - @test_util.run_in_graph_and_eager_modes - def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self): - x1 = keras.Input((3,)) - x2 = np.ones((3, 3), dtype='float32') - y = math_ops.matmul(x1, x2) - self.assertEqual(y.graph, keras.backend.get_graph()) - fn = keras.backend.function(inputs=[x1], outputs=[y]) - x_val = np.random.random((3, 3)) - y_val = np.ones((3, 3)) - self.assertAllClose(fn([x_val])[0], - np.matmul(x_val, y_val), - atol=1e-5) - - @test_util.run_in_graph_and_eager_modes - def test_reraising_exception(self): - # When layer is not dynamic, we have some pattern matching during exception - # handling to detect when the user is trying to use python control flow. - # When an exception is thrown but the pattern doesn't match, we want to - # preserve the originating stack trace. An early implementation of this - # logic lost the stack trace. We test the correct behavior here. 
- - class TypeErrorLayer(legacy_base_layer.LegacyBaseLayer): - - def call(self, inputs): - def easily_identifiable_name(): - raise TypeError('Non-matching TypeError message.') - easily_identifiable_name() - - inputs = keras.Input((3,)) - - try: - _ = TypeErrorLayer()(inputs) # pylint:disable=not-callable - except TypeError as e: - if hasattr(e, 'ag_error_metadata'): - self.assertIn('easily_identifiable_name', str(e)) - # See ErrorMetadataBase in autograph/pyct/errors.py - function_name = e.ag_error_metadata.translated_stack[-1].function_name - else: - tb = traceback.extract_tb(sys.exc_info()[2]) - last_entry = tb[-1] - function_name = last_entry[2] - self.assertEqual(function_name, 'easily_identifiable_name') - - @test_util.run_in_graph_and_eager_modes - def test_summaries_in_tf_function(self): - if not context.executing_eagerly(): - return - - class MyLayer(keras.layers.Layer): - - def call(self, inputs): - summary_ops_v2.scalar('mean', math_ops.reduce_mean(inputs)) - return inputs - - tmp_dir = self.get_temp_dir() - writer = summary_ops_v2.create_file_writer_v2(tmp_dir) - with writer.as_default(), summary_ops_v2.always_record_summaries(): - my_layer = MyLayer() - x = array_ops.ones((10, 10)) - - def my_fn(x): - return my_layer(x) - - _ = my_fn(x) - - event_file = gfile.Glob(os.path.join(tmp_dir, 'events*')) - self.assertLen(event_file, 1) - event_file = event_file[0] - tags = set() - for e in summary_iterator.summary_iterator(event_file): - for val in e.summary.value: - tags.add(val.tag) - self.assertEqual(set(['my_layer/mean']), tags) - - -@test_util.run_all_in_graph_and_eager_modes -class NestedTrackingTest(test.TestCase): - - def test_nested_layer_variable_tracking(self): - # Test that variables from nested sublayers are - # being tracked by subclassed layers. - - class MyLayer(keras.layers.Layer): - - def __init__(self): - super(MyLayer, self).__init__() - self.dense1 = keras.layers.Dense(1) - self.dense2 = keras.layers.BatchNormalization() - - def build(self, input_shape): - self.v1 = self.add_weight('v1', shape=input_shape[1:].as_list()) - self.v2 = variables.Variable( - name='v2', - initial_value=np.zeros(input_shape[1:].as_list(), dtype='float32'), - trainable=False) - - def call(self, inputs): - x = self.dense1(inputs) + self.dense2(inputs) - return x + self.v1 + self.v2 - - layer = MyLayer() - inputs = keras.Input((1,)) - _ = layer(inputs) - - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 5) - self.assertEqual(len(layer.non_trainable_weights), 3) - - layer.dense1.trainable = False - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 3) - self.assertEqual(len(layer.non_trainable_weights), 5) - - layer.trainable = False - self.assertEqual(len(layer.weights), 8) - self.assertEqual(len(layer.trainable_weights), 0) - self.assertEqual(len(layer.non_trainable_weights), 8) - self.assertEqual( - {id(v) for v in [layer.dense1, layer.dense2, layer.v1, layer.v2]}, - {id(v) for _, v in layer._checkpoint_dependencies}) - - def test_nested_layer_updates_losses_tracking(self): - # Test that updates and losses from nested sublayers are - # being tracked by subclassed layers. 
- - class UpdateAndLossLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = self.add_weight('v1', shape=()) - - def call(self, inputs): - self.add_loss(math_ops.reduce_sum(inputs)) - self.add_update(state_ops.assign_add(self.v1, 1)) - return inputs + 1 - - class MyLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = self.add_weight('v1', shape=()) - - def __init__(self): - super(MyLayer, self).__init__() - self.ul1 = UpdateAndLossLayer() - self.ul2 = UpdateAndLossLayer() - - def call(self, inputs): - self.add_loss(math_ops.reduce_sum(inputs)) - self.add_update(state_ops.assign_add(self.v1, 1)) - x = self.ul1(inputs) - return self.ul2(x) - - layer = MyLayer() - - if context.executing_eagerly(): - inputs = array_ops.ones((3, 1)) - _ = layer(inputs) - self.assertEqual(len(layer.losses), 3) - self.assertLen(layer.get_losses_for(None), 3) - else: - inputs = keras.Input((1,)) - _ = layer(inputs) - self.assertEqual(len(layer.losses), 3) - self.assertEqual(len(layer.updates), 3) - self.assertLen(layer.get_losses_for(None), 3) - - def test_attribute_reassignment(self): - l = keras.layers.Layer() - l.a = keras.layers.Layer() - l.a = [] - l.a = variables.Variable(1.) - l.a = keras.layers.Layer() - last_assignment = keras.layers.Layer() - l.a = last_assignment - l.b = variables.Variable(1.) - del l.b - l.c = keras.layers.Layer() - del l.c - l.d = last_assignment - del l.d - self.assertEqual([last_assignment], l._layers) - self.assertEqual([], l.trainable_weights) - self.assertEqual([], l.non_trainable_weights) - self.assertEqual([], l.weights) - del l.a - self.assertEqual([], l._layers) - - def test_assign_op_not_tracked_as_variable(self): - - class LayerWithAssignAttr(keras.layers.Layer): - - def build(self, input_shape): - self.v = variables.Variable(1.) - self.v_assign = self.v.assign_add(2.) - - layer = LayerWithAssignAttr() - layer.build((10, 10)) - - self.assertEqual([layer.v], layer.variables) - - def test_layer_class_not_tracked_as_sublayer(self): - # See https://github.com/tensorflow/tensorflow/issues/27431 for details. - - class LayerWithClassAttribute(keras.layers.Layer): - - def __init__(self): - super(LayerWithClassAttribute, self).__init__() - self.layer_fn = keras.layers.Dense - - layer = LayerWithClassAttribute() - self.assertEmpty(layer.variables) - self.assertEmpty(layer.submodules) - - def test_layer_call_fn_args(self): - - class NonDefunLayer(keras.layers.Layer): - - def call(self, inputs, a, mask, b=None, training=None): - return inputs - - class DefunLayer(keras.layers.Layer): - - @def_function.function - def call(self, x, mask, a, training=None, b=None): - return x - - nondefun_layer = NonDefunLayer() - self.assertEqual(nondefun_layer._call_fn_args, - ['inputs', 'a', 'mask', 'b', 'training']) - defun_layer = DefunLayer() - self.assertEqual(defun_layer._call_fn_args, - ['x', 'mask', 'a', 'training', 'b']) - - def test_sequential_model(self): - model = keras.Sequential([keras.layers.Dense(10, input_shape=(10,)), - keras.layers.Dense(5)]) - self.assertLen(model.layers, 2) - self.assertLen(model.weights, 4) - - # Make sure a subclass model also works when it is called 'Sequential'. 
- class Sequential(keras.Model): - - def __init__(self): - super(Sequential, self).__init__() - self.dense_layers = [keras.layers.Dense(10), - keras.layers.Dense(5)] - - def call(self, inputs): - x = inputs - for d in self.dense_layers: - x = d(x) - return x - - s = Sequential() - self.assertLen(s.layers, 2) - self.assertLen(s.weights, 0) - - s(keras.Input((10,))) - self.assertLen(s.weights, 4) - - -@test_util.run_all_in_graph_and_eager_modes -class NameScopingTest(keras_parameterized.TestCase): - - def test_name_scope_layer(self): - x = keras.backend.placeholder(shape=(10, 10)) - layer = keras.layers.Dense(10, name='MyName') - layer(x) - self.assertEqual(layer.bias.name, 'MyName/bias:0') - self.assertEqual(layer.kernel.name, 'MyName/kernel:0') - - def test_name_scope_sublayer(self): - - class NameScopeTracker(keras.layers.Layer): - - def call(self, inputs): - self.active_name_scope = ops.get_name_scope() - return inputs - - x = keras.backend.placeholder(shape=(10, 10)) - sublayer = NameScopeTracker(name='Sublayer') - layer = keras.layers.Dense(10, activation=sublayer, name='MyName2') - layer(x) - self.assertEqual(layer.bias.name, 'MyName2/bias:0') - self.assertEqual(layer.kernel.name, 'MyName2/kernel:0') - self.assertEqual(sublayer.active_name_scope, 'MyName2/Sublayer') - - def test_name_scope_tf_tensor(self): - x = ops.convert_to_tensor_v2(np.ones((10, 10))) - layer = keras.layers.Dense( - 10, activation=keras.layers.ReLU(name='MyAct'), name='MyName3') - layer(x) - self.assertEqual(layer.bias.name, 'MyName3/bias:0') - self.assertEqual(layer.kernel.name, 'MyName3/kernel:0') - - -@keras_parameterized.run_all_keras_modes(always_skip_v1=True) -class AutographControlFlowTest(keras_parameterized.TestCase): - - def test_disabling_in_context_is_matched(self): - - test_obj = self - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, training=None): - with test_obj.assertRaisesRegex(TypeError, 'Tensor.*as.*bool'): - if constant_op.constant(False): - return inputs * 1. - return inputs * 0. - - @def_function.function(autograph=False) - def test_fn(): - return MyLayer()(constant_op.constant([[1., 2., 3.]])) - - test_fn() - - def test_if_training_pattern_output(self): - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, training=None): - if training: - return inputs * 1. - return inputs * 0. - - inputs = keras.Input((3,)) - outputs = MyLayer()(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(train_loss, 0.) - test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(test_loss, 1.) - - def test_if_training_pattern_loss(self): - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, training=None): - if training: - loss = math_ops.reduce_sum(inputs) - else: - loss = 0. 
- self.add_loss(loss) - return inputs - - inputs = keras.Input((3,)) - outputs = MyLayer()(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - train_loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(train_loss, 2 * 3) - test_loss = model.test_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(test_loss, 0) - - def test_if_training_pattern_metric(self): - - class MyLayer(keras.layers.Layer): - - def call(self, inputs, training=None): - if training: - metric = math_ops.reduce_sum(inputs) - else: - metric = 0. - self.add_metric(metric, name='my_metric', aggregation='mean') - return inputs - - inputs = keras.Input((3,)) - outputs = MyLayer()(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - for _ in range(3): - _, train_metric = model.train_on_batch(np.ones((2, 3)), - np.ones((2, 3))) - - self.assertEqual(train_metric, 2 * 3) - _, test_metric = model.test_on_batch(np.ones((2, 3)), - np.ones((2, 3))) - self.assertEqual(test_metric, 0) - - def test_if_training_pattern_update(self): - - class MyLayer(keras.layers.Layer): - - def build(self, input_shape): - self.counter = self.add_weight( - shape=(), trainable=False, initializer='zeros') - - def call(self, inputs, training=None): - if training: - increment = 1. - else: - increment = 0. - self.counter.assign_add(increment) - return inputs - - inputs = keras.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(keras.backend.get_value(layer.counter), 1.) - - def test_conditional_updates_in_call(self): - - class MyLayer(keras.layers.Layer): - - def __init__(self): - super(MyLayer, - self).__init__(dynamic=testing_utils.should_run_eagerly()) - - def build(self, input_shape): - self.counter = self.add_weight( - shape=(), trainable=False, initializer='zeros') - - def call(self, inputs, training=None): - if training: - z = math_ops.reduce_sum(inputs) - self.add_update(lambda: self.counter.assign_add(z)) - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - if testing_utils.should_run_eagerly(): - inputs = keras.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(keras.backend.get_value(layer.counter), 6.) - else: - # TODO(fchollet): support the same workflow in graph mode. 
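The TODO above marks the graph-mode gap; a common workaround is to make the update unconditional and gate only its value on `training`, so no `add_update` ever sits inside a control-flow branch. A sketch against the public `tf.keras` API:

```python
import tensorflow as tf


class Counter(tf.keras.layers.Layer):

  def build(self, input_shape):
    self.counter = self.add_weight(
        'counter', shape=(), trainable=False, initializer='zeros')

  def call(self, inputs, training=None):
    if training is None:
      training = tf.keras.backend.learning_phase()
    # The assignment runs on every call; only its *value* is gated, so the
    # update never lives inside a control-flow branch.
    gate = tf.cast(training, inputs.dtype)
    self.counter.assign_add(gate * tf.reduce_sum(inputs))
    return inputs
```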
- with self.assertRaisesRegexp(RuntimeError, - '`add_update` in a control flow branch'): - layer = MyLayer() - layer(keras.Input((3,))) - _ = layer.updates - - def test_conditional_losses_in_call(self): - - class MyLayer(keras.layers.Layer): - - def __init__(self): - super(MyLayer, - self).__init__(dynamic=testing_utils.should_run_eagerly()) - - def call(self, inputs, training=None): - if training: - self.add_loss(math_ops.reduce_sum(inputs)) - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - if testing_utils.should_run_eagerly(): - inputs = keras.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(loss, 2 * 3) - else: - with self.assertRaisesRegexp(RuntimeError, - '`add_loss` in a control flow branch'): - layer = MyLayer()(keras.Input((3,))) - - def test_conditional_callable_losses(self): - model = keras.Sequential([ - keras.layers.Dense( - 1, kernel_regularizer=keras.regularizers.l2(1e-4), input_shape=(1,)) - ]) - model._run_eagerly = testing_utils.should_run_eagerly() - - def assert_graph(t): - if not context.executing_eagerly(): - self.assertEqual(t.graph, ops.get_default_graph()) - - @def_function.function - def get_losses(t): - if t < 0: - return math_ops.reduce_sum(model.losses) * t - else: - return math_ops.reduce_sum(model.losses) - - assert_graph(get_losses(constant_op.constant(2.))) - assert_graph(get_losses(constant_op.constant(0.5))) - - def test_conditional_metrics_in_call(self): - - class MyLayer(keras.layers.Layer): - - def __init__(self): - super(MyLayer, - self).__init__(dynamic=testing_utils.should_run_eagerly()) - - def call(self, inputs, training=None): - if training: - self.add_metric(math_ops.reduce_sum(inputs), - name='sum', - aggregation='mean') - return inputs - - def compute_output_shape(self, input_shape): - return input_shape - - if testing_utils.should_run_eagerly(): - inputs = keras.Input((3,)) - layer = MyLayer() - outputs = layer(inputs) - model = keras.Model(inputs, outputs) - model.compile( - 'sgd', - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - history = model.fit(np.ones((2, 3)), np.ones((2, 3))) - self.assertEqual(history.history['sum'][-1], 2 * 3) - else: - # TODO(fchollet): support the same workflow in graph mode. 
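The same gating rewrite applies to `add_loss` and `add_metric`: call them on every code path and fold the condition into the tensor. A sketch, assuming the public `tf.keras` API:

```python
import tensorflow as tf


class TrainingOnlyLoss(tf.keras.layers.Layer):

  def call(self, inputs, training=None):
    if training is None:
      training = tf.keras.backend.learning_phase()
    gate = tf.cast(training, inputs.dtype)
    # `add_loss` runs on every code path; the "zero when not training"
    # branch is encoded in the tensor itself. The same trick works for
    # `add_metric`.
    self.add_loss(gate * tf.reduce_sum(inputs))
    return inputs
```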
- with self.assertRaisesRegexp(RuntimeError, - '`add_metric` in a control flow branch'): - layer = MyLayer()(keras.Input((3,))) - - def test_conditional_activity_regularizer_in_call(self): - - class TestModel(keras.Model): - - def __init__(self): - super(TestModel, self).__init__( - name='test_model', dynamic=testing_utils.should_run_eagerly()) - self.layer = keras.layers.Dense(2, activity_regularizer='l2') - - def call(self, x, training=None): - if math_ops.greater(math_ops.reduce_sum(x), 0.0): - return self.layer(x) - else: - return self.layer(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=testing_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 1)) - y = np.ones(shape=(10, 2)) - - if testing_utils.should_run_eagerly(): - model.fit(x, y, epochs=2, batch_size=5) - else: - with self.assertRaisesRegexp( - RuntimeError, '`activity_regularizer` in a control flow branch'): - model.fit(x, y, epochs=2, batch_size=5) - - def test_conditional_activity_regularizer_with_wrappers_in_call(self): - - class TestModel(keras.Model): - - def __init__(self): - super(TestModel, self).__init__( - name='test_model', dynamic=testing_utils.should_run_eagerly()) - self.layer = keras.layers.TimeDistributed( - keras.layers.Dense(2, activity_regularizer='l2'), - input_shape=(3, 4)) - - def call(self, x, training=None): - if math_ops.greater(math_ops.reduce_sum(x), 0.0): - return self.layer(x) - else: - return self.layer(x) - - model = TestModel() - model.compile( - loss='mse', - optimizer='sgd', - run_eagerly=testing_utils.should_run_eagerly()) - - x = np.ones(shape=(10, 3, 4)) - y = np.ones(shape=(10, 3, 2)) - - if testing_utils.should_run_eagerly(): - model.fit(x, y, epochs=2, batch_size=5) - else: - with self.assertRaisesRegexp( - RuntimeError, '`activity_regularizer` in a control flow branch'): - model.fit(x, y, epochs=2, batch_size=5) - - -if __name__ == '__main__': - ops.enable_eager_execution() - test.main() diff --git a/tensorflow/python/frozen_keras/engine/node.py b/tensorflow/python/frozen_keras/engine/node.py deleted file mode 100644 index c1d6c032a70..00000000000 --- a/tensorflow/python/frozen_keras/engine/node.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -# pylint: disable=g-classes-have-attributes -"""Contains the `Node` class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import ops -from tensorflow.python.frozen_keras import backend -from tensorflow.python.frozen_keras.engine import base_layer_utils -from tensorflow.python.util import nest - - -class Node(object): - """A `Node` describes the connectivity between two layers. - - Each time a layer is connected to some new input, - a node is added to `layer._inbound_nodes`. 
- Each time the output of a layer is used by another layer, - a node is added to `layer._outbound_nodes`. - - Arguments: - outbound_layer: the layer that takes - `input_tensors` and turns them into `output_tensors` - (the node gets created when the `call` - method of the layer is called). - inbound_layers: a list of layers, the same length as `input_tensors`, - the layers from where `input_tensors` originate. - node_indices: a list of integers, the same length as `inbound_layers`. - `node_indices[i]` is the origin node of `input_tensors[i]` - (necessary since each inbound layer might have several nodes, - e.g. if the layer is being shared with a different data stream). - tensor_indices: a list of integers, - the same length as `inbound_layers`. - `tensor_indices[i]` is the index of `input_tensors[i]` within the - output of the inbound layer - (necessary since each inbound layer might - have multiple tensor outputs, with each one being - independently manipulable). - input_tensors: list of input tensors. - output_tensors: list of output tensors. - arguments: dictionary of keyword arguments that were passed to the - `call` method of the layer at the call that created the node. - - `node_indices` and `tensor_indices` are basically fine-grained coordinates - describing the origin of the `input_tensors`. - - A node from layer A to layer B is added to: - - A._outbound_nodes - - B._inbound_nodes - """ - - def __init__(self, - outbound_layer, - inbound_layers, - node_indices, - tensor_indices, - input_tensors, - output_tensors, - arguments=None): - # Layer instance (NOT a sequence) - if isinstance(outbound_layer, (list, tuple, dict)): - raise ValueError('`outbound_layer` should be a layer instance, ' - 'not a list, tuple, or dict.') - - # this is the layer that takes a nested structure of input tensors - # and turns them into a nested structure of output tensors. - # the current node will be added to - # the inbound_nodes of outbound_layer. - self.outbound_layer = outbound_layer - - # The following 3 properties describe where - # the input tensors come from: which layers, - # and for each layer, which node and which - # tensor output of each node. - - # Nested structure of layer instances. - self.inbound_layers = inbound_layers - # Nested structure of integers, 1:1 mapping with inbound_layers. - self.node_indices = node_indices - # Nested structure of integers, 1:1 mapping with inbound_layers. - self.tensor_indices = tensor_indices - - # Following 2 properties: - # tensor inputs and outputs of outbound_layer. - - # Nested structure of tensors. 1:1 mapping with inbound_layers. - self.input_tensors = input_tensors - # Nested structure of tensors, created by outbound_layer.call(). - self.output_tensors = output_tensors - - # Following 2 properties: input and output shapes. - - # Nested structure of shape tuples, shapes of input_tensors. - self.input_shapes = nest.map_structure(backend.int_shape, input_tensors) - # Nested structure of shape tuples, shapes of output_tensors. - self.output_shapes = nest.map_structure(backend.int_shape, output_tensors) - - # Optional keyword arguments to layer's `call`. - self.arguments = arguments - - # Create Keras History for any Keras Tensors in `arguments`.
- tensor_arguments = [ - t for t in nest.flatten(self.arguments) if isinstance(t, ops.Tensor) - ] - for tensor_argument in tensor_arguments: - if base_layer_utils.needs_keras_history( - tensor_argument, ignore_call_context=True): - base_layer_utils.create_keras_history(tensor_argument) - - # Add nodes to all layers involved. - for layer in nest.flatten(inbound_layers): - if layer is not None: - # For compatibility with external Keras, we use the deprecated - # accessor here. - layer.outbound_nodes.append(self) - # For compatibility with external Keras, we use the deprecated - # accessor here. - outbound_layer.inbound_nodes.append(self) - - def iterate_inbound(self, include_arguments=False): - """Returns a list of tuples representing the inbound data. - - Arguments: - include_arguments: Whether to also iterate over any Keras Tensors - passed as args, kwargs. - - Returns: - List of tuples like: (inbound_layer, node_index, tensor_index, tensor). - """ - inputs_inbound = list( - zip( - nest.flatten(self.inbound_layers), - nest.flatten(self.node_indices), - nest.flatten(self.tensor_indices), - nest.flatten(self.input_tensors))) - - if include_arguments: - keras_tensor_arguments = [ - kt for kt in nest.flatten(self.arguments) - if hasattr(kt, '_keras_history') - ] - - def _get_inbound(keras_tensor): - kh = keras_tensor._keras_history - return kh.layer, kh.node_index, kh.tensor_index, keras_tensor - - arguments_inbound = nest.map_structure(_get_inbound, - keras_tensor_arguments) - - return inputs_inbound + arguments_inbound - else: - return inputs_inbound - - def _get_all_node_dependencies(self): - """Returns all of the nodes this node immediately depends on.""" - node_deps = [] - for layer, node_index, _, _ in self.iterate_inbound(): - node_deps.append(layer._inbound_nodes[node_index]) - - for arg in nest.flatten(self.arguments): - if isinstance(arg, ops.Tensor) and hasattr(arg, '_keras_history'): - kh = arg._keras_history - node_deps.append(kh.layer._inbound_nodes[kh.node_index]) - - return node_deps - - def get_config(self): - inbound_names = nest.map_structure( - lambda layer: layer.name if layer else None, self.inbound_layers) - return { - 'outbound_layer': self.outbound_layer.name, - 'inbound_layers': inbound_names, - 'node_indices': self.node_indices, - 'tensor_indices': self.tensor_indices - } diff --git a/tensorflow/python/frozen_keras/initializers.py b/tensorflow/python/frozen_keras/initializers.py deleted file mode 100644 index e7dc85a875d..00000000000 --- a/tensorflow/python/frozen_keras/initializers.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
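The connectivity bookkeeping that `node.py` implements can be observed on any functional model: every layer call appends a `Node` to the called layer's `_inbound_nodes`. An illustrative-only sketch (it touches `_keras_history` and `_inbound_nodes`, which are private attributes):

```python
import tensorflow as tf

inputs = tf.keras.Input((4,))
x = tf.keras.layers.Dense(3, name='hidden')(inputs)
outputs = tf.keras.layers.Dense(1, name='out')(x)

# The call that produced `outputs` created one Node on the 'out' layer.
node = outputs._keras_history.layer._inbound_nodes[0]
print(node.outbound_layer.name)                                # 'out'
print([l.name for l in tf.nest.flatten(node.inbound_layers)])  # ['hidden']
```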
-# ============================================================================== -"""Keras initializer serialization / deserialization.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six - -from tensorflow.python import tf2 -from tensorflow.python.framework import dtypes -from tensorflow.python.frozen_keras.utils.generic_utils import deserialize_keras_object -from tensorflow.python.frozen_keras.utils.generic_utils import serialize_keras_object -from tensorflow.python.ops import init_ops_v2 - -# These imports are brought in so that keras.initializers.deserialize -# has them available in module_objects. -from tensorflow.python.ops.init_ops import Constant -from tensorflow.python.ops.init_ops import GlorotNormal -from tensorflow.python.ops.init_ops import GlorotUniform -from tensorflow.python.ops.init_ops import he_normal # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import he_uniform # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import Identity -from tensorflow.python.ops.init_ops import Initializer # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import lecun_normal # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import lecun_uniform # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import Ones -from tensorflow.python.ops.init_ops import Orthogonal -from tensorflow.python.ops.init_ops import RandomNormal as TFRandomNormal -from tensorflow.python.ops.init_ops import RandomUniform as TFRandomUniform -from tensorflow.python.ops.init_ops import TruncatedNormal as TFTruncatedNormal -from tensorflow.python.ops.init_ops import VarianceScaling # pylint: disable=unused-import -from tensorflow.python.ops.init_ops import Zeros -# pylint: disable=unused-import, disable=line-too-long -from tensorflow.python.ops.init_ops_v2 import Constant as ConstantV2 -from tensorflow.python.ops.init_ops_v2 import GlorotNormal as GlorotNormalV2 -from tensorflow.python.ops.init_ops_v2 import GlorotUniform as GlorotUniformV2 -from tensorflow.python.ops.init_ops_v2 import he_normal as he_normalV2 -from tensorflow.python.ops.init_ops_v2 import he_uniform as he_uniformV2 -from tensorflow.python.ops.init_ops_v2 import Identity as IdentityV2 -from tensorflow.python.ops.init_ops_v2 import Initializer as InitializerV2 -from tensorflow.python.ops.init_ops_v2 import lecun_normal as lecun_normalV2 -from tensorflow.python.ops.init_ops_v2 import lecun_uniform as lecun_uniformV2 -from tensorflow.python.ops.init_ops_v2 import Ones as OnesV2 -from tensorflow.python.ops.init_ops_v2 import Orthogonal as OrthogonalV2 -from tensorflow.python.ops.init_ops_v2 import RandomNormal as RandomNormalV2 -from tensorflow.python.ops.init_ops_v2 import RandomUniform as RandomUniformV2 -from tensorflow.python.ops.init_ops_v2 import TruncatedNormal as TruncatedNormalV2 -from tensorflow.python.ops.init_ops_v2 import VarianceScaling as VarianceScalingV2 -from tensorflow.python.ops.init_ops_v2 import Zeros as ZerosV2 -# pylint: enable=unused-import, enable=line-too-long - - -class TruncatedNormal(TFTruncatedNormal): - """Initializer that generates a truncated normal distribution. - - These values are similar to values from a `random_normal_initializer` - except that values more than two standard deviations from the mean - are discarded and re-drawn. This is the recommended initializer for - neural network weights and filters. 
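The two-standard-deviation truncation described above is easy to check empirically through the public mirror of this class; a quick sketch:

```python
import tensorflow as tf

init = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05)
samples = init(shape=(10000,))
# Every sample lies within two standard deviations of the mean.
assert float(tf.reduce_max(tf.abs(samples))) <= 2 * 0.05 + 1e-6
```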
- - Args: - mean: a python scalar or a scalar tensor. Mean of the random values to - generate. Defaults to 0. - stddev: a python scalar or a scalar tensor. Standard deviation of the random - values to generate. Defaults to 0.05. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: The data type. Only floating point types are supported. - - Returns: - A TruncatedNormal instance. - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=dtypes.float32): - super(TruncatedNormal, self).__init__( - mean=mean, stddev=stddev, seed=seed, dtype=dtype) - - -class RandomUniform(TFRandomUniform): - """Initializer that generates tensors with a uniform distribution. - - Args: - minval: A python scalar or a scalar tensor. Lower bound of the range of - random values to generate. Defaults to -0.05. - maxval: A python scalar or a scalar tensor. Upper bound of the range of - random values to generate. Defaults to 0.05. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: The data type. - - Returns: - A RandomUniform instance. - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None, - dtype=dtypes.float32): - super(RandomUniform, self).__init__( - minval=minval, maxval=maxval, seed=seed, dtype=dtype) - - -class RandomNormal(TFRandomNormal): - """Initializer that generates tensors with a normal distribution. - - Args: - mean: a python scalar or a scalar tensor. Mean of the random values to - generate. Defaults to 0. - stddev: a python scalar or a scalar tensor. Standard deviation of the random - values to generate. Defaults to 0.05. - seed: A Python integer. Used to create random seeds. See - `tf.compat.v1.set_random_seed` for behavior. - dtype: The data type. Only floating point types are supported. - - Returns: - RandomNormal instance. - """ - - def __init__(self, mean=0.0, stddev=0.05, seed=None, dtype=dtypes.float32): - super(RandomNormal, self).__init__( - mean=mean, stddev=stddev, seed=seed, dtype=dtype) - - -# Compatibility aliases - -# pylint: disable=invalid-name -zero = zeros = Zeros -one = ones = Ones -constant = Constant -uniform = random_uniform = RandomUniform -normal = random_normal = RandomNormal -truncated_normal = TruncatedNormal -identity = Identity -orthogonal = Orthogonal -glorot_normal = GlorotNormal -glorot_uniform = GlorotUniform - - -# Utility functions - - -def serialize(initializer): - return serialize_keras_object(initializer) - - -def deserialize(config, custom_objects=None): - """Return an `Initializer` object from its config.""" - if tf2.enabled(): - # Class names are the same for V1 and V2 but the V2 classes - # are aliased in this file so we need to grab them directly - # from `init_ops_v2`. - module_objects = { - obj_name: getattr(init_ops_v2, obj_name) - for obj_name in dir(init_ops_v2) - } - else: - module_objects = globals() - return deserialize_keras_object( - config, - module_objects=module_objects, - custom_objects=custom_objects, - printable_module_name='initializer') - - -def get(identifier): - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - identifier = str(identifier) - # We have to special-case functions that return classes. - # TODO(omalleyt): Turn these into classes or class aliases. 
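The special-case list that follows exists because `he_normal` and friends are functions returning class instances; routing those strings through `deserialize` with an empty config makes both spellings below equivalent. A sketch via the public accessor, `tf.keras.initializers.get`:

```python
import tensorflow as tf

by_string = tf.keras.initializers.get('he_normal')
by_config = tf.keras.initializers.get(
    {'class_name': 'he_normal', 'config': {}})
assert isinstance(by_string, tf.keras.initializers.Initializer)
assert isinstance(by_config, tf.keras.initializers.Initializer)
```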
- special_cases = ['he_normal', 'he_uniform', 'lecun_normal', 'lecun_uniform'] - if identifier in special_cases: - # Treat like a class. - return deserialize({'class_name': identifier, 'config': {}}) - return deserialize(identifier) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret initializer identifier: ' + - str(identifier)) - - -# pylint: enable=invalid-name diff --git a/tensorflow/python/frozen_keras/regularizers.py b/tensorflow/python/frozen_keras/regularizers.py deleted file mode 100644 index 152f30867ea..00000000000 --- a/tensorflow/python/frozen_keras/regularizers.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Built-in regularizers.""" -# pylint: disable=g-classes-have-attributes -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six - -from tensorflow.python.frozen_keras import backend as K -from tensorflow.python.frozen_keras.utils.generic_utils import deserialize_keras_object -from tensorflow.python.frozen_keras.utils.generic_utils import serialize_keras_object -from tensorflow.python.ops import math_ops - - -class Regularizer(object): - """Regularizer base class. - - Regularizers allow you to apply penalties on layer parameters or layer - activity during optimization. These penalties are summed into the loss - function that the network optimizes. - - Regularization penalties are applied on a per-layer basis. The exact API will - depend on the layer, but many layers (e.g. `Dense`, `Conv1D`, `Conv2D` and - `Conv3D`) have a unified API. - - These layers expose 3 keyword arguments: - - - `kernel_regularizer`: Regularizer to apply a penalty on the layer's kernel - - `bias_regularizer`: Regularizer to apply a penalty on the layer's bias - - `activity_regularizer`: Regularizer to apply a penalty on the layer's output - - All layers (including custom layers) expose `activity_regularizer` as a - settable property, whether or not it is in the constructor arguments. - - The value returned by the `activity_regularizer` is divided by the input - batch size so that the relative weighting between the weight regularizers and - the activity regularizers does not change with the batch size. - - You can access a layer's regularization penalties by calling `layer.losses` - after calling the layer on inputs. - - ## Example - - >>> layer = tf.keras.layers.Dense( - ... 5, input_dim=5, - ... kernel_initializer='ones', - ... kernel_regularizer=tf.keras.regularizers.l1(0.01), - ... 
activity_regularizer=tf.keras.regularizers.l2(0.01)) - >>> tensor = tf.ones(shape=(5, 5)) * 2.0 - >>> out = layer(tensor) - - >>> # The kernel regularization term is 0.25 - >>> # The activity regularization term (after dividing by the batch size) is 5 - >>> tf.math.reduce_sum(layer.losses) - <tf.Tensor: shape=(), dtype=float32, numpy=5.25> - - ## Available penalties - - ```python - tf.keras.regularizers.l1(0.3) # L1 Regularization Penalty - tf.keras.regularizers.l2(0.1) # L2 Regularization Penalty - tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01) # L1 + L2 penalties - ``` - - ## Directly calling a regularizer - - Compute a regularization loss on a tensor by directly calling a regularizer - as if it is a one-argument function. - - E.g. - >>> regularizer = tf.keras.regularizers.l2(2.) - >>> tensor = tf.ones(shape=(5, 5)) - >>> regularizer(tensor) - <tf.Tensor: shape=(), dtype=float32, numpy=50.0> - - ### A note on serialization and deserialization: - - Registering the regularizers as serializable is optional if you are just - training and executing models, exporting to and from SavedModels, or saving - and loading weight checkpoints. - - Registration is required for Keras `model_to_estimator`, saving and - loading models to HDF5 formats, Keras model cloning, some visualization - utilities, and exporting models to and from JSON. If using this functionality, - you must make sure any python process running your model has also defined - and registered your custom regularizer. - - `tf.keras.utils.register_keras_serializable` is only available in TF 2.1 and - beyond. In earlier versions of TensorFlow you must pass your custom - regularizer to the `custom_objects` argument of methods that expect custom - regularizers to be registered as serializable. - """ - - def __call__(self, x): - """Compute a regularization penalty from an input tensor.""" - return 0. - - @classmethod - def from_config(cls, config): - """Creates a regularizer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same regularizer from the config - dictionary. - - This method is used by Keras `model_to_estimator`, saving and - loading models to HDF5 formats, Keras model cloning, some visualization - utilities, and exporting models to and from JSON. - - Arguments: - config: A Python dictionary, typically the output of get_config. - - Returns: - A regularizer instance. - """ - return cls(**config) - - def get_config(self): - """Returns the config of the regularizer. - - A regularizer config is a Python dictionary (serializable) - containing all configuration parameters of the regularizer. - The same regularizer can be reinstantiated later - (without any saved state) from this configuration. - - This method is optional if you are just training and executing models, - exporting to and from SavedModels, or using weight checkpoints. - - This method is required for Keras `model_to_estimator`, saving and - loading models to HDF5 formats, Keras model cloning, some visualization - utilities, and exporting models to and from JSON. - - Returns: - Python dictionary. - """ - raise NotImplementedError(str(self) + ' does not implement get_config()') - - -class L1L2(Regularizer): - r"""A regularizer that applies both L1 and L2 regularization penalties. - - The L1 regularization penalty is computed as: - $$\ell_1\,\,penalty =\ell_1\sum_{i=0}^n|x_i|$$ - - The L2 regularization penalty is computed as: - $$\ell_2\,\,penalty =\ell_2\sum_{i=0}^nx_i^2$$ - - Attributes: - l1: Float; L1 regularization factor. - l2: Float; L2 regularization factor.
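Worked numbers for the two formulas above, via the public counterpart of this class: for `x = ones((5, 5))`, both `sum(|x_i|)` and `sum(x_i^2)` equal 25, so `l1 = l2 = 0.01` yields a penalty of `0.01 * 25 + 0.01 * 25 = 0.5`:

```python
import tensorflow as tf

reg = tf.keras.regularizers.L1L2(l1=0.01, l2=0.01)
x = tf.ones((5, 5))
print(float(reg(x)))  # 0.5
```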
- """ - - def __init__(self, l1=0., l2=0.): # pylint: disable=redefined-outer-name - self.l1 = K.cast_to_floatx(l1) - self.l2 = K.cast_to_floatx(l2) - - def __call__(self, x): - if not self.l1 and not self.l2: - return K.constant(0.) - regularization = 0. - if self.l1: - regularization += self.l1 * math_ops.reduce_sum(math_ops.abs(x)) - if self.l2: - regularization += self.l2 * math_ops.reduce_sum(math_ops.square(x)) - return regularization - - def get_config(self): - return {'l1': float(self.l1), 'l2': float(self.l2)} - - -# Aliases. - - -def l1(l=0.01): - r"""Create a regularizer that applies an L1 regularization penalty. - - The L1 regularization penalty is computed as: - $$\ell_1\,\,penalty =\ell_1\sum_{i=0}^n|x_i|$$ - - Arguments: - l: Float; L1 regularization factor. - - Returns: - An L1 Regularizer with the given regularization factor. - """ - return L1L2(l1=l) - - -def l2(l=0.01): - r"""Create a regularizer that applies an L2 regularization penalty. - - The L2 regularization penalty is computed as: - $$\ell_2\,\,penalty =\ell_2\sum_{i=0}^nx_i^2$$ - - Arguments: - l: Float; L2 regularization factor. - - Returns: - An L2 Regularizer with the given regularization factor. - """ - return L1L2(l2=l) - - -def l1_l2(l1=0.01, l2=0.01): # pylint: disable=redefined-outer-name - r"""Create a regularizer that applies both L1 and L2 penalties. - - The L1 regularization penalty is computed as: - $$\ell_1\,\,penalty =\ell_1\sum_{i=0}^n|x_i|$$ - - The L2 regularization penalty is computed as: - $$\ell_2\,\,penalty =\ell_2\sum_{i=0}^nx_i^2$$ - - Arguments: - l1: Float; L1 regularization factor. - l2: Float; L2 regularization factor. - - Returns: - An L1L2 Regularizer with the given regularization factors. - """ - return L1L2(l1=l1, l2=l2) - - -def serialize(regularizer): - return serialize_keras_object(regularizer) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object( - config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='regularizer') - - -def get(identifier): - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - identifier = str(identifier) - # We have to special-case functions that return classes. - # TODO(omalleyt): Turn these into classes or class aliases. - special_cases = ['l1', 'l2', 'l1_l2'] - if identifier in special_cases: - # Treat like a class. 
- return deserialize({'class_name': identifier, 'config': {}}) - return deserialize(str(identifier)) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret regularizer identifier:', identifier) diff --git a/tensorflow/python/frozen_keras/utils/BUILD b/tensorflow/python/frozen_keras/utils/BUILD deleted file mode 100644 index a77b3fb09d0..00000000000 --- a/tensorflow/python/frozen_keras/utils/BUILD +++ /dev/null @@ -1,106 +0,0 @@ -load("//tensorflow:tensorflow.bzl", "tf_py_test") - -package( - default_visibility = ["//tensorflow:__subpackages__"], - licenses = ["notice"], # Apache 2.0 -) - -py_library( - name = "tf_utils", - srcs = ["tf_utils.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:composite_tensor", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:smart_cond", - "//tensorflow/python:tensor_shape", - "//tensorflow/python:tensor_spec", - "//tensorflow/python:tensor_util", - "//tensorflow/python:type_spec", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/data/experimental/ops:cardinality", - "//tensorflow/python/eager:context", - "//tensorflow/python/frozen_keras:backend", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - -py_library( - name = "conv_utils", - srcs = [ - "conv_utils.py", - ], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python/frozen_keras:backend", - ], -) - -py_library( - name = "generic_utils", - srcs = [ - "generic_utils.py", - ], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:util", - "//third_party/py/numpy", - ], -) - -py_library( - name = "layer_utils", - srcs = [ - "layer_utils.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":conv_utils", - "//tensorflow/python:util", - "//tensorflow/python/frozen_keras:backend", - "//third_party/py/numpy", - ], -) - -tf_py_test( - name = "generic_utils_test", - size = "small", - srcs = ["generic_utils_test.py"], - python_version = "PY3", - deps = [ - ":generic_utils", - "//tensorflow/python:client_testlib", - "//tensorflow/python/frozen_keras:regularizers", - "//tensorflow/python/keras", - "@absl_py//absl/testing:parameterized", - ], -) - -tf_py_test( - name = "tf_utils_test", - size = "small", - srcs = ["tf_utils_test.py"], - python_version = "PY3", - deps = [ - ":tf_utils", - "//tensorflow/python:client_testlib", - "//tensorflow/python/keras", - ], -) - -tf_py_test( - name = "conv_utils_test", - size = "small", - srcs = ["conv_utils_test.py"], - python_version = "PY3", - deps = [ - ":conv_utils", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) diff --git a/tensorflow/python/frozen_keras/utils/conv_utils.py b/tensorflow/python/frozen_keras/utils/conv_utils.py deleted file mode 100644 index 575e7af45d4..00000000000 --- a/tensorflow/python/frozen_keras/utils/conv_utils.py +++ /dev/null @@ -1,482 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities used by convolution layers.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools - -import numpy as np -from six.moves import range # pylint: disable=redefined-builtin - -from tensorflow.python.frozen_keras import backend - - -def convert_data_format(data_format, ndim): - if data_format == 'channels_last': - if ndim == 3: - return 'NWC' - elif ndim == 4: - return 'NHWC' - elif ndim == 5: - return 'NDHWC' - else: - raise ValueError('Input rank not supported:', ndim) - elif data_format == 'channels_first': - if ndim == 3: - return 'NCW' - elif ndim == 4: - return 'NCHW' - elif ndim == 5: - return 'NCDHW' - else: - raise ValueError('Input rank not supported:', ndim) - else: - raise ValueError('Invalid data_format:', data_format) - - -def normalize_tuple(value, n, name): - """Transforms a single integer or iterable of integers into an integer tuple. - - Arguments: - value: The value to validate and convert. Could an int, or any iterable of - ints. - n: The size of the tuple to be returned. - name: The name of the argument being validated, e.g. "strides" or - "kernel_size". This is only used to format error messages. - - Returns: - A tuple of n integers. - - Raises: - ValueError: If something else than an int/long or iterable thereof was - passed. - """ - if isinstance(value, int): - return (value,) * n - else: - try: - value_tuple = tuple(value) - except TypeError: - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + str(value)) - if len(value_tuple) != n: - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + str(value)) - for single_value in value_tuple: - try: - int(single_value) - except (ValueError, TypeError): - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + str(value) + ' ' - 'including element ' + str(single_value) + ' of type' + - ' ' + str(type(single_value))) - return value_tuple - - -def conv_output_length(input_length, filter_size, padding, stride, dilation=1): - """Determines output length of a convolution given input length. - - Arguments: - input_length: integer. - filter_size: integer. - padding: one of "same", "valid", "full", "causal" - stride: integer. - dilation: dilation rate, integer. - - Returns: - The output length (integer). - """ - if input_length is None: - return None - assert padding in {'same', 'valid', 'full', 'causal'} - dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) - if padding in ['same', 'causal']: - output_length = input_length - elif padding == 'valid': - output_length = input_length - dilated_filter_size + 1 - elif padding == 'full': - output_length = input_length + dilated_filter_size - 1 - return (output_length + stride - 1) // stride - - -def conv_input_length(output_length, filter_size, padding, stride): - """Determines input length of a convolution given output length. - - Arguments: - output_length: integer. - filter_size: integer. - padding: one of "same", "valid", "full". - stride: integer. - - Returns: - The input length (integer). 
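The `'valid'` branches of the two length helpers above reduce to simple arithmetic; a pure-Python sketch of those branches, checked against the values exercised by the unit tests later in this patch:

```python
def valid_conv_output_length(input_length, filter_size, stride, dilation=1):
  dilated = filter_size + (filter_size - 1) * (dilation - 1)
  output_length = input_length - dilated + 1
  return (output_length + stride - 1) // stride  # ceiling division


def valid_conv_input_length(output_length, filter_size, stride):
  return (output_length - 1) * stride + filter_size  # pad == 0 for 'valid'


assert valid_conv_output_length(4, 2, stride=1) == 3
assert valid_conv_output_length(4, 2, stride=2) == 2
assert valid_conv_output_length(5, 2, stride=2, dilation=2) == 2
assert valid_conv_input_length(3, 2, stride=1) == 4
assert valid_conv_input_length(2, 2, stride=2) == 4
```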
- """ - if output_length is None: - return None - assert padding in {'same', 'valid', 'full'} - if padding == 'same': - pad = filter_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = filter_size - 1 - return (output_length - 1) * stride - 2 * pad + filter_size - - -def deconv_output_length(input_length, - filter_size, - padding, - output_padding=None, - stride=0, - dilation=1): - """Determines output length of a transposed convolution given input length. - - Arguments: - input_length: Integer. - filter_size: Integer. - padding: one of `"same"`, `"valid"`, `"full"`. - output_padding: Integer, amount of padding along the output dimension. Can - be set to `None` in which case the output length is inferred. - stride: Integer. - dilation: Integer. - - Returns: - The output length (integer). - """ - assert padding in {'same', 'valid', 'full'} - if input_length is None: - return None - - # Get the dilated kernel size - filter_size = filter_size + (filter_size - 1) * (dilation - 1) - - # Infer length if output padding is None, else compute the exact length - if output_padding is None: - if padding == 'valid': - length = input_length * stride + max(filter_size - stride, 0) - elif padding == 'full': - length = input_length * stride - (stride + filter_size - 2) - elif padding == 'same': - length = input_length * stride - - else: - if padding == 'same': - pad = filter_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = filter_size - 1 - - length = ((input_length - 1) * stride + filter_size - 2 * pad + - output_padding) - return length - - -def normalize_data_format(value): - if value is None: - value = backend.image_data_format() - data_format = value.lower() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('The `data_format` argument must be one of ' - '"channels_first", "channels_last". Received: ' + - str(value)) - return data_format - - -def normalize_padding(value): - if isinstance(value, (list, tuple)): - return value - padding = value.lower() - if padding not in {'valid', 'same', 'causal'}: - raise ValueError('The `padding` argument must be a list/tuple or one of ' - '"valid", "same" (or "causal", only for `Conv1D). ' - 'Received: ' + str(padding)) - return padding - - -def convert_kernel(kernel): - """Converts a Numpy kernel matrix from Theano format to TensorFlow format. - - Also works reciprocally, since the transformation is its own inverse. - - This is used for converting legacy Theano-saved model files. - - Arguments: - kernel: Numpy array (3D, 4D or 5D). - - Returns: - The converted kernel. - - Raises: - ValueError: in case of invalid kernel shape or invalid data_format. - """ - kernel = np.asarray(kernel) - if not 3 <= kernel.ndim <= 5: - raise ValueError('Invalid kernel shape:', kernel.shape) - slices = [slice(None, None, -1) for _ in range(kernel.ndim)] - no_flip = (slice(None, None), slice(None, None)) - slices[-2:] = no_flip - return np.copy(kernel[slices]) - - -def conv_kernel_mask(input_shape, kernel_shape, strides, padding): - """Compute a mask representing the connectivity of a convolution operation. - - Assume a convolution with given parameters is applied to an input having N - spatial dimensions with `input_shape = (d_in1, ..., d_inN)` to produce an - output with shape `(d_out1, ..., d_outN)`. This method returns a boolean array - of shape `(d_in1, ..., d_inN, d_out1, ..., d_outN)` with `True` entries - indicating pairs of input and output locations that are connected by a weight. 
- - Example: - - >>> input_shape = (4,) - >>> kernel_shape = (2,) - >>> strides = (1,) - >>> padding = "valid" - >>> conv_kernel_mask(input_shape, kernel_shape, strides, padding) - array([[ True, False, False], - [ True, True, False], - [False, True, True], - [False, False, True]]) - - where rows and columns correspond to inputs and outputs respectively. - - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - - Returns: - A boolean 2N-D `np.ndarray` of shape - `(d_in1, ..., d_inN, d_out1, ..., d_outN)`, where `(d_out1, ..., d_outN)` - is the spatial shape of the output. `True` entries in the mask represent - pairs of input-output locations that are connected by a weight. - - Raises: - ValueError: if `input_shape`, `kernel_shape` and `strides` don't have the - same number of dimensions. - NotImplementedError: if `padding` is not in {`"same"`, `"valid"`}. - """ - if padding not in {'same', 'valid'}: - raise NotImplementedError('Padding type %s not supported. ' - 'Only "valid" and "same" ' - 'are implemented.' % padding) - - in_dims = len(input_shape) - if isinstance(kernel_shape, int): - kernel_shape = (kernel_shape,) * in_dims - if isinstance(strides, int): - strides = (strides,) * in_dims - - kernel_dims = len(kernel_shape) - stride_dims = len(strides) - if kernel_dims != in_dims or stride_dims != in_dims: - raise ValueError('Number of strides, input and kernel dimensions must all ' - 'match. Received: %d, %d, %d.' % - (stride_dims, in_dims, kernel_dims)) - - output_shape = conv_output_shape(input_shape, kernel_shape, strides, padding) - - mask_shape = input_shape + output_shape - mask = np.zeros(mask_shape, np.bool) - - output_axes_ticks = [range(dim) for dim in output_shape] - for output_position in itertools.product(*output_axes_ticks): - input_axes_ticks = conv_connected_inputs(input_shape, kernel_shape, - output_position, strides, padding) - for input_position in itertools.product(*input_axes_ticks): - mask[input_position + output_position] = True - - return mask - - -def conv_kernel_idxs(input_shape, kernel_shape, strides, padding, filters_in, - filters_out, data_format): - """Yields output-input tuples of indices in a CNN layer. - - The generator iterates over all `(output_idx, input_idx)` tuples, where - `output_idx` is an integer index in a flattened tensor representing a single - output image of a convolutional layer that is connected (via the layer - weights) to the respective single input image at `input_idx`. - - Example: - - >>> input_shape = (2, 2) - >>> kernel_shape = (2, 1) - >>> strides = (1, 1) - >>> padding = "valid" - >>> filters_in = 1 - >>> filters_out = 1 - >>> data_format = "channels_last" - >>> list(conv_kernel_idxs(input_shape, kernel_shape, strides, padding, - ... filters_in, filters_out, data_format)) - [(0, 0), (0, 2), (1, 1), (1, 3)] - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - filters_in: `int`, number of filters in the input to the layer. - filters_out: `int`, number of filters in the output of the layer.
- data_format: string, "channels_first" or "channels_last". - - Yields: - The next tuple `(output_idx, input_idx)`, where - `output_idx` is an integer index in a flattened tensor representing a single - output image of a convolutional layer that is connected (via the layer - weights) to the respective single input image at `input_idx`. - - Raises: - ValueError: if `data_format` is neither - `"channels_last"` nor `"channels_first"`, or if number of strides, input, - and kernel number of dimensions do not match. - - NotImplementedError: if `padding` is neither `"same"` nor `"valid"`. - """ - if padding not in ('same', 'valid'): - raise NotImplementedError('Padding type %s not supported. ' - 'Only "valid" and "same" ' - 'are implemented.' % padding) - - in_dims = len(input_shape) - if isinstance(kernel_shape, int): - kernel_shape = (kernel_shape,) * in_dims - if isinstance(strides, int): - strides = (strides,) * in_dims - - kernel_dims = len(kernel_shape) - stride_dims = len(strides) - if kernel_dims != in_dims or stride_dims != in_dims: - raise ValueError('Number of strides, input and kernel dimensions must all ' - 'match. Received: %d, %d, %d.' % - (stride_dims, in_dims, kernel_dims)) - - output_shape = conv_output_shape(input_shape, kernel_shape, strides, padding) - output_axes_ticks = [range(dim) for dim in output_shape] - - if data_format == 'channels_first': - concat_idxs = lambda spatial_idx, filter_idx: (filter_idx,) + spatial_idx - elif data_format == 'channels_last': - concat_idxs = lambda spatial_idx, filter_idx: spatial_idx + (filter_idx,) - else: - raise ValueError('Data format %s not recognized.' - '`data_format` must be "channels_first" or ' - '"channels_last".' % data_format) - - for output_position in itertools.product(*output_axes_ticks): - input_axes_ticks = conv_connected_inputs(input_shape, kernel_shape, - output_position, strides, padding) - for input_position in itertools.product(*input_axes_ticks): - for f_in in range(filters_in): - for f_out in range(filters_out): - out_idx = np.ravel_multi_index( - multi_index=concat_idxs(output_position, f_out), - dims=concat_idxs(output_shape, filters_out)) - in_idx = np.ravel_multi_index( - multi_index=concat_idxs(input_position, f_in), - dims=concat_idxs(input_shape, filters_in)) - yield (out_idx, in_idx) - - -def conv_connected_inputs(input_shape, kernel_shape, output_position, strides, - padding): - """Return locations of the input connected to an output position. - - Assume a convolution with given parameters is applied to an input having N - spatial dimensions with `input_shape = (d_in1, ..., d_inN)`. This method - returns N ranges specifying the input region that was convolved with the - kernel to produce the output at position - `output_position = (p_out1, ..., p_outN)`. - - Example: - - >>> input_shape = (4, 4) - >>> kernel_shape = (2, 1) - >>> output_position = (1, 1) - >>> strides = (1, 1) - >>> padding = "valid" - >>> conv_connected_inputs(input_shape, kernel_shape, output_position, - ... strides, padding) - [range(1, 3), range(1, 2)] - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - output_position: tuple of size N: `(p_out1, ..., p_outN)`, a single position - in the output of the convolution. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. 
- - Returns: - N ranges `[[p_in_left1, ..., p_in_right1], ..., - [p_in_leftN, ..., p_in_rightN]]` specifying the region in the - input connected to output_position. - """ - ranges = [] - - ndims = len(input_shape) - for d in range(ndims): - left_shift = int(kernel_shape[d] / 2) - right_shift = kernel_shape[d] - left_shift - - center = output_position[d] * strides[d] - - if padding == 'valid': - center += left_shift - - start = max(0, center - left_shift) - end = min(input_shape[d], center + right_shift) - - ranges.append(range(start, end)) - - return ranges - - -def conv_output_shape(input_shape, kernel_shape, strides, padding): - """Return the output shape of an N-D convolution. - - Forces dimensions where input is empty (size 0) to remain empty. - - Args: - input_shape: tuple of size N: `(d_in1, ..., d_inN)`, spatial shape of the - input. - kernel_shape: tuple of size N, spatial shape of the convolutional kernel / - receptive field. - strides: tuple of size N, strides along each spatial dimension. - padding: type of padding, string `"same"` or `"valid"`. - - Returns: - tuple of size N: `(d_out1, ..., d_outN)`, spatial shape of the output. - """ - dims = range(len(kernel_shape)) - output_shape = [ - conv_output_length(input_shape[d], kernel_shape[d], padding, strides[d]) - for d in dims - ] - output_shape = tuple( - [0 if input_shape[d] == 0 else output_shape[d] for d in dims]) - return output_shape diff --git a/tensorflow/python/frozen_keras/utils/conv_utils_test.py b/tensorflow/python/frozen_keras/utils/conv_utils_test.py deleted file mode 100644 index 53fd8582e83..00000000000 --- a/tensorflow/python/frozen_keras/utils/conv_utils_test.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for conv_utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools - -from absl.testing import parameterized -import numpy as np - -from tensorflow.python.frozen_keras.utils import conv_utils -from tensorflow.python.platform import test - - -def _get_const_output_shape(input_shape, dim): - return tuple([min(d, dim) for d in input_shape]) - - -input_shapes = [ - (0,), - (0, 0), - (1,), - (2,), - (3,), - (1, 0), - (0, 3), - (1, 1), - (1, 2), - (3, 1), - (2, 2), - (3, 3), - (1, 0, 1), - (5, 2, 3), - (3, 5, 6, 7, 0), - (3, 2, 2, 4, 4), - (1, 2, 3, 4, 7, 2), -] - - -class TestBasicConvUtilsTest(test.TestCase): - - def test_convert_data_format(self): - self.assertEqual('NCDHW', conv_utils.convert_data_format( - 'channels_first', 5)) - self.assertEqual('NCHW', conv_utils.convert_data_format( - 'channels_first', 4)) - self.assertEqual('NCW', conv_utils.convert_data_format('channels_first', 3)) - self.assertEqual('NHWC', conv_utils.convert_data_format('channels_last', 4)) - self.assertEqual('NWC', conv_utils.convert_data_format('channels_last', 3)) - self.assertEqual('NDHWC', conv_utils.convert_data_format( - 'channels_last', 5)) - - with self.assertRaises(ValueError): - conv_utils.convert_data_format('invalid', 2) - - def test_normalize_tuple(self): - self.assertEqual((2, 2, 2), - conv_utils.normalize_tuple(2, n=3, name='strides')) - self.assertEqual((2, 1, 2), - conv_utils.normalize_tuple((2, 1, 2), n=3, name='strides')) - - with self.assertRaises(ValueError): - conv_utils.normalize_tuple((2, 1), n=3, name='strides') - - with self.assertRaises(ValueError): - conv_utils.normalize_tuple(None, n=3, name='strides') - - def test_normalize_data_format(self): - self.assertEqual('channels_last', - conv_utils.normalize_data_format('Channels_Last')) - self.assertEqual('channels_first', - conv_utils.normalize_data_format('CHANNELS_FIRST')) - - with self.assertRaises(ValueError): - conv_utils.normalize_data_format('invalid') - - def test_normalize_padding(self): - self.assertEqual('same', conv_utils.normalize_padding('SAME')) - self.assertEqual('valid', conv_utils.normalize_padding('VALID')) - - with self.assertRaises(ValueError): - conv_utils.normalize_padding('invalid') - - def test_conv_output_length(self): - self.assertEqual(4, conv_utils.conv_output_length(4, 2, 'same', 1, 1)) - self.assertEqual(2, conv_utils.conv_output_length(4, 2, 'same', 2, 1)) - self.assertEqual(3, conv_utils.conv_output_length(4, 2, 'valid', 1, 1)) - self.assertEqual(2, conv_utils.conv_output_length(4, 2, 'valid', 2, 1)) - self.assertEqual(5, conv_utils.conv_output_length(4, 2, 'full', 1, 1)) - self.assertEqual(3, conv_utils.conv_output_length(4, 2, 'full', 2, 1)) - self.assertEqual(2, conv_utils.conv_output_length(5, 2, 'valid', 2, 2)) - - def test_conv_input_length(self): - self.assertEqual(3, conv_utils.conv_input_length(4, 2, 'same', 1)) - self.assertEqual(2, conv_utils.conv_input_length(2, 2, 'same', 2)) - self.assertEqual(4, conv_utils.conv_input_length(3, 2, 'valid', 1)) - self.assertEqual(4, conv_utils.conv_input_length(2, 2, 'valid', 2)) - self.assertEqual(3, conv_utils.conv_input_length(4, 2, 'full', 1)) - self.assertEqual(4, conv_utils.conv_input_length(3, 2, 'full', 2)) - - def test_deconv_output_length(self): - self.assertEqual(4, conv_utils.deconv_output_length(4, 2, 'same', stride=1)) - self.assertEqual(8, conv_utils.deconv_output_length(4, 
2, 'same', stride=2)) - self.assertEqual(5, conv_utils.deconv_output_length( - 4, 2, 'valid', stride=1)) - self.assertEqual(8, conv_utils.deconv_output_length( - 4, 2, 'valid', stride=2)) - self.assertEqual(3, conv_utils.deconv_output_length(4, 2, 'full', stride=1)) - self.assertEqual(6, conv_utils.deconv_output_length(4, 2, 'full', stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=2, stride=1)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=1, stride=2)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 'valid', output_padding=2, stride=1)) - self.assertEqual( - 9, - conv_utils.deconv_output_length( - 4, 2, 'valid', output_padding=1, stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=2, stride=1)) - self.assertEqual( - 7, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=1, stride=2)) - self.assertEqual( - 5, - conv_utils.deconv_output_length( - 4, 2, 'same', output_padding=1, stride=1, dilation=2)) - self.assertEqual( - 12, - conv_utils.deconv_output_length( - 4, 2, 'valid', output_padding=2, stride=2, dilation=3)) - self.assertEqual( - 6, - conv_utils.deconv_output_length( - 4, 2, 'full', output_padding=2, stride=2, dilation=3)) - - -@parameterized.parameters(input_shapes) -class TestConvUtils(test.TestCase, parameterized.TestCase): - - def test_conv_kernel_mask_fc(self, *input_shape): - padding = 'valid' - kernel_shape = input_shape - ndims = len(input_shape) - strides = (1,) * ndims - output_shape = _get_const_output_shape(input_shape, dim=1) - mask = np.ones(input_shape + output_shape, np.bool) - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_diag(self, *input_shape): - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = (1,) * ndims - - for padding in ['valid', 'same']: - mask = np.identity(int(np.prod(input_shape)), np.bool) - mask = np.reshape(mask, input_shape * 2) - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_full_stride(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = tuple([max(d, 1) for d in input_shape]) - output_shape = _get_const_output_shape(input_shape, dim=1) - - mask = np.zeros(input_shape + output_shape, np.bool) - if all(d > 0 for d in mask.shape): - mask[(0,) * len(output_shape)] = True - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_almost_full_stride(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = tuple([max(d - 1, 1) for d in input_shape]) - output_shape = _get_const_output_shape(input_shape, dim=2) - - mask = np.zeros(input_shape + output_shape, np.bool) - if all(d > 0 for d in mask.shape): - for in_position in itertools.product(*[[0, d - 1] for d in input_shape]): - out_position = tuple([min(p, 1) for p in in_position]) - mask[in_position + out_position] = True - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_rect_kernel(self, *input_shape): - padding = 'valid' - ndims = len(input_shape) - strides = (1,) * ndims - - for d in 
range(ndims): - kernel_shape = [1] * ndims - kernel_shape[d] = input_shape[d] - - output_shape = list(input_shape) - output_shape[d] = min(1, input_shape[d]) - - mask = np.identity(int(np.prod(input_shape)), np.bool) - mask = np.reshape(mask, input_shape * 2) - - for p in itertools.product(*[range(input_shape[dim]) - for dim in range(ndims)]): - p = list(p) - p[d] = slice(None) - mask[p * 2] = True - - mask = np.take(mask, range(0, min(1, input_shape[d])), ndims + d) - - self.assertAllEqual( - mask, - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - padding - ) - ) - - def test_conv_kernel_mask_wrong_padding(self, *input_shape): - ndims = len(input_shape) - kernel_shape = (1,) * ndims - strides = (1,) * ndims - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'same' - ) - - self.assertRaises(NotImplementedError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'full') - - def test_conv_kernel_mask_wrong_dims(self, *input_shape): - kernel_shape = 1 - strides = 1 - - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - ndims = len(input_shape) - - kernel_shape = (2,) * (ndims + 1) - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'same') - - strides = (1,) * ndims - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'valid') - - kernel_shape = (1,) * ndims - strides = (2,) * (ndims - 1) - self.assertRaises(ValueError, - conv_utils.conv_kernel_mask, - input_shape, kernel_shape, strides, 'valid') - - strides = (2,) * ndims - conv_utils.conv_kernel_mask( - input_shape, - kernel_shape, - strides, - 'valid' - ) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/frozen_keras/utils/generic_utils.py b/tensorflow/python/frozen_keras/utils/generic_utils.py deleted file mode 100644 index 5f5cca7c3fb..00000000000 --- a/tensorflow/python/frozen_keras/utils/generic_utils.py +++ /dev/null @@ -1,612 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Python utilities required by Keras.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import binascii -import codecs -import marshal -import os -import re -import types as python_types - -import numpy as np -import six - -from tensorflow.python.util import nest -from tensorflow.python.util import tf_contextlib -from tensorflow.python.util import tf_decorator -from tensorflow.python.util import tf_inspect - -_GLOBAL_CUSTOM_OBJECTS = {} -_GLOBAL_CUSTOM_NAMES = {} - -# Flag that determines whether to skip the NotImplementedError when calling -# get_config in custom models and layers. 
This is only enabled when saving to -# SavedModel, when the config isn't required. -_SKIP_FAILED_SERIALIZATION = False -# If a layer does not have a defined config, then the returned config will be a -# dictionary with the below key. -_LAYER_UNDEFINED_CONFIG_KEY = 'layer was saved without config' - - -class CustomObjectScope(object): - """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape. - - Code within a `with` statement will be able to access custom objects - by name. Changes to global custom objects persist - within the enclosing `with` statement. At end of the `with` statement, - global custom objects are reverted to state - at beginning of the `with` statement. - - Example: - - Consider a custom object `MyObject` (e.g. a class): - - ```python - with CustomObjectScope({'MyObject':MyObject}): - layer = Dense(..., kernel_regularizer='MyObject') - # save, load, etc. will recognize custom object by name - ``` - """ - - def __init__(self, *args): - self.custom_objects = args - self.backup = None - - def __enter__(self): - self.backup = _GLOBAL_CUSTOM_OBJECTS.copy() - for objects in self.custom_objects: - _GLOBAL_CUSTOM_OBJECTS.update(objects) - return self - - def __exit__(self, *args, **kwargs): - _GLOBAL_CUSTOM_OBJECTS.clear() - _GLOBAL_CUSTOM_OBJECTS.update(self.backup) - - -def custom_object_scope(*args): - """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape. - - Convenience wrapper for `CustomObjectScope`. - Code within a `with` statement will be able to access custom objects - by name. Changes to global custom objects persist - within the enclosing `with` statement. At end of the `with` statement, - global custom objects are reverted to state - at beginning of the `with` statement. - - Example: - - Consider a custom object `MyObject` - - ```python - with custom_object_scope({'MyObject':MyObject}): - layer = Dense(..., kernel_regularizer='MyObject') - # save, load, etc. will recognize custom object by name - ``` - - Arguments: - *args: Variable length list of dictionaries of name, class pairs to add to - custom objects. - - Returns: - Object of type `CustomObjectScope`. - """ - return CustomObjectScope(*args) - - -def get_custom_objects(): - """Retrieves a live reference to the global dictionary of custom objects. - - Updating and clearing custom objects using `custom_object_scope` - is preferred, but `get_custom_objects` can - be used to directly access `_GLOBAL_CUSTOM_OBJECTS`. - - Example: - - ```python - get_custom_objects().clear() - get_custom_objects()['MyObject'] = MyObject - ``` - - Returns: - Global dictionary of names to classes (`_GLOBAL_CUSTOM_OBJECTS`). - """ - return _GLOBAL_CUSTOM_OBJECTS - - -def serialize_keras_class_and_config(cls_name, cls_config): - """Returns the serialization of the class with the given config.""" - return {'class_name': cls_name, 'config': cls_config} - - -def register_keras_serializable(package='Custom', name=None): - """Registers an object with the Keras serialization framework. - - This decorator injects the decorated class or function into the Keras custom - object dictionary, so that it can be serialized and deserialized without - needing an entry in the user-provided custom object dict. It also injects a - function that Keras will call to get the object's serializable string key. - - Note that to be serialized and deserialized, classes must implement the - `get_config()` method. Functions do not have this requirement. 
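-
-  For example, a minimal (illustrative) registration might look like the
-  following, where `MyPackage` and `LinearScale` are placeholder names:
-
-  ```python
-  @register_keras_serializable(package='MyPackage')
-  class LinearScale(object):
-
-    def __init__(self, factor):
-      self.factor = factor
-
-    def get_config(self):
-      return {'factor': self.factor}
-
-  # LinearScale is now tracked under the key 'MyPackage>LinearScale'.
-  ```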
-
-  The object will be registered under the key 'package>name' where `name`
-  defaults to the object name if not passed.
-
-  Arguments:
-    package: The package that this class belongs to.
-    name: The name to serialize this class under in this package. If None, the
-      class's name will be used.
-
-  Returns:
-    A decorator that registers the decorated class with the passed names.
-  """
-
-  def decorator(arg):
-    """Registers a class with the Keras serialization framework."""
-    class_name = name if name is not None else arg.__name__
-    registered_name = package + '>' + class_name
-
-    if tf_inspect.isclass(arg) and not hasattr(arg, 'get_config'):
-      raise ValueError(
-          'Cannot register a class that does not have a get_config() method.')
-
-    if registered_name in _GLOBAL_CUSTOM_OBJECTS:
-      raise ValueError(
-          '%s has already been registered to %s' %
-          (registered_name, _GLOBAL_CUSTOM_OBJECTS[registered_name]))
-
-    if arg in _GLOBAL_CUSTOM_NAMES:
-      raise ValueError('%s has already been registered to %s' %
-                       (arg, _GLOBAL_CUSTOM_NAMES[arg]))
-    _GLOBAL_CUSTOM_OBJECTS[registered_name] = arg
-    _GLOBAL_CUSTOM_NAMES[arg] = registered_name
-
-    return arg
-
-  return decorator
-
-
-def get_registered_name(obj):
-  """Returns the name registered to an object within the Keras framework.
-
-  This function is part of the Keras serialization and deserialization
-  framework. It maps objects to the string names associated with those objects
-  for serialization/deserialization.
-
-  Args:
-    obj: The object to look up.
-
-  Returns:
-    The name associated with the object, or the default Python name if the
-    object is not registered.
-  """
-  if obj in _GLOBAL_CUSTOM_NAMES:
-    return _GLOBAL_CUSTOM_NAMES[obj]
-  else:
-    return obj.__name__
-
-
-@tf_contextlib.contextmanager
-def skip_failed_serialization():
-  global _SKIP_FAILED_SERIALIZATION
-  prev = _SKIP_FAILED_SERIALIZATION
-  try:
-    _SKIP_FAILED_SERIALIZATION = True
-    yield
-  finally:
-    _SKIP_FAILED_SERIALIZATION = prev
-
-
-def get_registered_object(name, custom_objects=None, module_objects=None):
-  """Returns the class associated with `name` if it is registered with Keras.
-
-  This function is part of the Keras serialization and deserialization
-  framework. It maps strings to the objects associated with them for
-  serialization/deserialization.
-
-  Example:
-  ```
-  def from_config(cls, config, custom_objects=None):
-    if 'my_custom_object_name' in config:
-      config['hidden_cls'] = tf.keras.utils.get_registered_object(
-          config['my_custom_object_name'], custom_objects=custom_objects)
-  ```
-
-  Args:
-    name: The name to look up.
-    custom_objects: A dictionary of custom objects to look the name up in.
-      Generally, custom_objects is provided by the user.
-    module_objects: A dictionary of custom objects to look the name up in.
-      Generally, module_objects is provided by mid-level library implementers.
-
-  Returns:
-    An instantiable class associated with 'name', or None if no such class
-    exists.
- """ - if name in _GLOBAL_CUSTOM_OBJECTS: - return _GLOBAL_CUSTOM_OBJECTS[name] - elif custom_objects and name in custom_objects: - return custom_objects[name] - elif module_objects and name in module_objects: - return module_objects[name] - return None - - -def serialize_keras_object(instance): - """Serialize Keras object into JSON.""" - _, instance = tf_decorator.unwrap(instance) - if instance is None: - return None - - if hasattr(instance, 'get_config'): - name = get_registered_name(instance.__class__) - try: - config = instance.get_config() - except NotImplementedError as e: - if _SKIP_FAILED_SERIALIZATION: - return serialize_keras_class_and_config( - name, {_LAYER_UNDEFINED_CONFIG_KEY: True}) - raise e - serialization_config = {} - for key, item in config.items(): - if isinstance(item, six.string_types): - serialization_config[key] = item - continue - - # Any object of a different type needs to be converted to string or dict - # for serialization (e.g. custom functions, custom classes) - try: - serialized_item = serialize_keras_object(item) - if isinstance(serialized_item, dict) and not isinstance(item, dict): - serialized_item['__passive_serialization__'] = True - serialization_config[key] = serialized_item - except ValueError: - serialization_config[key] = item - - name = get_registered_name(instance.__class__) - return serialize_keras_class_and_config(name, serialization_config) - if hasattr(instance, '__name__'): - return get_registered_name(instance) - raise ValueError('Cannot serialize', instance) - - -def get_custom_objects_by_name(item, custom_objects=None): - """Returns the item if it is in either local or global custom objects.""" - if item in _GLOBAL_CUSTOM_OBJECTS: - return _GLOBAL_CUSTOM_OBJECTS[item] - elif custom_objects and item in custom_objects: - return custom_objects[item] - return None - - -def class_and_config_for_serialized_keras_object( - config, - module_objects=None, - custom_objects=None, - printable_module_name='object'): - """Returns the class name and config for a serialized keras object.""" - if (not isinstance(config, dict) or 'class_name' not in config or - 'config' not in config): - raise ValueError('Improper config format: ' + str(config)) - - class_name = config['class_name'] - cls = get_registered_object(class_name, custom_objects, module_objects) - if cls is None: - raise ValueError('Unknown ' + printable_module_name + ': ' + class_name) - - cls_config = config['config'] - deserialized_objects = {} - for key, item in cls_config.items(): - if isinstance(item, dict) and '__passive_serialization__' in item: - deserialized_objects[key] = deserialize_keras_object( - item, - module_objects=module_objects, - custom_objects=custom_objects, - printable_module_name='config_item') - # TODO(momernick): Should this also have 'module_objects'? - elif (isinstance(item, six.string_types) and - tf_inspect.isfunction(get_registered_object(item, custom_objects))): - # Handle custom functions here. When saving functions, we only save the - # function's name as a string. If we find a matching string in the custom - # objects during deserialization, we convert the string back to the - # original function. - # Note that a potential issue is that a string field could have a naming - # conflict with a custom function name, but this should be a rare case. - # This issue does not occur if a string field has a naming conflict with - # a custom object, since the config of an object will always be a dict. 
- deserialized_objects[key] = get_registered_object(item, custom_objects) - for key, item in deserialized_objects.items(): - cls_config[key] = deserialized_objects[key] - - return (cls, cls_config) - - -def deserialize_keras_object(identifier, - module_objects=None, - custom_objects=None, - printable_module_name='object'): - if identifier is None: - return None - - if isinstance(identifier, dict): - # In this case we are dealing with a Keras config dictionary. - config = identifier - (cls, cls_config) = class_and_config_for_serialized_keras_object( - config, module_objects, custom_objects, printable_module_name) - - if hasattr(cls, 'from_config'): - arg_spec = tf_inspect.getfullargspec(cls.from_config) - custom_objects = custom_objects or {} - - if 'custom_objects' in arg_spec.args: - return cls.from_config( - cls_config, - custom_objects=dict( - list(_GLOBAL_CUSTOM_OBJECTS.items()) + - list(custom_objects.items()))) - with CustomObjectScope(custom_objects): - return cls.from_config(cls_config) - else: - # Then `cls` may be a function returning a class. - # in this case by convention `config` holds - # the kwargs of the function. - custom_objects = custom_objects or {} - with CustomObjectScope(custom_objects): - return cls(**cls_config) - elif isinstance(identifier, six.string_types): - object_name = identifier - if custom_objects and object_name in custom_objects: - obj = custom_objects.get(object_name) - elif object_name in _GLOBAL_CUSTOM_OBJECTS: - obj = _GLOBAL_CUSTOM_OBJECTS[object_name] - else: - obj = module_objects.get(object_name) - if obj is None: - raise ValueError('Unknown ' + printable_module_name + ':' + object_name) - # Classes passed by name are instantiated with no args, functions are - # returned as-is. - if tf_inspect.isclass(obj): - return obj() - return obj - elif tf_inspect.isfunction(identifier): - # If a function has already been deserialized, return as is. - return identifier - else: - raise ValueError('Could not interpret serialized %s: %s' % - (printable_module_name, identifier)) - - -def func_dump(func): - """Serializes a user defined function. - - Arguments: - func: the function to serialize. - - Returns: - A tuple `(code, defaults, closure)`. - """ - if os.name == 'nt': - raw_code = marshal.dumps(func.__code__).replace(b'\\', b'/') - code = codecs.encode(raw_code, 'base64').decode('ascii') - else: - raw_code = marshal.dumps(func.__code__) - code = codecs.encode(raw_code, 'base64').decode('ascii') - defaults = func.__defaults__ - if func.__closure__: - closure = tuple(c.cell_contents for c in func.__closure__) - else: - closure = None - return code, defaults, closure - - -def func_load(code, defaults=None, closure=None, globs=None): - """Deserializes a user defined function. - - Arguments: - code: bytecode of the function. - defaults: defaults of the function. - closure: closure of the function. - globs: dictionary of global objects. - - Returns: - A function object. - """ - if isinstance(code, (tuple, list)): # unpack previous dump - code, defaults, closure = code - if isinstance(defaults, list): - defaults = tuple(defaults) - - def ensure_value_to_cell(value): - """Ensures that a value is converted to a python cell object. 
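-
-    Function closures store each captured value in a cell object, so a value
-    restored from a `func_dump` tuple must be re-wrapped in a cell before it
-    can be attached to the rebuilt function's `__closure__`.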
- - Arguments: - value: Any value that needs to be casted to the cell type - - Returns: - A value wrapped as a cell object (see function "func_load") - """ - - def dummy_fn(): - # pylint: disable=pointless-statement - value # just access it so it gets captured in .__closure__ - - cell_value = dummy_fn.__closure__[0] - if not isinstance(value, type(cell_value)): - return cell_value - return value - - if closure is not None: - closure = tuple(ensure_value_to_cell(_) for _ in closure) - try: - raw_code = codecs.decode(code.encode('ascii'), 'base64') - except (UnicodeEncodeError, binascii.Error): - raw_code = code.encode('raw_unicode_escape') - code = marshal.loads(raw_code) - if globs is None: - globs = globals() - return python_types.FunctionType( - code, globs, name=code.co_name, argdefs=defaults, closure=closure) - - -def has_arg(fn, name, accept_all=False): - """Checks if a callable accepts a given keyword argument. - - Arguments: - fn: Callable to inspect. - name: Check if `fn` can be called with `name` as a keyword argument. - accept_all: What to return if there is no parameter called `name` but the - function accepts a `**kwargs` argument. - - Returns: - bool, whether `fn` accepts a `name` keyword argument. - """ - arg_spec = tf_inspect.getfullargspec(fn) - if accept_all and arg_spec.varkw is not None: - return True - return name in arg_spec.args - - -def make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). - - Arguments: - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. - - Returns: - A list of tuples of array indices. - """ - num_batches = int(np.ceil(size / float(batch_size))) - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(0, num_batches)] - - -def slice_arrays(arrays, start=None, stop=None): - """Slice an array or list of arrays. - - This takes an array-like, or a list of - array-likes, and outputs: - - arrays[start:stop] if `arrays` is an array-like - - [x[start:stop] for x in arrays] if `arrays` is a list - - Can also work on list/array of indices: `slice_arrays(x, indices)` - - Arguments: - arrays: Single array or list of arrays. - start: can be an integer index (start index) or a list/array of indices - stop: integer (stop index); should be None if `start` was a list. - - Returns: - A slice of the array(s). - - Raises: - ValueError: If the value of start is a list and stop is not None. - """ - if arrays is None: - return [None] - if isinstance(start, list) and stop is not None: - raise ValueError('The stop argument has to be None if the value of start ' - 'is a list.') - elif isinstance(arrays, list): - if hasattr(start, '__len__'): - # hdf5 datasets only support list objects as indices - if hasattr(start, 'shape'): - start = start.tolist() - return [None if x is None else x[start] for x in arrays] - return [ - None if x is None else - None if not hasattr(x, '__getitem__') else x[start:stop] for x in arrays - ] - else: - if hasattr(start, '__len__'): - if hasattr(start, 'shape'): - start = start.tolist() - return arrays[start] - if hasattr(start, '__getitem__'): - return arrays[start:stop] - return [None] - - -def to_list(x): - """Normalizes a list/tensor into a list. - - If a tensor is passed, we return - a list of size 1 containing the tensor. - - Arguments: - x: target object to be normalized. - - Returns: - A list. 
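-
-  For example (illustrative): `to_list('foo')` returns `['foo']`, while an
-  input that is already a list is returned unchanged.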
- """ - if isinstance(x, list): - return x - return [x] - - -def to_snake_case(name): - intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) - insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() - # If the class is private the name starts with "_" which is not secure - # for creating scopes. We prefix the name with "private" in this case. - if insecure[0] != '_': - return insecure - return 'private' + insecure - - -def is_all_none(structure): - iterable = nest.flatten(structure) - # We cannot use Python's `any` because the iterable may return Tensors. - for element in iterable: - if element is not None: - return False - return True - - -def check_for_unexpected_keys(name, input_dict, expected_values): - unknown = set(input_dict.keys()).difference(expected_values) - if unknown: - raise ValueError('Unknown entries in {} dictionary: {}. Only expected ' - 'following keys: {}'.format(name, list(unknown), - expected_values)) - - -def validate_kwargs(kwargs, - allowed_kwargs, - error_message='Keyword argument not understood:'): - """Checks that all keyword arguments are in the set of allowed keys.""" - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise TypeError(error_message, kwarg) - - -def validate_config(config): - """Determines whether config appears to be a valid layer config.""" - return isinstance(config, dict) and _LAYER_UNDEFINED_CONFIG_KEY not in config - - -def default(method): - """Decorates a method to detect overrides in subclasses.""" - method._is_default = True # pylint: disable=protected-access - return method - - -def is_default(method): - """Check if a method is decorated with the `default` wrapper.""" - return getattr(method, '_is_default', False) diff --git a/tensorflow/python/frozen_keras/utils/generic_utils_test.py b/tensorflow/python/frozen_keras/utils/generic_utils_test.py deleted file mode 100644 index 7bc93ba0353..00000000000 --- a/tensorflow/python/frozen_keras/utils/generic_utils_test.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ==============================================================================
-"""Tests for Keras generic Python utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python import keras
-from tensorflow.python.frozen_keras import regularizers
-from tensorflow.python.frozen_keras.utils import generic_utils
-from tensorflow.python.platform import test
-
-
-class HasArgTest(test.TestCase):
-
-  def test_has_arg(self):
-
-    def f_x(x):
-      return x
-
-    def f_x_args(x, *args):
-      _ = args
-      return x
-
-    def f_x_kwargs(x, **kwargs):
-      _ = kwargs
-      return x
-
-    self.assertTrue(generic_utils.has_arg(
-        f_x, 'x', accept_all=False))
-    self.assertFalse(generic_utils.has_arg(
-        f_x, 'y', accept_all=False))
-    self.assertTrue(generic_utils.has_arg(
-        f_x_args, 'x', accept_all=False))
-    self.assertFalse(generic_utils.has_arg(
-        f_x_args, 'y', accept_all=False))
-    self.assertTrue(generic_utils.has_arg(
-        f_x_kwargs, 'x', accept_all=False))
-    self.assertFalse(generic_utils.has_arg(
-        f_x_kwargs, 'y', accept_all=False))
-    self.assertTrue(generic_utils.has_arg(
-        f_x_kwargs, 'y', accept_all=True))
-
-
-class TestCustomObjectScope(test.TestCase):
-
-  def test_custom_object_scope(self):
-
-    def custom_fn():
-      pass
-
-    class CustomClass(object):
-      pass
-
-    with generic_utils.custom_object_scope(
-        {'CustomClass': CustomClass, 'custom_fn': custom_fn}):
-      # Disable the activation test since it's not under the frozen_keras
-      # package.
-      # act = keras.activations.get('custom_fn')
-      # self.assertEqual(act, custom_fn)
-      cl = regularizers.get('CustomClass')
-      self.assertEqual(cl.__class__, CustomClass)
-
-
-class SerializeKerasObjectTest(test.TestCase):
-
-  def test_serialize_none(self):
-    serialized = generic_utils.serialize_keras_object(None)
-    self.assertEqual(serialized, None)
-    deserialized = generic_utils.deserialize_keras_object(
-        serialized)
-    self.assertEqual(deserialized, None)
-
-  def test_serialize_custom_class_with_default_name(self):
-
-    @generic_utils.register_keras_serializable()
-    class TestClass(object):
-
-      def __init__(self, value):
-        self._value = value
-
-      def get_config(self):
-        return {'value': self._value}
-
-    serialized_name = 'Custom>TestClass'
-    inst = TestClass(value=10)
-    class_name = generic_utils._GLOBAL_CUSTOM_NAMES[TestClass]
-    self.assertEqual(serialized_name, class_name)
-    config = generic_utils.serialize_keras_object(inst)
-    self.assertEqual(class_name, config['class_name'])
-    new_inst = generic_utils.deserialize_keras_object(config)
-    self.assertIsNot(inst, new_inst)
-    self.assertIsInstance(new_inst, TestClass)
-    self.assertEqual(10, new_inst._value)
-
-    # Make sure registering a new class with the same name will fail.
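-    # (The registry keys on the serialized name 'Custom>TestClass', so
-    # re-registering any class under that name raises a ValueError.)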
- with self.assertRaisesRegex(ValueError, '.*has already been registered.*'): - @generic_utils.register_keras_serializable() # pylint: disable=function-redefined - class TestClass(object): - - def __init__(self, value): - self._value = value - - def get_config(self): - return {'value': self._value} - - def test_serialize_custom_class_with_custom_name(self): - - @generic_utils.register_keras_serializable( - 'TestPackage', 'CustomName') - class OtherTestClass(object): - - def __init__(self, val): - self._val = val - - def get_config(self): - return {'val': self._val} - - serialized_name = 'TestPackage>CustomName' - inst = OtherTestClass(val=5) - class_name = generic_utils._GLOBAL_CUSTOM_NAMES[OtherTestClass] - self.assertEqual(serialized_name, class_name) - fn_class_name = generic_utils.get_registered_name( - OtherTestClass) - self.assertEqual(fn_class_name, class_name) - - cls = generic_utils.get_registered_object(fn_class_name) - self.assertEqual(OtherTestClass, cls) - - config = generic_utils.serialize_keras_object(inst) - self.assertEqual(class_name, config['class_name']) - new_inst = generic_utils.deserialize_keras_object(config) - self.assertIsNot(inst, new_inst) - self.assertIsInstance(new_inst, OtherTestClass) - self.assertEqual(5, new_inst._val) - - def test_serialize_custom_function(self): - - @generic_utils.register_keras_serializable() - def my_fn(): - return 42 - - serialized_name = 'Custom>my_fn' - class_name = generic_utils._GLOBAL_CUSTOM_NAMES[my_fn] - self.assertEqual(serialized_name, class_name) - fn_class_name = generic_utils.get_registered_name(my_fn) - self.assertEqual(fn_class_name, class_name) - - config = generic_utils.serialize_keras_object(my_fn) - self.assertEqual(class_name, config) - fn = generic_utils.deserialize_keras_object(config) - self.assertEqual(42, fn()) - - fn_2 = generic_utils.get_registered_object(fn_class_name) - self.assertEqual(42, fn_2()) - - def test_serialize_custom_class_without_get_config_fails(self): - - with self.assertRaisesRegex( - ValueError, 'Cannot register a class that does ' - 'not have a get_config.*'): - - @generic_utils.register_keras_serializable( # pylint: disable=unused-variable - 'TestPackage', 'TestClass') - class TestClass(object): - - def __init__(self, value): - self._value = value - - def test_serializable_object(self): - - class SerializableInt(int): - """A serializable object to pass out of a test layer's config.""" - - def __new__(cls, value): - return int.__new__(cls, value) - - def get_config(self): - return {'value': int(self)} - - @classmethod - def from_config(cls, config): - return cls(**config) - - layer = keras.layers.Dense( - SerializableInt(3), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, custom_objects={'SerializableInt': SerializableInt}) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L1L2) - self.assertEqual(new_layer.units.__class__, SerializableInt) - self.assertEqual(new_layer.units, 3) - - def test_nested_serializable_object(self): - class SerializableInt(int): - """A serializable object to pass out of a test layer's config.""" - - def __new__(cls, value): - return int.__new__(cls, value) - - def get_config(self): - return {'value': int(self)} - - @classmethod - def from_config(cls, config): - return cls(**config) - - class SerializableNestedInt(int): - """A serializable object containing 
another serializable object.""" - - def __new__(cls, value, int_obj): - obj = int.__new__(cls, value) - obj.int_obj = int_obj - return obj - - def get_config(self): - return {'value': int(self), 'int_obj': self.int_obj} - - @classmethod - def from_config(cls, config): - return cls(**config) - - nested_int = SerializableInt(4) - layer = keras.layers.Dense( - SerializableNestedInt(3, nested_int), - name='SerializableNestedInt', - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, - custom_objects={ - 'SerializableInt': SerializableInt, - 'SerializableNestedInt': SerializableNestedInt - }) - # Make sure the string field doesn't get convert to custom object, even - # they have same value. - self.assertEqual(new_layer.name, 'SerializableNestedInt') - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertEqual(new_layer.bias_regularizer.__class__, - keras.regularizers.L1L2) - self.assertEqual(new_layer.units.__class__, SerializableNestedInt) - self.assertEqual(new_layer.units, 3) - self.assertEqual(new_layer.units.int_obj.__class__, SerializableInt) - self.assertEqual(new_layer.units.int_obj, 4) - - def test_nested_serializable_fn(self): - - def serializable_fn(x): - """A serializable function to pass out of a test layer's config.""" - return x - - class SerializableNestedInt(int): - """A serializable object containing a serializable function.""" - - def __new__(cls, value, fn): - obj = int.__new__(cls, value) - obj.fn = fn - return obj - - def get_config(self): - return {'value': int(self), 'fn': self.fn} - - @classmethod - def from_config(cls, config): - return cls(**config) - - layer = keras.layers.Dense( - SerializableNestedInt(3, serializable_fn), - activation='relu', - kernel_initializer='ones', - bias_regularizer='l2') - config = keras.layers.serialize(layer) - new_layer = keras.layers.deserialize( - config, - custom_objects={ - 'serializable_fn': serializable_fn, - 'SerializableNestedInt': SerializableNestedInt - }) - self.assertEqual(new_layer.activation, keras.activations.relu) - self.assertIsInstance(new_layer.bias_regularizer, keras.regularizers.L1L2) - self.assertIsInstance(new_layer.units, SerializableNestedInt) - self.assertEqual(new_layer.units, 3) - self.assertIs(new_layer.units.fn, serializable_fn) - - -class SliceArraysTest(test.TestCase): - - def test_slice_arrays(self): - input_a = list([1, 2, 3]) - self.assertEqual( - generic_utils.slice_arrays(input_a, start=0), - [None, None, None]) - self.assertEqual( - generic_utils.slice_arrays(input_a, stop=3), - [None, None, None]) - self.assertEqual( - generic_utils.slice_arrays(input_a, start=0, stop=1), - [None, None, None]) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/frozen_keras/utils/layer_utils.py b/tensorflow/python/frozen_keras/utils/layer_utils.py deleted file mode 100644 index 5540acb8690..00000000000 --- a/tensorflow/python/frozen_keras/utils/layer_utils.py +++ /dev/null @@ -1,403 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# pylint: disable=protected-access -"""Utilities related to layer/model functionality. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import six - -from tensorflow.python.frozen_keras import backend as K -from tensorflow.python.frozen_keras.utils.conv_utils import convert_kernel -from tensorflow.python.util import deprecation -from tensorflow.python.util import nest -from tensorflow.python.util import object_identity - - -def get_source_inputs(tensor, layer=None, node_index=None): - """Returns the list of input tensors necessary to compute `tensor`. - - Output will always be a list of tensors - (potentially with 1 element). - - Arguments: - tensor: The tensor to start from. - layer: Origin layer of the tensor. Will be - determined via tensor._keras_history if not provided. - node_index: Origin node index of the tensor. - - Returns: - List of input tensors. - """ - if not hasattr(tensor, '_keras_history'): - return tensor - - if layer is None or node_index: - layer, node_index, _ = tensor._keras_history - if not layer._inbound_nodes: - return [tensor] - else: - node = layer._inbound_nodes[node_index] - if not node.inbound_layers: - # Reached an Input layer, stop recursion. - return nest.flatten(node.input_tensors) - else: - source_tensors = [] - for layer, node_index, _, tensor in node.iterate_inbound(): - previous_sources = get_source_inputs(tensor, layer, node_index) - # Avoid input redundancy. - for x in previous_sources: - if all(x is not t for t in source_tensors): - source_tensors.append(x) - return source_tensors - - -def validate_string_arg(input_data, - allowable_strings, - layer_name, - arg_name, - allow_none=False, - allow_callables=False): - """Validates the correctness of a string-based arg.""" - if allow_none and input_data is None: - return - elif allow_callables and callable(input_data): - return - elif isinstance(input_data, - six.string_types) and input_data in allowable_strings: - return - else: - allowed_args = '`None`, ' if allow_none else '' - allowed_args += 'a `Callable`, ' if allow_callables else '' - allowed_args += 'or one of the following values: %s' % (allowable_strings,) - raise ValueError(("%s's %s arg received an invalid value %s. " + - 'Allowed values are %s.') % - (layer_name, arg_name, input_data, allowed_args)) - - -def count_params(weights): - """Count the total number of scalars composing the weights. - - Arguments: - weights: An iterable containing the weights on which to compute params - - Returns: - The total number of scalars composing the weights - """ - unique_weights = object_identity.ObjectIdentitySet(weights) - weight_shapes = [w.shape.as_list() for w in unique_weights] - standardized_weight_shapes = [ - [0 if w_i is None else w_i for w_i in w] for w in weight_shapes - ] - return int(sum(np.prod(p) for p in standardized_weight_shapes)) - - -def print_summary(model, line_length=None, positions=None, print_fn=None): - """Prints a summary of a model. 
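-
-  For example (illustrative), `print_summary(model, line_length=80)` writes
-  the layer table to stdout; pass a custom `print_fn` such as `logging.info`
-  to capture or redirect the output instead.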
- - Arguments: - model: Keras model instance. - line_length: Total length of printed lines - (e.g. set this to adapt the display to different - terminal window sizes). - positions: Relative or absolute positions of log elements in each line. - If not provided, defaults to `[.33, .55, .67, 1.]`. - print_fn: Print function to use. - It will be called on each line of the summary. - You can set it to a custom function - in order to capture the string summary. - It defaults to `print` (prints to stdout). - """ - if print_fn is None: - print_fn = print - - if model.__class__.__name__ == 'Sequential': - sequential_like = True - elif not model._is_graph_network: - # We treat subclassed models as a simple sequence of layers, for logging - # purposes. - sequential_like = True - else: - sequential_like = True - nodes_by_depth = model._nodes_by_depth.values() - nodes = [] - for v in nodes_by_depth: - if (len(v) > 1) or (len(v) == 1 and - len(nest.flatten(v[0].inbound_layers)) > 1): - # if the model has multiple nodes - # or if the nodes have multiple inbound_layers - # the model is no longer sequential - sequential_like = False - break - nodes += v - if sequential_like: - # search for shared layers - for layer in model.layers: - flag = False - for node in layer._inbound_nodes: - if node in nodes: - if flag: - sequential_like = False - break - else: - flag = True - if not sequential_like: - break - - if sequential_like: - line_length = line_length or 65 - positions = positions or [.45, .85, 1.] - if positions[-1] <= 1: - positions = [int(line_length * p) for p in positions] - # header names for the different log elements - to_display = ['Layer (type)', 'Output Shape', 'Param #'] - else: - line_length = line_length or 98 - positions = positions or [.33, .55, .67, 1.] - if positions[-1] <= 1: - positions = [int(line_length * p) for p in positions] - # header names for the different log elements - to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Connected to'] - relevant_nodes = [] - for v in model._nodes_by_depth.values(): - relevant_nodes += v - - def print_row(fields, positions): - line = '' - for i in range(len(fields)): - if i > 0: - line = line[:-1] + ' ' - line += str(fields[i]) - line = line[:positions[i]] - line += ' ' * (positions[i] - len(line)) - print_fn(line) - - print_fn('Model: "{}"'.format(model.name)) - print_fn('_' * line_length) - print_row(to_display, positions) - print_fn('=' * line_length) - - def print_layer_summary(layer): - """Prints a summary for a single layer. - - Arguments: - layer: target layer. - """ - try: - output_shape = layer.output_shape - except AttributeError: - output_shape = 'multiple' - except RuntimeError: # output_shape unknown in Eager mode. - output_shape = '?' - name = layer.name - cls_name = layer.__class__.__name__ - fields = [name + ' (' + cls_name + ')', output_shape, layer.count_params()] - print_row(fields, positions) - - def print_layer_summary_with_connections(layer): - """Prints a summary for a single layer (including topological connections). - - Arguments: - layer: target layer. 
- """ - try: - output_shape = layer.output_shape - except AttributeError: - output_shape = 'multiple' - connections = [] - for node in layer._inbound_nodes: - if relevant_nodes and node not in relevant_nodes: - # node is not part of the current network - continue - - for inbound_layer, node_index, tensor_index, _ in node.iterate_inbound(): - connections.append('{}[{}][{}]'.format(inbound_layer.name, node_index, - tensor_index)) - - name = layer.name - cls_name = layer.__class__.__name__ - if not connections: - first_connection = '' - else: - first_connection = connections[0] - fields = [ - name + ' (' + cls_name + ')', output_shape, - layer.count_params(), first_connection - ] - print_row(fields, positions) - if len(connections) > 1: - for i in range(1, len(connections)): - fields = ['', '', '', connections[i]] - print_row(fields, positions) - - layers = model.layers - for i in range(len(layers)): - if sequential_like: - print_layer_summary(layers[i]) - else: - print_layer_summary_with_connections(layers[i]) - if i == len(layers) - 1: - print_fn('=' * line_length) - else: - print_fn('_' * line_length) - - if hasattr(model, '_collected_trainable_weights'): - trainable_count = count_params(model._collected_trainable_weights) - else: - trainable_count = count_params(model.trainable_weights) - - non_trainable_count = count_params(model.non_trainable_weights) - - print_fn('Total params: {:,}'.format(trainable_count + non_trainable_count)) - print_fn('Trainable params: {:,}'.format(trainable_count)) - print_fn('Non-trainable params: {:,}'.format(non_trainable_count)) - print_fn('_' * line_length) - - -def gather_trainable_weights(trainable, sub_layers, extra_variables): - """Lists the trainable weights for an object with sub-layers. - - Args: - trainable: Whether the object collecting the variables is trainable. - sub_layers: A flat list of Layer objects owned by this object, to collect - variables from. - extra_variables: Any extra variables to include. Their `.trainable` property - is used to categorize them. - - Returns: - A list of collected trainable weights/variables. - """ - if not trainable: - return [] - weights = [] - for layer in sub_layers: - weights += layer.trainable_weights - trainable_extra_variables = [ - v for v in extra_variables if v.trainable] - return weights + trainable_extra_variables - - -def gather_non_trainable_weights(trainable, sub_layers, extra_variables): - """Lists the non-trainable weights for an object with sub-layers. - - Args: - trainable: Whether the object collecting the variables is trainable. - sub_layers: A flat list of Layer objects owned by this object, to collect - variables from. - extra_variables: Any extra variables to include. Their `.trainable` property - is used to categorize them. - - Returns: - A list of collected non-trainable weights/variables. 
- """ - trainable_extra_variables = [] - non_trainable_extra_variables = [] - for v in extra_variables: - if v.trainable: - trainable_extra_variables.append(v) - else: - non_trainable_extra_variables.append(v) - weights = [] - for layer in sub_layers: - weights += layer.non_trainable_weights - if not trainable: - trainable_weights = [] - for layer in sub_layers: - trainable_weights += layer.trainable_weights - return (trainable_weights + trainable_extra_variables - + weights + non_trainable_extra_variables) - return weights + non_trainable_extra_variables - - -@deprecation.deprecated('2020-06-23', - 'The Theano kernel format is legacy; ' - 'this utility will be removed.') -def convert_all_kernels_in_model(model): - """Converts all convolution kernels in a model from Theano to TensorFlow. - - Also works from TensorFlow to Theano. - - This is used for converting legacy Theano-saved model files. - - Arguments: - model: target model for the conversion. - """ - # Note: SeparableConvolution not included - # since only supported by TF. - conv_classes = { - 'Conv1D', - 'Conv2D', - 'Conv3D', - 'Conv2DTranspose', - } - to_assign = [] - for layer in model.layers: - if layer.__class__.__name__ in conv_classes: - original_kernel = K.get_value(layer.kernel) - converted_kernel = convert_kernel(original_kernel) - to_assign.append((layer.kernel, converted_kernel)) - K.batch_set_value(to_assign) - - -def convert_dense_weights_data_format(dense, - previous_feature_map_shape, - target_data_format='channels_first'): - """Utility useful when changing a convnet's `data_format`. - - When porting the weights of a convnet from one data format to the other, - if the convnet includes a `Flatten` layer - (applied to the last convolutional feature map) - followed by a `Dense` layer, the weights of that `Dense` layer - should be updated to reflect the new dimension ordering. - - Arguments: - dense: The target `Dense` layer. - previous_feature_map_shape: A shape tuple of 3 integers, - e.g. `(512, 7, 7)`. The shape of the convolutional - feature map right before the `Flatten` layer that - came before the target `Dense` layer. - target_data_format: One of "channels_last", "channels_first". - Set it "channels_last" - if converting a "channels_first" model to "channels_last", - or reciprocally. - """ - assert target_data_format in {'channels_last', 'channels_first'} - kernel, bias = dense.get_weights() - for i in range(kernel.shape[1]): - if target_data_format == 'channels_first': - c, h, w = previous_feature_map_shape - original_fm_shape = (h, w, c) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (2, 0, 1)) # last -> first - else: - h, w, c = previous_feature_map_shape - original_fm_shape = (c, h, w) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (1, 2, 0)) # first -> last - kernel[:, i] = np.reshape(ki, (np.prod(previous_feature_map_shape),)) - dense.set_weights([kernel, bias]) - - -def is_builtin_layer(layer): - if not getattr(layer, '_keras_api_names', None): - return False - - # Subclasses of `Layer` that are not exported inherit the export name - # of the base layer class. 
- return (layer._keras_api_names != ('keras.layers.Layer',) and - layer._keras_api_names_v1 != ('keras.layers.Layer',)) diff --git a/tensorflow/python/frozen_keras/utils/tf_utils.py b/tensorflow/python/frozen_keras/utils/tf_utils.py deleted file mode 100644 index 46ddb5b204b..00000000000 --- a/tensorflow/python/frozen_keras/utils/tf_utils.py +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""TensorFlow-related utilities.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -import numpy as np -import six - -from tensorflow.python.data.experimental.ops import cardinality -from tensorflow.python.eager import context -from tensorflow.python.framework import composite_tensor -from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond as smart_module -from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import tensor_util -from tensorflow.python.framework import type_spec -from tensorflow.python.frozen_keras import backend as K -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variables -from tensorflow.python.util import nest -from tensorflow.python.util import object_identity -from tensorflow.python.util import tf_contextlib - - -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. - """ - if isinstance(pred, variables.Variable): - return control_flow_ops.cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - return smart_module.smart_cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or a TensorFlow boolean variable - or tensor, or the Python integer 1 or 0. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Variable, Tensor or bool, or Python - integer 1 or 0. - """ - # Allow integer booleans. 
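-  # For example: constant_value(1) returns True and constant_value(0) returns
-  # False, while a tf.Variable predicate yields None (only known at run time).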
- if isinstance(pred, int): - if pred == 1: - pred = True - elif pred == 0: - pred = False - - if isinstance(pred, variables.Variable): - return None - return smart_module.smart_constant_value(pred) - - -def is_tensor_or_tensor_list(v): - v = nest.flatten(v) - if v and isinstance(v[0], ops.Tensor): - return True - else: - return False - - -def get_reachable_from_inputs(inputs, targets=None): - """Returns the set of tensors/ops reachable from `inputs`. - - Stops if all targets have been found (target is optional). - - Only valid in Symbolic mode, not Eager mode. - - Args: - inputs: List of tensors. - targets: List of tensors. - - Returns: - A set of tensors reachable from the inputs (includes the inputs themselves). - """ - inputs = nest.flatten(inputs, expand_composites=True) - reachable = object_identity.ObjectIdentitySet(inputs) - if targets: - remaining_targets = object_identity.ObjectIdentitySet(nest.flatten(targets)) - queue = inputs[:] - - while queue: - x = queue.pop() - if isinstance(x, tuple(_user_convertible_tensor_types)): - # Can't find consumers of user-specific types. - continue - - if isinstance(x, ops.Operation): - outputs = x.outputs[:] or [] - outputs += x._control_outputs # pylint: disable=protected-access - elif isinstance(x, variables.Variable): - try: - outputs = [x.op] - except AttributeError: - # Variables can be created in an Eager context. - outputs = [] - elif tensor_util.is_tensor(x): - outputs = x.consumers() - else: - raise TypeError('Expected Operation, Variable, or Tensor, got ' + str(x)) - - for y in outputs: - if y not in reachable: - reachable.add(y) - if targets: - remaining_targets.discard(y) - queue.insert(0, y) - - if targets and not remaining_targets: - return reachable - - return reachable - - -# This function needs access to private functions of `nest`. -# pylint: disable=protected-access -def map_structure_with_atomic(is_atomic_fn, map_fn, nested): - """Maps the atomic elements of a nested structure. - - Arguments: - is_atomic_fn: A function that determines if an element of `nested` is - atomic. - map_fn: The function to apply to atomic elements of `nested`. - nested: A nested structure. - - Returns: - The nested structure, with atomic elements mapped according to `map_fn`. - - Raises: - ValueError: If an element that is neither atomic nor a sequence is - encountered. - """ - if is_atomic_fn(nested): - return map_fn(nested) - - # Recursively convert. - if not nest.is_sequence(nested): - raise ValueError( - 'Received non-atomic and non-sequence element: {}'.format(nested)) - if nest._is_mapping(nested): - values = [nested[k] for k in nest._sorted(nested)] - else: - values = nested - mapped_values = [ - map_structure_with_atomic(is_atomic_fn, map_fn, ele) for ele in values - ] - return nest._sequence_like(nested, mapped_values) - - -# pylint: enable=protected-access - - -def convert_shapes(input_shape, to_tuples=True): - """Converts nested shape representations to desired format. - - Performs: - - TensorShapes -> tuples if `to_tuples=True`. - tuples of int or None -> TensorShapes if `to_tuples=False`. - - Valid objects to be converted are: - - TensorShapes - - tuples with elements of type int or None. - - ints - - None - - Arguments: - input_shape: A nested structure of objects to be converted to TensorShapes. - to_tuples: If `True`, converts all TensorShape to tuples. Otherwise converts - all tuples representing shapes to TensorShapes. - - Returns: - Nested structure of shapes in desired format. 
- - Raises: - ValueError: when the input tensor shape can't be converted to tuples, eg - unknown tensor shape. - """ - - def _is_shape_component(value): - return value is None or isinstance(value, (int, tensor_shape.Dimension)) - - def _is_atomic_shape(input_shape): - # Ex: TensorShape or (None, 10, 32) or 5 or `None` - if _is_shape_component(input_shape): - return True - if isinstance(input_shape, tensor_shape.TensorShape): - return True - if (isinstance(input_shape, (tuple, list)) and - all(_is_shape_component(ele) for ele in input_shape)): - return True - return False - - def _convert_shape(input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if to_tuples: - input_shape = tuple(input_shape.as_list()) - return input_shape - - return map_structure_with_atomic(_is_atomic_shape, _convert_shape, - input_shape) - - -class ListWrapper(object): - """A wrapper for lists to be treated as elements for `nest`.""" - - def __init__(self, list_to_wrap): - self._list = list_to_wrap - - def as_list(self): - return self._list - - -def convert_inner_node_data(nested, wrap=False): - """Either wraps or unwraps innermost node data lists in `ListWrapper` objects. - - Arguments: - nested: A nested data structure. - wrap: If `True`, wrap innermost lists in `ListWrapper` objects. If `False`, - unwraps `ListWrapper` objects into lists. - - Returns: - Structure of same type as nested, with lists wrapped/unwrapped. - """ - - def _is_serialized_node_data(nested): - # Node data can be of form `[layer_name, node_id, tensor_id]` or - # `[layer_name, node_id, tensor_id, kwargs]`. - if (isinstance(nested, list) and (len(nested) in [3, 4]) and - isinstance(nested[0], six.string_types)): - return True - return False - - def _is_atomic_nested(nested): - """Returns `True` if `nested` is a list representing node data.""" - if isinstance(nested, ListWrapper): - return True - if _is_serialized_node_data(nested): - return True - return not nest.is_sequence(nested) - - def _convert_object_or_list(nested): - """Convert b/t `ListWrapper` object and list representations.""" - if wrap: - if isinstance(nested, ListWrapper): - return nested - if _is_serialized_node_data(nested): - return ListWrapper(nested) - return nested - else: - if isinstance(nested, ListWrapper): - return nested.as_list() - return nested - - return map_structure_with_atomic(_is_atomic_nested, _convert_object_or_list, - nested) - - -def shape_type_conversion(fn): - """Decorator that handles tuple/TensorShape conversion. - - Used in `compute_output_shape` and `build`. - - Arguments: - fn: function to wrap. - - Returns: - Wrapped function. - """ - - def wrapper(instance, input_shape): - # Pass shapes as tuples to `fn` - # This preserves compatibility with external Keras. - if input_shape is not None: - input_shape = convert_shapes(input_shape, to_tuples=True) - output_shape = fn(instance, input_shape) - # Return shapes from `fn` as TensorShapes. - if output_shape is not None: - output_shape = convert_shapes(output_shape, to_tuples=False) - return output_shape - - return wrapper - - -def are_all_symbolic_tensors(tensors): - return all(is_symbolic_tensor(tensor) for tensor in tensors) - - -_user_convertible_tensor_types = set() - - -def is_symbolic_tensor(tensor): - """Returns whether a tensor is symbolic (from a TF graph) or an eager tensor. - - A Variable can be seen as either: it is considered symbolic - when we are in a graph scope, and eager when we are in an eager scope. - - Arguments: - tensor: A tensor instance to test. 
-
-
-def is_symbolic_tensor(tensor):
-  """Returns whether a tensor is symbolic (from a TF graph) or an eager tensor.
-
-  A Variable can be seen as either: it is considered symbolic
-  when we are in a graph scope, and eager when we are in an eager scope.
-
-  Arguments:
-    tensor: A tensor instance to test.
-
-  Returns:
-    True for symbolic tensors, False for eager tensors.
-  """
-  if isinstance(tensor, tuple(_user_convertible_tensor_types)):
-    tensor = ops.convert_to_tensor_or_composite(tensor)
-  if isinstance(tensor, variables.Variable):
-    # Variables that are output of a Keras Layer in Functional API mode
-    # should be considered symbolic.
-    # TODO(omalleyt): We need a better way to check this in order to
-    # enable `run_eagerly=True` for Models containing Layers that
-    # return Variables as outputs.
-    return (getattr(tensor, '_keras_history', False) or
-            not context.executing_eagerly())
-  if isinstance(tensor, composite_tensor.CompositeTensor):
-    component_tensors = nest.flatten(tensor, expand_composites=True)
-    return any(hasattr(t, 'graph') for t in component_tensors)
-  if isinstance(tensor, ops.Tensor):
-    return hasattr(tensor, 'graph')
-  return False
-
-
-def register_symbolic_tensor_type(cls):
-  """Allows users to specify types regarded as symbolic `Tensor`s.
-
-  Used in conjunction with `tf.register_tensor_conversion_function`, calling
-  `tf.keras.utils.register_symbolic_tensor_type(cls)` allows non-`Tensor`
-  objects to be plumbed through Keras layers.
-
-  Example:
-
-  ```python
-  # One-time setup.
-  class Foo(object):
-    def __init__(self, input_):
-      self._input = input_
-    def value(self):
-      return tf.constant(42.)
-
-  tf.register_tensor_conversion_function(
-      Foo, lambda x, *args, **kwargs: x.value())
-
-  tf.keras.utils.register_symbolic_tensor_type(Foo)
-
-  # User-land.
-  layer = tf.keras.layers.Lambda(lambda input_: Foo(input_))
-  ```
-
-  Arguments:
-    cls: A `class` type which shall be regarded as a symbolic `Tensor`.
-  """
-  global _user_convertible_tensor_types
-  _user_convertible_tensor_types.add(cls)
-
-
-def type_spec_from_value(value):
-  """Grab type_spec without converting array-likes to tensors."""
-  if isinstance(value, composite_tensor.CompositeTensor):
-    return value._type_spec  # pylint: disable=protected-access
-  # Get a TensorSpec for array-like data without
-  # converting the data to a Tensor
-  if hasattr(value, 'shape') and hasattr(value, 'dtype'):
-    return tensor_spec.TensorSpec(value.shape, value.dtype)
-  else:
-    return type_spec.type_spec_from_value(value)
-
-
-def is_tensor_or_variable(x):
-  return tensor_util.is_tensor(x) or isinstance(x, variables.Variable)
-
-
-def assert_no_legacy_layers(layers):
-  """Prevent tf.layers.Layers from being used with Keras.
-
-  Certain legacy layers inherit from their keras analogs; however they are
-  not supported with keras and can lead to subtle and hard to diagnose bugs.
-
-  Args:
-    layers: A list of layers to check
-
-  Raises:
-    TypeError: If any elements of layers are tf.layers.Layers
-  """
-
-  # isinstance check for tf.layers.Layer introduces a circular dependency.
-  legacy_layers = [l for l in layers if getattr(l, '_is_legacy_layer', None)]
-  if legacy_layers:
-    layer_str = '\n'.join('  ' + str(l) for l in legacy_layers)
-    raise TypeError(
-        'The following are legacy tf.layers.Layers:\n{}\nTo use keras as a '
-        'framework (for instance using the Network, Model, or Sequential '
-        'classes), please use the tf.keras.layers implementation instead. '
-        '(Or, if writing custom layers, subclass from tf.keras.layers rather '
-        'than tf.layers)'.format(layer_str))
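The duck typing in `type_spec_from_value` — anything exposing `.shape` and `.dtype` gets a `TensorSpec` without the data being copied into a tensor — can be restated in isolation. `spec_for` is an illustrative name, not a TensorFlow API:

```python
import numpy as np
import tensorflow as tf

def spec_for(value):
  # Array-likes (NumPy arrays, pandas values, ...) expose shape/dtype,
  # so a spec can be built without materializing a Tensor.
  if hasattr(value, 'shape') and hasattr(value, 'dtype'):
    return tf.TensorSpec(value.shape, value.dtype)
  raise TypeError('No spec for %r' % (value,))

print(spec_for(np.zeros((2, 3), dtype=np.float32)))
# TensorSpec(shape=(2, 3), dtype=tf.float32, name=None)
```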
-
-
-@tf_contextlib.contextmanager
-def maybe_init_scope(layer):
-  """Open an `init_scope` if in V2 mode and using the keras graph.
-
-  Arguments:
-    layer: The Layer/Model that is currently active.
-
-  Yields:
-    None
-  """
-  # Don't open an init_scope in V1 mode or when using legacy tf.layers.
-  if (ops.executing_eagerly_outside_functions() and
-      getattr(layer, '_keras_style', True)):
-    with ops.init_scope():
-      yield
-  else:
-    yield
-
-
-@tf_contextlib.contextmanager
-def graph_context_for_symbolic_tensors(*args, **kwargs):
-  """Returns graph context manager if any of the inputs is a symbolic tensor."""
-  if any(is_symbolic_tensor(v) for v in list(args) + list(kwargs.values())):
-    with K.get_graph().as_default():
-      yield
-  else:
-    yield
-
-
-def dataset_is_infinite(dataset):
-  """True if the passed dataset is infinite."""
-  if ops.executing_eagerly_outside_functions():
-    return math_ops.equal(
-        cardinality.cardinality(dataset), cardinality.INFINITE)
-  else:
-    dataset_size = K.get_session().run(cardinality.cardinality(dataset))
-    return dataset_size == cardinality.INFINITE
-
-
-def get_tensor_spec(t, dynamic_batch=False, name=None):
-  """Returns a `TensorSpec` given a single `Tensor` or `TensorSpec`."""
-  if isinstance(t, type_spec.TypeSpec):
-    spec = t
-  elif isinstance(t, composite_tensor.CompositeTensor):
-    # TODO(b/148821952): Should these specs have a name attr?
-    spec = t._type_spec  # pylint: disable=protected-access
-  elif hasattr(t, 'shape') and hasattr(t, 'dtype'):
-    spec = tensor_spec.TensorSpec(shape=t.shape, dtype=t.dtype, name=name)
-  else:
-    return None  # Allow non-Tensors to pass through.
-
-  if not dynamic_batch:
-    return spec
-
-  dynamic_batch_spec = copy.deepcopy(spec)
-  # RaggedTensorSpec only has a private _shape.
-  shape = dynamic_batch_spec._shape.as_list()  # pylint: disable=protected-access
-  if shape:
-    shape[0] = None
-  dynamic_batch_spec._shape = tensor_shape.TensorShape(shape)  # pylint: disable=protected-access
-  return dynamic_batch_spec
-
-
-def to_numpy_or_python_type(tensors):
-  """Converts a structure of `Tensor`s to `NumPy` arrays or Python scalar types.
-
-  For each tensor, it calls `tensor.numpy()`. If the result is a scalar value,
-  it converts it to a Python type, such as a float or int, by calling
-  `result.item()`.
-
-  Numpy scalars are converted, as Python types are often more convenient to deal
-  with. This is especially useful for bfloat16 Numpy scalars, which don't
-  support as many operations as other Numpy values.
-
-  Args:
-    tensors: A structure of tensors.
-
-  Returns:
-    `tensors`, but scalar tensors are converted to Python types and non-scalar
-    tensors are converted to Numpy arrays.
-  """
-  def _to_single_numpy_or_python_type(t):
-    if isinstance(t, ops.Tensor):
-      x = t.numpy()
-      return x.item() if np.ndim(x) == 0 else x
-    return t  # Don't turn ragged or sparse tensors to NumPy.
-
-  return nest.map_structure(_to_single_numpy_or_python_type, tensors)
-
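The per-element rule in `to_numpy_or_python_type` — scalars become Python numbers, everything else a NumPy array — reduces to a couple of lines, sketched here for eager tensors:

```python
import numpy as np
import tensorflow as tf

def to_np_or_py(t):
  x = t.numpy()
  return x.item() if np.ndim(x) == 0 else x  # scalars -> Python types

print(to_np_or_py(tf.constant(3.5)))     # 3.5 (a Python float)
print(to_np_or_py(tf.constant([1, 2])))  # [1 2] (a NumPy array)
```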
diff --git a/tensorflow/python/frozen_keras/utils/tf_utils_test.py b/tensorflow/python/frozen_keras/utils/tf_utils_test.py
deleted file mode 100644
index 57a0e33b0f6..00000000000
--- a/tensorflow/python/frozen_keras/utils/tf_utils_test.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras TF utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python import keras
-from tensorflow.python.eager import context
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import test_util
-from tensorflow.python.frozen_keras.utils import tf_utils
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-
-
-@test_util.run_all_in_graph_and_eager_modes
-class TestIsSymbolicTensor(test.TestCase):
-
-  def test_default_behavior(self):
-    if context.executing_eagerly():
-      self.assertFalse(tf_utils.is_symbolic_tensor(
-          variables.Variable(name='blah', initial_value=0.)))
-      self.assertFalse(
-          tf_utils.is_symbolic_tensor(ops.convert_to_tensor_v2(0.)))
-      self.assertFalse(tf_utils.is_symbolic_tensor(
-          sparse_tensor.SparseTensor(
-              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
-    else:
-      self.assertTrue(tf_utils.is_symbolic_tensor(
-          variables.Variable(name='blah', initial_value=0.)))
-      self.assertTrue(tf_utils.is_symbolic_tensor(ops.convert_to_tensor_v2(0.)))
-      self.assertTrue(tf_utils.is_symbolic_tensor(
-          sparse_tensor.SparseTensor(
-              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
-
-  def test_works_with_registered(self):
-
-    class CustomClass(object):
-
-      def value(self):
-        return ops.convert_to_tensor_v2(42.)
-
-    ops.register_tensor_conversion_function(
-        CustomClass, lambda value, **_: value.value())
-
-    tf_utils.register_symbolic_tensor_type(CustomClass)
-
-    if context.executing_eagerly():
-      self.assertFalse(tf_utils.is_symbolic_tensor(
-          variables.Variable(name='blah', initial_value=0.)))
-      self.assertFalse(
-          tf_utils.is_symbolic_tensor(ops.convert_to_tensor_v2(0.)))
-      self.assertFalse(tf_utils.is_symbolic_tensor(
-          sparse_tensor.SparseTensor(
-              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
-      self.assertFalse(tf_utils.is_symbolic_tensor(CustomClass()))
-    else:
-      self.assertTrue(tf_utils.is_symbolic_tensor(
-          variables.Variable(name='blah', initial_value=0.)))
-      self.assertTrue(tf_utils.is_symbolic_tensor(ops.convert_to_tensor_v2(0.)))
-      self.assertTrue(tf_utils.is_symbolic_tensor(
-          sparse_tensor.SparseTensor(
-              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
-      self.assertTrue(tf_utils.is_symbolic_tensor(CustomClass()))
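The registration flow that `test_works_with_registered` exercises uses the public conversion-function registry; a minimal version, with `Box` as an illustrative stand-in class:

```python
import tensorflow as tf

class Box(object):
  def __init__(self, v):
    self._v = v
  def value(self):
    return tf.constant(self._v)

# Teach convert_to_tensor how to unwrap a Box. The conversion function is
# called with (value, dtype=..., name=..., as_ref=...); extras are ignored.
tf.register_tensor_conversion_function(Box, lambda box, **_: box.value())
print(tf.convert_to_tensor(Box(42.0)))
# tf.Tensor(42.0, shape=(), dtype=float32)
```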
-
-  def test_enables_nontensor_plumbing(self):
-    self.skipTest('Sequential model will check layer instance type and fail.')
-
-    if context.executing_eagerly():
-      self.skipTest('`compile` functionality changed.')
-    # Setup.
-
-    class Foo(object):
-
-      def __init__(self, input_):
-        self._input = input_
-        self.value = ops.convert_to_tensor_v2([[42.]])
-
-      @property
-      def dtype(self):
-        return self.value.dtype
-
-    ops.register_tensor_conversion_function(
-        Foo, lambda x, *args, **kwargs: x.value)
-    tf_utils.register_symbolic_tensor_type(Foo)
-
-    class PlumbingLayer(keras.layers.Lambda):
-
-      def __init__(self, fn, **kwargs):
-        def _fn(*fargs, **fkwargs):
-          d = fn(*fargs, **fkwargs)
-          x = ops.convert_to_tensor_v2(d)
-          d.shape = x.shape
-          d.get_shape = x.get_shape
-          return d, x
-        super(PlumbingLayer, self).__init__(_fn, **kwargs)
-        self._enter_dunder_call = False
-
-      def __call__(self, inputs, *args, **kwargs):
-        self._enter_dunder_call = True
-        d, _ = super(PlumbingLayer, self).__call__(inputs, *args, **kwargs)
-        self._enter_dunder_call = False
-        return d
-
-      def call(self, inputs, *args, **kwargs):
-        d, v = super(PlumbingLayer, self).call(inputs, *args, **kwargs)
-        if self._enter_dunder_call:
-          return d, v
-        return d
-
-    # User-land.
-    model = keras.Sequential([
-        keras.layers.InputLayer((1,)),
-        PlumbingLayer(Foo),  # Makes a `Foo` object.
-    ])
-    # Let's ensure Keras graph history is preserved by composing the models.
-    model = keras.Model(model.inputs, model(model.outputs))
-    # Now we instantiate the model and verify we have a `Foo` object, not a
-    # `Tensor`.
-    y = model(ops.convert_to_tensor_v2([[7.]]))
-    self.assertIsInstance(y, Foo)
-    # Confirm that (custom) loss sees `Foo` instance, not Tensor.
-    obtained_prediction_box = [None]
-    def custom_loss(y_obs, y_pred):
-      del y_obs
-      obtained_prediction_box[0] = y_pred
-      return y_pred
-    # Apparently `compile` calls the loss function enough to trigger the
-    # side-effect.
-    model.compile('SGD', loss=custom_loss)
-    self.assertIsInstance(obtained_prediction_box[0], Foo)
-
-
-class ConvertInnerNodeDataTest(test.TestCase):
-
-  def test_convert_inner_node_data(self):
-    data = tf_utils.convert_inner_node_data((tf_utils.ListWrapper(['l', 2, 3]),
-                                             tf_utils.ListWrapper(['l', 5, 6])))
-    self.assertEqual(data, (['l', 2, 3], ['l', 5, 6]))
-
-    data = tf_utils.convert_inner_node_data(((['l', 2, 3], ['l', 5, 6])),
-                                            wrap=True)
-    self.assertTrue(all(isinstance(ele, tf_utils.ListWrapper) for ele in data))
-
-
-if __name__ == '__main__':
-  test.main()
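The round trip that `ConvertInnerNodeDataTest` checks can be reproduced against the keras copy of the utilities (the frozen copy is what this patch removes), assuming that module path:

```python
from tensorflow.python.keras.utils import tf_utils

# Wrap serialized node data so `nest` treats each inner list as one element.
wrapped = tf_utils.convert_inner_node_data((['l', 2, 3], ['l', 5, 6]),
                                           wrap=True)
print([type(w).__name__ for w in wrapped])  # ['ListWrapper', 'ListWrapper']

# Unwrap back to plain lists.
print(tf_utils.convert_inner_node_data(wrapped))  # (['l', 2, 3], ['l', 5, 6])
```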
diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD
index 0d143e1586e..a5b8ac2c13a 100644
--- a/tensorflow/python/keras/engine/BUILD
+++ b/tensorflow/python/keras/engine/BUILD
@@ -44,7 +44,6 @@ py_library(
         "//tensorflow/python/distribute:input_lib",
         "//tensorflow/python/distribute:reduce_util",
         "//tensorflow/python/eager:monitoring",
-        "//tensorflow/python/frozen_keras/engine:legacy_base_layer",
         "//tensorflow/python/keras:activations",
         "//tensorflow/python/keras:backend",
         "//tensorflow/python/keras:callbacks",
diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py
index b615b1794b9..667899660c1 100644
--- a/tensorflow/python/keras/engine/sequential.py
+++ b/tensorflow/python/keras/engine/sequential.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 
 import copy
 
-from tensorflow.python.frozen_keras.engine import legacy_base_layer
 from tensorflow.python.keras import layers as layer_module
 from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.engine import input_layer
@@ -169,8 +168,7 @@ class Sequential(training.Model):
       if isinstance(origin_layer, input_layer.InputLayer):
         layer = origin_layer
 
-    if not isinstance(layer,
-                      (base_layer.Layer, legacy_base_layer.LegacyBaseLayer)):
+    if not isinstance(layer, base_layer.Layer):
       raise TypeError('The added layer must be '
                       'an instance of class Layer. '
                       'Found: ' + str(layer))
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 339da0798a3..c38d7b84a74 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -122,7 +122,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/distribute:combinations",
     "//tensorflow/python/distribute:multi_process_runner",
     "//tensorflow/python/eager:eager_pip",
-    "//tensorflow/python/frozen_keras/engine:legacy_base_layer",
    "//tensorflow/python/keras:combinations",
     "//tensorflow/python/keras/layers/preprocessing:preprocessing_test_utils",
     "//tensorflow/python/keras/distribute:distribute_strategy_test_lib",
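The net effect of the `sequential.py` hunk is that `Sequential.add` now accepts only `base_layer.Layer` instances, with no legacy escape hatch; a sketch of the resulting behavior:

```python
import tensorflow as tf

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(4))  # Fine: a real Layer instance.
try:
  model.add(lambda x: x)             # Not a Layer.
except TypeError as e:
  print(e)  # "The added layer must be an instance of class Layer. Found: ..."
```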