Initial support for mixed precision policies.
A tf.keras.mixed_precision.experimental.Policy determines the dtype of layer computations and variables. In this initial implementation, policies only determine layer variable dtypes; support for determining layer computation dtypes will come later.

Co-authored-by: James Qin <jamesqin@google.com>
PiperOrigin-RevId: 236010048
This commit is contained in:
parent
97e36fc65e
commit
f22e833a2b
Changed golden API files under tensorflow/python/tools/api/golden:

v1:
tensorflow.keras.mixed_precision.experimental.-policy.pbtxt
tensorflow.keras.mixed_precision.experimental.pbtxt
tensorflow.keras.mixed_precision.pbtxt
tensorflow.keras.pbtxt
tensorflow.layers.-average-pooling1-d.pbtxt
tensorflow.layers.-average-pooling2-d.pbtxt
tensorflow.layers.-average-pooling3-d.pbtxt
tensorflow.layers.-batch-normalization.pbtxt
tensorflow.layers.-conv1-d.pbtxt
tensorflow.layers.-conv2-d-transpose.pbtxt
tensorflow.layers.-conv2-d.pbtxt
tensorflow.layers.-conv3-d-transpose.pbtxt
tensorflow.layers.-conv3-d.pbtxt
tensorflow.layers.-dense.pbtxt
tensorflow.layers.-dropout.pbtxt
tensorflow.layers.-flatten.pbtxt
tensorflow.layers.-layer.pbtxt
tensorflow.layers.-max-pooling1-d.pbtxt
tensorflow.layers.-max-pooling2-d.pbtxt
tensorflow.layers.-max-pooling3-d.pbtxt
tensorflow.layers.-separable-conv1-d.pbtxt
tensorflow.layers.-separable-conv2-d.pbtxt
tensorflow.lite.experimental.nn.-t-f-lite-l-s-t-m-cell.pbtxt
tensorflow.lite.experimental.nn.-tf-lite-r-n-n-cell.pbtxt
tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt

v2:
tensorflow.keras.mixed_precision.experimental.-policy.pbtxt
tensorflow.keras.mixed_precision.experimental.pbtxt
tensorflow.keras.mixed_precision.pbtxt
tensorflow.keras.pbtxt
tensorflow.lite.experimental.nn.-t-f-lite-l-s-t-m-cell.pbtxt
tensorflow.lite.experimental.nn.-tf-lite-r-n-n-cell.pbtxt
tensorflow.nn.-dropout-wrapper.pbtxt
tensorflow.nn.-residual-wrapper.pbtxt
tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
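For orientation before the diffs: a minimal sketch of how the new API is meant to be exercised, pieced together from the Policy docstring and the tests added below (layer sizes are arbitrary, and this assumes a TensorFlow build containing this commit):

```python
import tensorflow as tf

# Set the global policy: layers infer their computation dtype from their
# inputs, but create their variables in float32.
tf.keras.mixed_precision.experimental.set_policy('infer_float32_vars')

layer = tf.keras.layers.Dense(10)
y = layer(tf.zeros((2, 4), dtype='float16'))  # float16 inputs and outputs
print(layer.kernel.dtype)  # float32 variable, auto-cast to float16 in call()

# Equivalently, a policy can be scoped to a single layer via `dtype`:
policy = tf.keras.mixed_precision.experimental.Policy('infer_float32_vars')
dense = tf.keras.layers.Dense(10, dtype=policy)
```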
@@ -64,6 +64,7 @@ py_library(
        ":saving",
        "//tensorflow/python:training",
        "//tensorflow/python/keras/mixed_precision/experimental:autocast_variable",
        "//tensorflow/python/keras/mixed_precision/experimental:policy",
        "//tensorflow/python/keras/optimizer_v2",
        "//tensorflow/python/saved_model",
        "@keras_applications_archive//:keras_applications",
@@ -171,6 +172,8 @@ py_library(
        "//tensorflow/python/distribute:distribute_lib",
        "//tensorflow/python/distribute:input_lib",
        "//tensorflow/python/distribute:reduce_util",
        "//tensorflow/python/keras/mixed_precision/experimental:autocast_variable",
        "//tensorflow/python/keras/mixed_precision/experimental:policy",
        "//tensorflow/python/training/tracking:data_structures",
        "//tensorflow/tools/docs:doc_controls",
        "@six_archive//:six",
@@ -26,6 +26,7 @@ import numpy as np
from six.moves import zip  # pylint: disable=redefined-builtin

from tensorflow.core.framework import node_def_pb2
from tensorflow.python.distribute import values as distribute_values
from tensorflow.python.eager import context
from tensorflow.python.eager import function
from tensorflow.python.framework import dtypes
@@ -38,6 +39,8 @@ from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine import base_layer_utils
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.keras.mixed_precision.experimental import autocast_variable
from tensorflow.python.keras.mixed_precision.experimental import policy
from tensorflow.python.keras.utils import generic_utils
from tensorflow.python.keras.utils import tf_utils
# Modules that only depend on `keras.layers` import these from here.
@@ -172,7 +175,9 @@ class Layer(trackable.Trackable):
    # A dictionary that maps metric names to metric result tensors. The results
    # are the running averages of metric values over an epoch.
    self._metrics_tensors = {}
    self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name

    self._set_dtype_and_policy(dtype)

    self._call_fn_args = function_utils.fn_args(self.call)
    self._compute_previous_mask = ('mask' in self._call_fn_args or
                                   hasattr(self, 'compute_mask'))
@@ -308,10 +313,13 @@ class Layer(trackable.Trackable):
    shape = shape or ()
    # Validate optional keyword arguments.
    for kwarg in kwargs:
      if kwarg not in ['getter', 'collections']:
      if kwarg not in ['getter', 'collections', 'experimental_autocast']:
        raise TypeError('Unknown keyword argument:', kwarg)
    getter = kwargs.pop('getter', None)
    collections = kwargs.pop('collections', None)
    # 'experimental_autocast' can be set to False by the caller to indicate an
    # AutoCastVariable should never be created.
    autocast = kwargs.pop('experimental_autocast', True)

    if dtype is None:
      dtype = self.dtype or backend.floatx()
@@ -368,6 +376,12 @@ class Layer(trackable.Trackable):
        aggregation=aggregation)
    backend.track_variable(variable)

    if autocast and self._mixed_precision_policy.should_cast_variables:
      if isinstance(variable, distribute_values.DistributedVariable):
        variable = autocast_variable.AutoCastDistributedVariable(variable)
      else:
        variable = autocast_variable.AutoCastVariable(variable)

    if regularizer is not None:
      # TODO(fchollet): in the future, this should be handled at the
      # level of variable creation, and weight regularization losses
@@ -402,6 +416,7 @@ class Layer(trackable.Trackable):
      config['batch_input_shape'] = self._batch_input_shape
    if hasattr(self, 'dtype'):
      config['dtype'] = self.dtype
    # TODO(reedwm): Handle serializing self._mixed_precision_policy.
    return config

  @classmethod
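The new `experimental_autocast=False` kwarg lets a layer opt individual weights out of AutoCastVariable wrapping, which the normalization layers further down use for their scale/offset and moving statistics. A minimal sketch of a custom layer doing the same, modeled on `AddLayerWithoutAutoCast` in the new keras_test.py (the `ScaleLayer` itself is hypothetical):

```python
from tensorflow.python.keras.engine import base_layer
from tensorflow.python.ops import math_ops


class ScaleLayer(base_layer.Layer):  # illustrative layer, not part of the commit
  """Multiplies its input by a scale kept in float32."""

  def build(self, input_shape):
    # Opt out of autocasting: even under an 'infer_float32_vars' policy,
    # reads of self.scale return float32, so we cast manually in call().
    self.scale = self.add_weight('scale', (), initializer='ones',
                                 dtype='float32',
                                 experimental_autocast=False)
    self.built = True

  def call(self, inputs):
    return inputs * math_ops.cast(self.scale, inputs.dtype)
```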
@@ -588,8 +603,11 @@ class Layer(trackable.Trackable):
              kwargs['training'] = backend.learning_phase()
          if not self.dynamic:
            try:
              with base_layer_utils.AutoAddUpdates(self,
                                                   inputs) as auto_updater:
              with base_layer_utils.autocast_context_manager(
                  input_list,
                  self._mixed_precision_policy.should_cast_variables), (
                      base_layer_utils.AutoAddUpdates(self,
                                                      inputs)) as auto_updater:
                outputs = self.call(inputs, *args, **kwargs)
                auto_updater.set_outputs(outputs)

@@ -636,7 +654,9 @@ class Layer(trackable.Trackable):
      # Eager execution on data tensors.
      with ops.name_scope(self._name_scope()):
        self._maybe_build(inputs)
        outputs = self.call(inputs, *args, **kwargs)
        with base_layer_utils.autocast_context_manager(
            input_list, self._mixed_precision_policy.should_cast_variables):
          outputs = self.call(inputs, *args, **kwargs)
        self._handle_activity_regularization(inputs, outputs)
        self._set_mask_metadata(inputs, outputs, previous_mask)
@@ -1328,6 +1348,24 @@ class Layer(trackable.Trackable):
  # Methods & attributes below are all private and only used by the framework. #
  ##############################################################################

  def _set_dtype_and_policy(self, dtype):
    """Sets self._dtype and self._mixed_precision_policy."""
    if dtype:
      if isinstance(dtype, policy.Policy):
        self._mixed_precision_policy = dtype
        self._dtype = self._mixed_precision_policy.default_variable_dtype
      else:
        # If a non-policy dtype is passed, no casting should be done. So we use
        # the "infer" policy, which does no casting.
        self._mixed_precision_policy = policy.Policy('infer')
        self._dtype = dtypes.as_dtype(dtype).name
    else:
      self._mixed_precision_policy = policy.global_policy()
      # If the global policy has not been set, it will be an "infer" policy
      # without a default variable dtype, and so self._dtype will be None. In
      # that case, self._dtype will be set when the layer is built or called.
      self._dtype = self._mixed_precision_policy.default_variable_dtype

  def _name_scope(self):
    return self.name
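`_set_dtype_and_policy` gives the layer `dtype` argument three distinct behaviors. A short illustration, mirroring the assertions in `test_passing_policy_to_layer` further down:

```python
from tensorflow.python.keras import layers
from tensorflow.python.keras.mixed_precision.experimental import policy

# 1. A plain dtype pins the layer to the 'infer' policy with that dtype;
#    no casting of variables is done.
layers.Dense(10, dtype='float64')

# 2. A Policy object sets both the policy and the variable dtype.
layer = layers.Dense(10, dtype=policy.Policy('infer_float32_vars'))
assert layer.dtype == 'float32'  # layer.dtype is the variable dtype

# 3. No dtype falls back to the global policy; under the default 'infer'
#    policy, layer.dtype stays None until the layer is built or called.
layer = layers.Dense(10)
assert layer.dtype is None
```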
@@ -514,3 +514,32 @@ class AutoAddUpdates(object):
      self.layer.add_update(list(unconditional_updates))
    if conditional_updates:
      self.layer.add_update(list(conditional_updates), inputs=self.inputs)


def _get_var_read_dtype(input_list, should_cast):
  """Gets the dtype that AutoCastVariables should be read in."""
  if should_cast and input_list and input_list[0].dtype.is_floating:
    return input_list[0].dtype.base_dtype
  else:
    return None


def autocast_context_manager(input_list, should_cast):
  """Returns a context manager to autocast AutoCastVariables.

  Under this context manager, if `should_cast` is True, AutoCastVariables will
  be cast. If `should_cast` is False, AutoCastVariables will not be cast,
  which can be used to disable autocasting if nested under another
  call to `autocast_context_manager`.

  Args:
    input_list: The inputs to the layer with the AutoCastVariables.
    should_cast: Whether AutoCastVariables should be cast.

  Returns:
    A context manager to automatically cast AutoCastVariables.
  """
  var_read_dtype = _get_var_read_dtype(input_list, should_cast)
  return ops.get_default_graph()._enable_auto_casting_variables(  # pylint: disable=protected-access
      var_read_dtype)
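The `_enable_auto_casting_variables` hook used here is internal, but the observable behavior it enables can be sketched with a simplified stand-in (illustrative only, not the real `autocast_variable.py` implementation):

```python
import tensorflow as tf


class FakeAutoCastVariable(object):  # illustrative stand-in only
  """Wraps a float32 variable; reads are cast to a scope-selected dtype."""

  read_dtype = None  # set by an enclosing autocast scope

  def __init__(self, variable):
    self._variable = variable  # stored in float32

  def value(self):
    value = self._variable.value()
    if FakeAutoCastVariable.read_dtype is not None:
      # Inside a layer called with float16 inputs, read_dtype is float16,
      # so ops consuming this variable see a float16 tensor.
      value = tf.cast(value, FakeAutoCastVariable.read_dtype)
    return value
```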
@@ -36,6 +36,7 @@ from tensorflow.python.keras import backend
from tensorflow.python.keras.engine import base_layer
from tensorflow.python.keras.engine import base_layer_utils
from tensorflow.python.keras.engine import training_utils
from tensorflow.python.keras.mixed_precision.experimental import policy
from tensorflow.python.keras.saving import hdf5_format
from tensorflow.python.keras.utils import generic_utils
from tensorflow.python.keras.utils import layer_utils
@@ -209,6 +210,12 @@ class Network(base_layer.Layer):
    self._trackable_saver = (
        trackable_utils.saver_with_op_caching(self))

    # Networks do not need to do any casting of inputs or variables, because
    # each of its layers will handle casting through the layer's own
    # implementation. Therefore networks use the 'infer' policy, which does no
    # casting.
    self._mixed_precision_policy = policy.Policy('infer')

  @trackable.no_automatic_dependency_tracking
  def _init_graph_network(self, inputs, outputs, name=None):
    self._call_convention = (base_layer_utils
@@ -1001,7 +1001,11 @@ class Dense(Layer):
        output_shape = shape[:-1] + [self.units]
        outputs.set_shape(output_shape)
    else:
      inputs = math_ops.cast(inputs, self.dtype)
      # Cast the inputs to self.dtype, which is the variable dtype. We do not
      # cast if `should_cast_variables` is True, as in that case the variable
      # will be automatically cast to inputs.dtype.
      if not self._mixed_precision_policy.should_cast_variables:
        inputs = math_ops.cast(inputs, self.dtype)
      outputs = gen_math_ops.mat_mul(inputs, self.kernel)
    if self.use_bias:
      outputs = nn.bias_add(outputs, self.bias)
@@ -25,6 +25,7 @@ from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.mixed_precision.experimental import policy
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
@@ -298,6 +299,14 @@ class CoreLayersTest(keras_parameterized.TestCase):
    outputs = layer(inputs)
    self.assertEqual(outputs.dtype, 'float32')

  def test_dense_with_policy(self):
    inputs = ops.convert_to_tensor(
        np.random.randint(low=0, high=7, size=(2, 2)), dtype='float16')
    layer = keras.layers.Dense(5, dtype=policy.Policy('infer_float32_vars'))
    outputs = layer(inputs)
    self.assertEqual(outputs.dtype, 'float16')
    self.assertEqual(layer.kernel.dtype, 'float32')

  def test_dense_regularization(self):
    layer = keras.layers.Dense(
        3,
@@ -334,7 +334,8 @@ class BatchNormalizationV2(Layer):
          initializer=self.gamma_initializer,
          regularizer=self.gamma_regularizer,
          constraint=self.gamma_constraint,
          trainable=True)
          trainable=True,
          experimental_autocast=False)
    else:
      self.gamma = None
      if self.fused:
@@ -349,7 +350,8 @@ class BatchNormalizationV2(Layer):
          initializer=self.beta_initializer,
          regularizer=self.beta_regularizer,
          constraint=self.beta_constraint,
          trainable=True)
          trainable=True,
          experimental_autocast=False)
    else:
      self.beta = None
      if self.fused:
@@ -370,7 +372,8 @@ class BatchNormalizationV2(Layer):
          initializer=self.moving_mean_initializer,
          synchronization=tf_variables.VariableSynchronization.ON_READ,
          trainable=False,
          aggregation=tf_variables.VariableAggregation.MEAN)
          aggregation=tf_variables.VariableAggregation.MEAN,
          experimental_autocast=False)

      self.moving_variance = self.add_weight(
          name='moving_variance',
@@ -379,7 +382,8 @@ class BatchNormalizationV2(Layer):
          initializer=self.moving_variance_initializer,
          synchronization=tf_variables.VariableSynchronization.ON_READ,
          trainable=False,
          aggregation=tf_variables.VariableAggregation.MEAN)
          aggregation=tf_variables.VariableAggregation.MEAN,
          experimental_autocast=False)

      if self.renorm:
        # Create variables to maintain the moving mean and standard deviation.
@@ -390,6 +394,7 @@ class BatchNormalizationV2(Layer):
        # stack to be cleared. The nested ones use a `lambda` to set the desired
        # device and ignore any devices that may be set by the custom getter.
        def _renorm_variable(name, shape):
          """Create a renorm variable."""
          var = self.add_weight(
              name=name,
              shape=shape,
@@ -397,7 +402,8 @@ class BatchNormalizationV2(Layer):
              initializer=init_ops.zeros_initializer(),
              synchronization=tf_variables.VariableSynchronization.ON_READ,
              trainable=False,
              aggregation=tf_variables.VariableAggregation.MEAN)
              aggregation=tf_variables.VariableAggregation.MEAN,
              experimental_autocast=False)
          return var

        with distribution_strategy_context.get_strategy(
@@ -958,7 +964,8 @@ class LayerNormalization(Layer):
          initializer=self.gamma_initializer,
          regularizer=self.gamma_regularizer,
          constraint=self.gamma_constraint,
          trainable=True)
          trainable=True,
          experimental_autocast=False)
    else:
      self.gamma = None

@@ -969,7 +976,8 @@ class LayerNormalization(Layer):
          initializer=self.beta_initializer,
          regularizer=self.beta_regularizer,
          constraint=self.beta_constraint,
          trainable=True)
          trainable=True,
          experimental_autocast=False)
    else:
      self.beta = None
@@ -26,6 +26,7 @@ from tensorflow.python.framework import test_util as tf_test_util
from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.layers import normalization
from tensorflow.python.keras.mixed_precision.experimental import policy
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent
@@ -143,6 +144,19 @@ class BatchNormalizationTest(keras_parameterized.TestCase):
    _run_batchnorm_correctness_test(
        normalization.BatchNormalization, dtype='float16', fused=False)

  @tf_test_util.run_in_graph_and_eager_modes
  def test_batchnorm_policy(self):
    norm = keras.layers.BatchNormalization(
        axis=-1,
        input_shape=(4, 4, 3),
        momentum=0.8,
        dtype=policy.Policy('infer_float32_vars'))
    x = np.random.normal(size=(10, 4, 4, 3)).astype('float16')
    y = norm(x)
    self.assertEqual(y.dtype, 'float16')
    self.assertEqual(norm.beta.dtype.base_dtype, 'float32')
    self.assertEqual(norm.gamma.dtype.base_dtype, 'float32')


class BatchNormalizationV1Test(test.TestCase):
@@ -24,6 +24,31 @@ exports_files(["LICENSE"])

load("//tensorflow:tensorflow.bzl", "py_test")

py_library(
    name = "policy",
    srcs = [
        "policy.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:framework",
    ],
)

py_test(
    name = "policy_test",
    size = "medium",
    srcs = [
        "policy_test.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":policy",
        "//tensorflow/python:client_testlib",
        "//tensorflow/python:platform_test",
    ],
)

py_library(
    name = "autocast_variable",
    srcs = [
@@ -52,3 +77,16 @@ py_test(
        "@absl_py//absl/testing:parameterized",
    ],
)

py_test(
    name = "keras_test",
    size = "medium",
    srcs = ["keras_test.py"],
    deps = [
        "//tensorflow/python:client_testlib",
        "//tensorflow/python/distribute:mirrored_strategy",
        "//tensorflow/python/distribute:one_device_strategy",
        "//tensorflow/python/keras",
        "@absl_py//absl/testing:parameterized",
    ],
)
@@ -0,0 +1,360 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests mixed precision works correctly with Keras layers and models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import parameterized
import numpy as np

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.distribute import mirrored_strategy
from tensorflow.python.distribute import one_device_strategy
from tensorflow.python.eager import backprop
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import test_util
from tensorflow.python.keras import backend
from tensorflow.python.keras import layers
from tensorflow.python.keras import models
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine import base_layer
from tensorflow.python.keras.mixed_precision.experimental import policy
from tensorflow.python.keras.optimizer_v2 import gradient_descent
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.util import nest


class AssertTypeLayer(base_layer.Layer):
  """A layer which asserts its inputs are a certain type."""

  def __init__(self, assert_type=None, **kwargs):
    self._assert_type = assert_type
    super(AssertTypeLayer, self).__init__(**kwargs)

  def assert_input_types(self, inputs):
    """Asserts `inputs` are of the correct type. Should be called in call()."""
    if self._assert_type:
      inputs_flattened = nest.flatten(inputs)
      for inp in inputs_flattened:
        assert inp.dtype.base_dtype == self._assert_type, (
            'Input tensor has type %s which does not match assert type %s' %
            (inp.dtype.name, self._assert_type.name))


class AddLayer(AssertTypeLayer):
  """A layer which adds its input to a scalar variable."""

  def __init__(self, regularizer=None, use_operator=False, **kwargs):
    """Initializes the AddLayer.

    Args:
      regularizer: The regularizer on the scalar variable.
      use_operator: If True, add using the + operator. If False, add using
        tf.add.
      **kwargs: Passed to AssertTypeLayer constructor.
    """
    self._regularizer = regularizer
    self._use_operator = use_operator
    super(AddLayer, self).__init__(**kwargs)

  def build(self, _):
    self.v = self.add_weight('v', (), initializer='ones',
                             regularizer=self._regularizer)
    self.built = True

  def call(self, inputs):
    self.assert_input_types(inputs)
    assert inputs.dtype == self.v.dtype
    return self._add(inputs, self.v)

  def _add(self, x, y):
    if self._use_operator:
      return x + y
    else:
      return math_ops.add(x, y)


class AddLayerWithoutAutoCast(AddLayer):
  """Same as AddLayer, but does not use AutoCastVariables."""

  def build(self, _):
    dtype = self.dtype
    if dtype in ('float16', 'bfloat16'):
      dtype = 'float32'
    self.v = self.add_weight('v', (), initializer='ones', dtype=dtype,
                             experimental_autocast=False,
                             regularizer=self._regularizer)
    self.built = True

  def call(self, inputs):
    self.assert_input_types(inputs)
    assert self.v.dtype in (dtypes.float32, dtypes.float64)
    return self._add(inputs, math_ops.cast(self.v, inputs.dtype))


class IdentityRegularizer(regularizers.Regularizer):

  def __call__(self, x):
    assert x.dtype == dtypes.float32
    return array_ops.identity(x)


def create_one_device_strategy():
  return one_device_strategy.OneDeviceStrategy('cpu:0')


def create_mirrored_strategy():
  if context.num_gpus() >= 1:
    return mirrored_strategy.MirroredStrategy(['cpu:0', 'gpu:0'])
  else:
    return mirrored_strategy.MirroredStrategy(['cpu:0'])


TESTCASES = ({
    'testcase_name': 'base',
    'strategy_fn': create_one_device_strategy
}, {
    'testcase_name': 'distribute',
    'strategy_fn': create_mirrored_strategy
})


class KerasLayerTest(test.TestCase, parameterized.TestCase):
  """Test mixed precision with Keras layers."""

  @parameterized.named_parameters(*TESTCASES)
  @test_util.run_in_graph_and_eager_modes
  def test_variables_in_float32(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        self.assertEqual(layer.v.dtype, dtypes.float32)
        self.assertEqual(y.dtype, dtypes.float16)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(y), 2.)

  @parameterized.named_parameters(*TESTCASES)
  @test_util.run_in_graph_and_eager_modes
  def test_layer_with_non_autocast_variable(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        layer = AddLayerWithoutAutoCast(assert_type=dtypes.float16)
        y = layer(x)
        self.assertEqual(layer.v.dtype, dtypes.float32)
        self.assertEqual(y.dtype, dtypes.float16)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(y), 2.)

  @parameterized.named_parameters(*TESTCASES)
  @test_util.run_in_graph_and_eager_modes
  def test_layer_regularizer_runs_in_float32(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        # Test on AddLayer
        layer = AddLayer(assert_type=dtypes.float16,
                         regularizer=IdentityRegularizer())
        layer(x)
        (regularizer_loss,) = layer.losses
        self.assertEqual(regularizer_loss.dtype, dtypes.float32)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(regularizer_loss), 1.)

        # Test on AddLayerWithoutAutoCast
        layer = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                        regularizer=IdentityRegularizer())
        layer(x)
        (regularizer_loss,) = layer.losses
        self.assertEqual(regularizer_loss.dtype, dtypes.float32)
        self.evaluate(variables.global_variables_initializer())
        self.assertEqual(self.evaluate(regularizer_loss), 1.)

  @parameterized.named_parameters(*TESTCASES)
  @test_util.run_in_graph_and_eager_modes
  def test_passing_policy_to_layer(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope():
      # Passing a Policy to 'dtype' sets the policy for that layer.
      layer = AddLayer(assert_type=dtypes.float16,
                       dtype=policy.Policy('infer_float32_vars'))
      # layer.dtype refers to the variable dtype
      self.assertEqual(layer.dtype, dtypes.float32)
      layer(x)
      self.assertEqual(layer.v.dtype, dtypes.float32)
      with policy.policy_scope('infer_float32_vars'):
        # Passing a Policy to dtype overrides the global Policy
        layer = AddLayer(assert_type=dtypes.float16,
                         dtype=policy.Policy('infer'))
        # layer dtype is not yet known
        self.assertEqual(layer.dtype, None)
        layer(x)
        self.assertEqual(layer.v.dtype, dtypes.float16)
        self.assertEqual(layer.dtype, dtypes.float16)

  @parameterized.named_parameters(*TESTCASES)
  @test_util.run_in_graph_and_eager_modes
  def test_gradient(self, strategy_fn):
    x = constant_op.constant([1.], dtype=dtypes.float16)
    with strategy_fn().scope() as strategy:
      with policy.policy_scope('infer_float32_vars'):
        layer = AddLayer(assert_type=dtypes.float16)
        def run_fn():
          with backprop.GradientTape() as tape:
            y = layer(x)
            # Divide by num_replicas_in_sync, as the effective total loss is
            # the sum of each replica's loss.
            y /= strategy.num_replicas_in_sync

          # Learning rate is small enough that if applied to a float16
          # variable, the variable will not change. So this tests that the
          # learning rate is not applied to a float16 value, but instead to
          # the float32 variable.
          opt = gradient_descent.SGD(2 ** -14)
          grad = tape.gradient(y, layer.v)
          return opt.apply_gradients([(grad, layer.v)])

        op = strategy.experimental_run(run_fn)
        if not context.executing_eagerly():
          self.evaluate(variables.global_variables_initializer())
          self.evaluate(op)
        # The gradient with respect to the variable is 1. Since the variable
        # is initialized with 1 and the learning rate is 2**-14, the new
        # variable value should be: init_val - gradient * learning_rate,
        # which is 1 - 1 * 2**-14
        self.assertEqual(self.evaluate(layer.v), 1 - 2 ** -14)


class KerasModelTest(test.TestCase, parameterized.TestCase):
  """Test mixed precision with Keras models."""

  @parameterized.named_parameters({
      'testcase_name': 'base',
      'strategy_fn': create_one_device_strategy,
  }, {
      'testcase_name': 'distribute',
      'strategy_fn': create_mirrored_strategy,
  }, {
      'testcase_name': 'operator',
      'strategy_fn': create_mirrored_strategy,
      'use_operator': True
  }, {
      'testcase_name': 'regularizer',
      'strategy_fn': create_mirrored_strategy,
      'use_regularizer': True
  })
  @test_util.run_in_graph_and_eager_modes
  def test_model(self, strategy_fn, use_operator=False, use_regularizer=False):
    regularizer = IdentityRegularizer() if use_regularizer else None
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        x = layers.Input(shape=(), batch_size=2, dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16, use_operator=use_operator,
                         regularizer=regularizer)
        y = layer(x)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          del y_true
          return math_ops.reduce_mean(y_pred)

        # Learning rate is small enough that if applied to a float16 variable,
        # the variable will not change. So this tests that the learning rate
        # is not applied to a float16 value, but instead to the float32
        # variable.
        opt = gradient_descent.SGD(2 ** -14)
        model.compile(opt, loss=loss_fn)

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        # Variable starts at 1, and should have gradient of 2 ** -14
        # subtracted from it.
        expected = 1 - 2 ** -14
        if use_regularizer:
          # Regularizer adds another 2 ** -14 to the gradient.
          expected -= 2 ** -14
        self.assertEqual(backend.eval(layer.v), expected)

  @parameterized.named_parameters({
      'testcase_name': 'base',
      'strategy_fn': create_one_device_strategy,
  }, {
      'testcase_name': 'distribute',
      'strategy_fn': create_mirrored_strategy,
  })
  @test_util.run_in_graph_and_eager_modes
  def test_advanced_model(self, strategy_fn):

    # The advanced model tests mixed-precision-related features that would
    # occur in a resnet50 model. It tests a model that has:
    #  * Multiple layers, some of which use auto-cast variables and some of
    #    which do not
    #  * Regularization on some variables and not others.

    strategy = strategy_fn()

    learning_rate = 2 ** -14

    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(), batch_size=2, dtype=dtypes.float16)
        layer1 = AddLayer(assert_type=dtypes.float16,
                          regularizer=IdentityRegularizer(), use_operator=True)
        layer2 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         use_operator=True)
        layer3 = AddLayer(assert_type=dtypes.float16, use_operator=False)
        layer4 = AddLayerWithoutAutoCast(assert_type=dtypes.float16,
                                         regularizer=IdentityRegularizer(),
                                         use_operator=False)
        y = layer1(x)
        y = layer2(y)
        y = layer3(y)
        y = layer4(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          self.assertEqual(y_true.dtype, dtypes.float32)
          self.assertEqual(y_pred.dtype, dtypes.float32)
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(learning_rate)
        model.compile(opt, loss=loss_fn)

        x = np.ones((2, 1))
        y = np.ones((2, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(2)
        model.fit(dataset)
        for layer in (layer1, layer2, layer3, layer4):
          if layer.losses:
            # Layer has weight regularizer
            self.assertEqual(backend.eval(layer.v), 1 - 2 * learning_rate)
          else:
            # Layer does not have weight regularizer
            self.assertEqual(backend.eval(layer.v), 1 - learning_rate)


if __name__ == '__main__':
  test.main()
tensorflow/python/keras/mixed_precision/experimental/policy.py (new file, 160 lines)
@@ -0,0 +1,160 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the Policy class for mixed precision training."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import contextlib

from tensorflow.python.util.tf_export import keras_export


@keras_export('keras.mixed_precision.experimental.Policy')
class Policy(object):
  """A mixed precision policy for a Keras layer.

  A mixed precision policy determines the floating-point dtype that Keras
  layers should create variables in. For non-default policies, if the variable
  dtype does not match the input dtype, variables will automatically be cast
  to the input dtype to avoid type errors. Policies can be passed to the
  'dtype' argument of layer constructors, or a global policy can be set with
  'set_policy'.

  In the near future, policies will also determine the computation dtype of
  layers, as well as the loss scaling algorithm.

  Policies are intended to enable mixed precision training, which requires
  using float32 variables and [b]float16 computations for most layers. The
  term "mixed precision" refers to the use of both float16 (or bfloat16) and
  float32 in a model. See https://arxiv.org/abs/1710.03740 for more
  information on mixed precision training.

  Policies are constructed by passing a string to the `name` constructor
  argument. `name` determines the behavior of the policy. Currently, `name`
  can be one of the following values.

    * 'infer': Infer the variable and computation dtypes from the input dtype.
      This is the default behavior.
    * 'infer_float32_vars': Infer the computation dtypes from the input
      dtype, but create variables in float32. Variables will be cast to the
      computation dtype. This is intended to enable mixed precision. Users can
      cast tensors to float16 before passing them to a layer, which causes the
      layer to run its computation in float16 while keeping variables in
      float32.

  To use mixed precision in a model, the 'infer_float32_vars' policy can be
  used alongside float16 input tensors, which results in float16 computations
  and float32 variables. For example:

  ```python
  tf.keras.mixed_precision.experimental.set_policy('infer_float32_vars')
  model = tf.keras.models.Sequential([
      tf.keras.layers.Input((100,), dtype='float16'),
      tf.keras.layers.Dense(10),
      tf.keras.layers.Dense(10),
      tf.keras.layers.Lambda(lambda x: tf.cast(x, 'float32')),
      tf.keras.layers.Activation('softmax')
  ])
  ```

  Alternatively, the policy can be passed to individual layers instead of
  setting the global policy with `set_policy`:

  ```python
  policy = tf.keras.mixed_precision.experimental.Policy('infer_float32_vars')
  model = tf.keras.models.Sequential([
      tf.keras.layers.Input((100,), dtype='float16'),
      tf.keras.layers.Dense(10, dtype=policy),
      tf.keras.layers.Dense(10, dtype=policy),
      tf.keras.layers.Lambda(lambda x: tf.cast(x, 'float32')),
      tf.keras.layers.Activation('softmax')
  ])
  ```
  """

  def __init__(self, name):
    self._name = name
    if name == 'infer':
      self._default_variable_dtype = None
    elif name == 'infer_float32_vars':
      self._default_variable_dtype = 'float32'
    else:
      raise ValueError('"name" argument to Policy constructor must be "infer" '
                       'or "infer_float32_vars", but got: %s' % name)

  @property
  def name(self):
    """Returns the name of the policy: "infer" or "infer_float32_vars"."""
    return self._name

  @property
  def default_variable_dtype(self):
    """Returns the default variable dtype of this policy.

    This is the dtype layers will create their variables in, unless a layer
    explicitly chooses a different dtype. Layers will cast variables to the
    appropriate dtype to avoid type errors.

    Returns:
      The default variable dtype of this policy, or None if the default
      variable dtype should be derived from the inputs.
    """
    return self._default_variable_dtype

  @property
  def should_cast_variables(self):
    """Returns True if variables should be cast."""
    return self.default_variable_dtype is not None

  # TODO(reedwm): Implement get_config/from_config.


# TODO(reedwm): Make this thread local?
_global_policy = Policy('infer')


@keras_export('keras.mixed_precision.experimental.global_policy')
def global_policy():
  """Returns the global Policy.

  The global policy is the default policy used for layers, if no policy is
  passed to the layer constructor. When TensorFlow starts, the global policy
  is set to an "infer" policy, and can be changed with `set_policy`.

  Returns:
    The global Policy.
  """
  return _global_policy


@keras_export('keras.mixed_precision.experimental.set_policy')
def set_policy(policy):
  """Sets the global Policy."""
  global _global_policy
  if not isinstance(policy, Policy):
    policy = Policy(policy)
  _global_policy = policy


# TODO(reedwm): Make this thread local
@contextlib.contextmanager
def policy_scope(policy):
  old_policy = _global_policy
  try:
    set_policy(policy)
    yield
  finally:
    set_policy(old_policy)
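`policy_scope` has no docstring yet; its intended behavior, as exercised by `test_policy_scope` below, is to swap the global policy temporarily and restore the previous one on exit:

```python
from tensorflow.python.keras.mixed_precision.experimental import policy

assert policy.global_policy().name == 'infer'
with policy.policy_scope('infer_float32_vars'):
  # Layers constructed here default to float32 variables with inferred
  # computation dtypes.
  assert policy.global_policy().name == 'infer_float32_vars'
# The previous global policy is restored, even if the block raises.
assert policy.global_policy().name == 'infer'
```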
@@ -0,0 +1,69 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests Policies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras.mixed_precision.experimental import policy as mp_policy
from tensorflow.python.platform import test


@test_util.run_all_in_graph_and_eager_modes
class PolicyTest(test.TestCase):
  """Tests Policies."""

  def test_infer(self):
    policy = mp_policy.Policy('infer')
    self.assertEqual(policy.name, 'infer')
    self.assertEqual(policy.default_variable_dtype, None)

  def test_infer_float32_vars(self):
    policy = mp_policy.Policy('infer_float32_vars')
    self.assertEqual(policy.name, 'infer_float32_vars')
    self.assertEqual(policy.default_variable_dtype, 'float32')

  def test_global_policy(self):
    self.assertEqual(mp_policy.global_policy().name, 'infer')
    default_policy = mp_policy.global_policy()
    try:
      mp_policy.set_policy('infer_float32_vars')
      self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
      self.assertEqual(mp_policy.global_policy().default_variable_dtype,
                       'float32')
      with ops.Graph().as_default():  # Policies are not associated with a graph
        self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
      mp_policy.set_policy('infer')
      self.assertEqual(mp_policy.global_policy().name, 'infer')
      self.assertEqual(mp_policy.global_policy().default_variable_dtype, None)
      policy = mp_policy.Policy('infer_float32_vars')
      mp_policy.set_policy(policy)
      self.assertIs(mp_policy.global_policy(), policy)
    finally:
      mp_policy.set_policy(default_policy)

  def test_policy_scope(self):
    with mp_policy.policy_scope('infer_float32_vars'):
      self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
      with mp_policy.policy_scope('infer'):
        self.assertEqual(mp_policy.global_policy().name, 'infer')
      self.assertEqual(mp_policy.global_policy().name, 'infer_float32_vars')
    self.assertEqual(mp_policy.global_policy().name, 'infer')


if __name__ == '__main__':
  test.main()
@@ -307,7 +307,8 @@ class Layer(base_layer.Layer):
                 use_resource=None,
                 synchronization=vs.VariableSynchronization.AUTO,
                 aggregation=vs.VariableAggregation.NONE,
                 partitioner=None):
                 partitioner=None,
                 **kwargs):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
@@ -342,6 +343,7 @@ class Layer(base_layer.Layer):
        `tf.variable_axis_size_partitioner`. For more details, see the
        documentation of `tf.get_variable` and the "Variable Partitioners
        and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable. Usually either a `Variable` or `ResourceVariable`
@@ -354,6 +356,9 @@ class Layer(base_layer.Layer):
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
    for kwarg in kwargs:
      if kwarg != 'experimental_autocast':
        raise TypeError('Unknown keyword argument:', kwarg)
    if self._keras_style:
      return super(Layer, self).add_weight(
          name=name,
@@ -366,7 +371,8 @@ class Layer(base_layer.Layer):
          use_resource=use_resource,
          synchronization=vs.VariableSynchronization.AUTO,
          aggregation=vs.VariableAggregation.NONE,
          partitioner=partitioner)
          partitioner=partitioner,
          **kwargs)

    if synchronization == vs.VariableSynchronization.ON_READ:
      if trainable:
@@ -433,7 +439,8 @@ class Layer(base_layer.Layer):
          use_resource=use_resource,
          synchronization=synchronization,
          aggregation=aggregation,
          getter=vs.get_variable)
          getter=vs.get_variable,
          **kwargs)

    if regularizer:
      if (ops.executing_eagerly_outside_functions()
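This hunk forwards the new keyword through the v1 `tf.layers` base class (note the `keywords=kwargs` argspec updates in the golden files further down), so the same opt-out works for `tf.layers`-style layers. A hedged sketch with a hypothetical layer:

```python
import tensorflow as tf
from tensorflow.python.layers import base


class MyV1Layer(base.Layer):  # hypothetical tf.layers-style layer
  def build(self, input_shape):
    # Forwarded via **kwargs to the Keras base Layer.add_weight, where it
    # disables AutoCastVariable wrapping for this weight.
    self.bias = self.add_weight('bias', (int(input_shape[-1]),),
                                initializer=tf.zeros_initializer(),
                                experimental_autocast=False)
    self.built = True

  def call(self, inputs):
    return inputs + tf.cast(self.bias, inputs.dtype)
```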
@@ -90,6 +90,8 @@ KERAS_API_INIT_FILES = [
    "keras/layers/experimental/__init__.py",
    "keras/losses/__init__.py",
    "keras/metrics/__init__.py",
    "keras/mixed_precision/__init__.py",
    "keras/mixed_precision/experimental/__init__.py",
    "keras/models/__init__.py",
    "keras/optimizers/__init__.py",
    "keras/optimizers/schedules/__init__.py",
@@ -114,6 +114,8 @@ KERAS_API_INIT_FILES_V1 = [
    "keras/layers/experimental/__init__.py",
    "keras/losses/__init__.py",
    "keras/metrics/__init__.py",
    "keras/mixed_precision/__init__.py",
    "keras/mixed_precision/experimental/__init__.py",
    "keras/models/__init__.py",
    "keras/optimizers/__init__.py",
    "keras/optimizers/schedules/__init__.py",
@@ -0,0 +1,21 @@
path: "tensorflow.keras.mixed_precision.experimental.Policy"
tf_class {
  is_instance: "<class \'tensorflow.python.keras.mixed_precision.experimental.policy.Policy\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "default_variable_dtype"
    mtype: "<type \'property\'>"
  }
  member {
    name: "name"
    mtype: "<type \'property\'>"
  }
  member {
    name: "should_cast_variables"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None"
  }
}
@@ -0,0 +1,15 @@
path: "tensorflow.keras.mixed_precision.experimental"
tf_module {
  member {
    name: "Policy"
    mtype: "<type \'type\'>"
  }
  member_method {
    name: "global_policy"
    argspec: "args=[], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "set_policy"
    argspec: "args=[\'policy\'], varargs=None, keywords=None, defaults=None"
  }
}
@@ -0,0 +1,7 @@
path: "tensorflow.keras.mixed_precision"
tf_module {
  member {
    name: "experimental"
    mtype: "<type \'module\'>"
  }
}
@@ -56,6 +56,10 @@ tf_module {
    name: "metrics"
    mtype: "<type \'module\'>"
  }
  member {
    name: "mixed_precision"
    mtype: "<type \'module\'>"
  }
  member {
    name: "models"
    mtype: "<type \'module\'>"
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -118,7 +118,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -117,7 +117,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@ -118,7 +118,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "add_weight"
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "apply"
|
||||
|
@@ -117,7 +117,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -116,7 +116,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -116,7 +116,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -116,7 +116,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -114,7 +114,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -117,7 +117,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -117,7 +117,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -117,7 +117,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -118,7 +118,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -118,7 +118,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -124,7 +124,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -129,7 +129,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -124,7 +124,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -123,7 +123,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"
@@ -0,0 +1,21 @@
+path: "tensorflow.keras.mixed_precision.experimental.Policy"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.mixed_precision.experimental.policy.Policy\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "default_variable_dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "should_cast_variables"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None"
+  }
+}
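The new Policy golden above fixes the initial public surface: a name-only constructor plus three read-only properties. A short usage sketch, assuming 'infer_float32_vars' is among the names the constructor accepts (the valid set is defined by the implementation, not by this golden file):

    import tensorflow as tf

    # Per the commit description, a policy in this first cut only
    # determines layer variable dtypes; computation dtypes come later.
    policy = tf.keras.mixed_precision.experimental.Policy('infer_float32_vars')

    print(policy.name)                    # the string the policy was built with
    print(policy.default_variable_dtype)  # dtype used for layer variables (may be None)
    print(policy.should_cast_variables)   # whether layer variables get casting wrappers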
@@ -0,0 +1,15 @@
+path: "tensorflow.keras.mixed_precision.experimental"
+tf_module {
+  member {
+    name: "Policy"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "global_policy"
+    argspec: "args=[], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "set_policy"
+    argspec: "args=[\'policy\'], varargs=None, keywords=None, defaults=None"
+  }
+}
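global_policy and set_policy suggest a process-wide default that layers can consult when constructed without an explicit policy. A sketch of the expected round trip, with the same assumption about the policy name:

    import tensorflow as tf

    mp = tf.keras.mixed_precision.experimental

    # Install a global default policy, then read it back.
    mp.set_policy(mp.Policy('infer_float32_vars'))
    print(mp.global_policy().name)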
@@ -0,0 +1,7 @@
+path: "tensorflow.keras.mixed_precision"
+tf_module {
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
+}
@@ -56,6 +56,10 @@ tf_module {
     name: "metrics"
     mtype: "<type \'module\'>"
   }
+  member {
+    name: "mixed_precision"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "models"
     mtype: "<type \'module\'>"
@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -125,7 +125,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -132,7 +132,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -128,7 +128,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"

@@ -124,7 +124,7 @@ tf_class {
   }
   member_method {
     name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\', \'partitioner\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\', \'None\'], "
   }
   member_method {
     name: "apply"