Support Layer.get_config with a mixed_[b]float16 dtype policy.
Note: saving a model with Model.save() is still unsupported.

PiperOrigin-RevId: 272759486
commit 3c30766e1b
parent 0c25833dfe
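
For illustration (not part of the diff below), a rough sketch of the round-trip this change enables. `MyLayer` is a stand-in for any Keras layer subclass; the expected config value mirrors the new tests further down:

    from tensorflow.python.keras.mixed_precision.experimental import policy

    layer = MyLayer(dtype=policy.Policy('mixed_float16'))
    config = layer.get_config()
    # config['dtype'] is now the serialized policy rather than a plain dtype string:
    #   {'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}
    restored = MyLayer.from_config(config)
    # _set_dtype_policy() recognizes the dict and deserializes it back into a Policy,
    # so the restored layer keeps float32 variables and computes in float16.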
@@ -468,8 +468,10 @@ class Layer(module.Module):
     config = {'name': self.name, 'trainable': self.trainable}
     if hasattr(self, '_batch_input_shape'):
       config['batch_input_shape'] = self._batch_input_shape
+    # TODO(reedwm): Remove the hasattr(self, 'dtype') check. All layers have a
+    # dtype.
     if hasattr(self, 'dtype'):
-      config['dtype'] = self.dtype
+      config['dtype'] = policy.serialize(self._dtype_policy)
     if hasattr(self, 'dynamic'):
       # Only include `dynamic` in the `config` if it is `True`
       if self.dynamic:
@@ -484,7 +486,6 @@ class Layer(module.Module):
     if len(extra_args) > 1 and hasattr(self.get_config, '_is_default'):
       raise NotImplementedError('Layers with arguments in `__init__` must '
                                 'override `get_config`.')
-    # TODO(reedwm): Handle serializing self._dtype_policy.
     return config

   @classmethod
@@ -1677,6 +1678,8 @@ class Layer(module.Module):
     """Sets self._dtype_policy."""
     if isinstance(dtype, policy.Policy):
       self._dtype_policy = dtype
+    elif isinstance(dtype, dict):
+      self._dtype_policy = policy.deserialize(dtype)
     elif dtype:
       self._dtype_policy = policy.Policy(dtypes.as_dtype(dtype).name)
     else:
@@ -94,6 +94,9 @@ class AddLayer(AssertTypeLayer):
       **kwargs: Passed to AssertTypeLayer constructor.
     """
     self._regularizer = regularizer
+    if isinstance(regularizer, dict):
+      self._regularizer = regularizers.deserialize(regularizer,
+                                                   custom_objects=globals())
     self._use_operator = use_operator
     self._var_name = var_name
     super(AddLayer, self).__init__(**kwargs)
@@ -116,8 +119,7 @@ class AddLayer(AssertTypeLayer):

   def get_config(self):
     config = super(AddLayer, self).get_config()
-    assert self._regularizer is None, (
-        'regularizer must be None to get config for AddLayer')
+    config['regularizer'] = regularizers.serialize(self._regularizer)
     config['use_operator'] = self._use_operator
     config['var_name'] = self._var_name
     config['assert_type'] = self._assert_type
@@ -159,6 +161,9 @@ class IdentityRegularizer(regularizers.Regularizer):
     assert x.dtype == dtypes.float32
     return array_ops.identity(x)

+  def get_config(self):
+    return {}
+

 # If called outside any strategy.scope() calls, this will return the default
 # strategy.
@@ -429,6 +434,83 @@ class KerasLayerTest(keras_parameterized.TestCase):
     self._test_checkpointing_layer_weights(
         strategy_fn, mixed_prec_when_saving=False, mixed_prec_when_loading=True)

+  @parameterized.named_parameters(*TESTCASES)
+  @test_util.run_in_graph_and_eager_modes
+  @testing_utils.enable_v2_dtype_behavior
+  def test_config(self, strategy_fn):
+    x = constant_op.constant([1.], dtype=dtypes.float16)
+    with strategy_fn().scope():
+      for layer, dtype in (
+          (AddLayer(), 'float32'),
+          (AddLayer(dtype='float64'), 'float64'),
+          (AddLayer(dtype=policy.Policy('float64')), 'float64')):
+        config = layer.get_config()
+        self.assertEqual(config['dtype'], dtype)
+        self.assertIsInstance(config['dtype'], str)
+        layer = AddLayer.from_config(config)
+        self.assertEqual(layer.dtype, dtype)
+        self.assertEqual(layer(x).dtype, dtype)
+        self.assertEqual(layer.v.dtype, dtype)
+
+      layer = AddLayer(dtype=policy.Policy('mixed_float16'))
+      config = layer.get_config()
+      self.assertEqual(config['dtype'],
+                       {'class_name': 'Policy',
+                        'config': {'name': 'mixed_float16'}})
+      layer = AddLayer.from_config(config)
+      self.assertEqual(layer.dtype, 'float32')
+      self.assertEqual(layer(x).dtype, 'float16')
+      self.assertEqual(layer.v.dtype, 'float32')
+
+      layer = AddLayer(dtype=policy.Policy('mixed_float16', loss_scale=None))
+      config = layer.get_config()
+      self.assertEqual(config['dtype'],
+                       {'class_name': 'Policy',
+                        'config': {'name': 'mixed_float16',
+                                   'loss_scale': None}})
+      layer = AddLayer.from_config(config)
+      self.assertEqual(layer.dtype, 'float32')
+      self.assertEqual(layer(x).dtype, 'float16')
+      self.assertEqual(layer.v.dtype, 'float32')
+
+      layer = AddLayer(dtype=policy.Policy('float64', loss_scale=2.))
+      config = layer.get_config()
+      self.assertEqual(config['dtype'],
+                       {'class_name': 'Policy',
+                        'config': {'name': 'float64',
+                                   'loss_scale': {
+                                       'class_name': 'FixedLossScale',
+                                       'config': {'loss_scale_value': 2.0}}}})
+      layer = AddLayer.from_config(config)
+      self.assertEqual(layer.dtype, 'float64')
+      self.assertEqual(layer(x).dtype, 'float64')
+      self.assertEqual(layer.v.dtype, 'float64')
+
+      layer = AddLayer(dtype=policy.Policy('infer'))
+      config = layer.get_config()
+      self.assertIsNone(config['dtype'])
+      layer = AddLayer.from_config(config)
+      # If a layer is serialized with the "infer" policy, when deserialized into
+      # TF 2 it will have the global policy instead of "infer". This is because
+      # "infer" is serialized into None, and passing dtype=None in TensorFlow 2
+      # indicates to use the global policy.
+      self.assertEqual(layer.dtype, 'float32')
+      self.assertEqual(layer(x).dtype, 'float32')
+      self.assertEqual(layer.v.dtype, 'float32')
+
+      layer = AddLayer(dtype=policy.Policy('infer', loss_scale=2.))
+      config = layer.get_config()
+      self.assertEqual(config['dtype'],
+                       {'class_name': 'Policy',
+                        'config': {'name': 'infer',
+                                   'loss_scale': {
+                                       'class_name': 'FixedLossScale',
+                                       'config': {'loss_scale_value': 2.0}}}})
+      layer = AddLayer.from_config(config)
+      self.assertEqual(layer.dtype, None)
+      self.assertEqual(layer(x).dtype, 'float16')
+      self.assertEqual(layer.v.dtype, 'float16')
+
   @test_util.run_in_graph_and_eager_modes
   def test_delete_variable(self):
     layer = base_layer.Layer(dtype=policy.Policy('mixed_float16'))
@@ -485,6 +567,15 @@ class KerasModelTest(keras_parameterized.TestCase):
          'testcase_name': 'infer',
          'strategy_fn': create_mirrored_strategy,
          'policy_name': 'mixed_float16'
+      }, {
+          'testcase_name': 'get_config',
+          'strategy_fn': create_mirrored_strategy,
+          'get_config': True
+      }, {
+          'testcase_name': 'get_config_with_regularizer',
+          'strategy_fn': create_mirrored_strategy,
+          'get_config': True,
+          'use_regularizer': True,
      }, {
          'testcase_name': 'norun_distributed',
          'strategy_fn': create_mirrored_strategy,
@@ -496,6 +587,7 @@ class KerasModelTest(keras_parameterized.TestCase):
                  use_operator=False,
                  use_regularizer=False,
                  policy_name='mixed_float16',
+                 get_config=False,
                  experimental_run_tf_function=True):
    if not self._is_strategy_supported(strategy_fn, check_model_type=True):
      return
@@ -521,6 +613,12 @@ class KerasModelTest(keras_parameterized.TestCase):
       layer_list += [layer, cast_f32_layer]
       model = testing_utils.get_model_from_layers(
           layer_list, input_shape=(1,), input_dtype=dtypes.float16)
+      if get_config:
+        config = model.get_config()
+        model = model.__class__.from_config(
+            config, custom_objects={'AddLayer': AddLayer})
+        (layer,) = (layer for layer in model.layers
+                    if isinstance(layer, AddLayer))

       def loss_fn(y_true, y_pred):
         del y_true
@@ -703,6 +801,15 @@ class KerasModelTest(keras_parameterized.TestCase):
          'testcase_name': 'pass_loss_scale_to_policy',
          'strategy_fn': create_mirrored_strategy,
          'pass_loss_scale_to_policy': True,
+      }, {
+          'testcase_name': 'get_config',
+          'strategy_fn': create_mirrored_strategy,
+          'get_config': True,
+      }, {
+          'testcase_name': 'get_config_and_pass_loss_scale_to_policy',
+          'strategy_fn': create_mirrored_strategy,
+          'get_config': True,
+          'pass_loss_scale_to_policy': True,
      }, {
          'testcase_name': 'norun_distributed',
          'strategy_fn': create_mirrored_strategy,
@@ -711,6 +818,7 @@ class KerasModelTest(keras_parameterized.TestCase):
  def test_dynamic_loss_scaling(self,
                                strategy_fn,
                                pass_loss_scale_to_policy=False,
+                               get_config=False,
                                experimental_run_tf_function=True):
    if not self._is_strategy_supported(strategy_fn):
      return
@@ -746,6 +854,12 @@ class KerasModelTest(keras_parameterized.TestCase):
       y = core.Lambda(identity_with_grad_check_fn)(y)
       y = math_ops.cast(y, dtypes.float32)
       model = models.Model(inputs=x, outputs=y)
+      if get_config:
+        config = model.get_config()
+        model = model.__class__.from_config(
+            config, custom_objects={'AddLayer': AddLayer})
+        (layer,) = (layer for layer in model.layers
+                    if isinstance(layer, AddLayer))

       def loss_fn(y_true, y_pred):
         del y_true
@@ -980,6 +1094,8 @@ class KerasModelTest(keras_parameterized.TestCase):
  def test_save_model_with_dynamic_loss_scaling(self, strategy_fn, h5=False):
    if not self._is_strategy_supported(strategy_fn):
      return
+    # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
+    # as well.
    strategy = strategy_fn()
    if (isinstance(strategy, mirrored_strategy.MirroredStrategy) and
        not context.executing_eagerly()):
@@ -24,8 +24,9 @@ import six
 from tensorflow.python.framework import dtypes
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine import base_layer_utils
+from tensorflow.python.keras.mixed_precision.experimental import loss_scale as keras_loss_scale_module
+from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.platform import tf_logging
-from tensorflow.python.training.experimental import loss_scale as loss_scale_module
 from tensorflow.python.training.experimental import mixed_precision_global_state
 from tensorflow.python.util.tf_export import keras_export

@@ -306,12 +307,15 @@ class Policy(object):

     if loss_scale == USE_DEFAULT:
       loss_scale = 'dynamic' if name == 'mixed_float16' else None
+      self._using_default_loss_scale = True
+    else:
+      self._using_default_loss_scale = False
     if loss_scale and self._compute_dtype not in (None, 'float16'):
       tf_logging.warn('Creating a Policy with a loss scale is only useful for '
                       'float16 policies. You passed loss_scale=%r for policy '
                       '%s. Consider not passing any loss_scale instead.' %
                       (loss_scale, name))
-    self._loss_scale = loss_scale_module.get(loss_scale)
+    self._loss_scale = keras_loss_scale_module.get(loss_scale)

   def _parse_name(self, name):
     """Parses a Policy name into a compute and variable dtype.
@@ -426,6 +430,25 @@ class Policy(object):
   def __repr__(self):
     return '<Policy "%s", loss_scale=%s>' % (self._name, self.loss_scale)

+  def get_config(self):
+    config = {
+        'name': self.name
+    }
+    if not self._using_default_loss_scale:
+      # We only include the loss scale if the default loss scale is not used.
+      # This allows us to change the loss scale config format without breaking
+      # users who use the default loss scale.
+      config['loss_scale'] = keras_loss_scale_module.serialize(self.loss_scale)
+    return config
+
+  @classmethod
+  def from_config(cls, config, custom_objects=None):
+    if 'loss_scale' in config and isinstance(config['loss_scale'], dict):
+      config = config.copy()
+      config['loss_scale'] = keras_loss_scale_module.deserialize(
+          config['loss_scale'], custom_objects=custom_objects)
+    return cls(**config)
+

 def with_input_dtype(policy, dtype):
   """Copies "infer" `policy`, adding `dtype` to it.
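
A rough usage sketch of the two methods added above (not part of the diff; values mirror the policy_test.py cases further down):

    p = Policy('mixed_float16', loss_scale=2.)
    p.get_config()
    # -> {'name': 'mixed_float16',
    #     'loss_scale': {'class_name': 'FixedLossScale', 'config': {'loss_scale_value': 2.0}}}
    Policy.from_config(p.get_config())  # deserializes the nested loss scale, then calls __init__

    Policy('mixed_float16').get_config()
    # -> {'name': 'mixed_float16'}  (the default loss scale is omitted so its
    #    serialization format can change later without breaking saved configs)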
@@ -556,3 +579,55 @@ def policy_scope(policy):
     yield
   finally:
     set_policy(old_policy)
+
+
+def _is_convertible_to_dtype(dtype):
+  try:
+    dtypes.as_dtype(dtype)
+    return True
+  except TypeError:
+    return False
+
+
+def _policy_equivalent_to_dtype(policy):
+  """Returns True if the Policy is equivalent to a single dtype.
+
+  A policy is equivalent to a single dtype if the policy's compute and variable
+  dtypes are the same and the policy does not cause the layer/model to have
+  additional behavior, such as loss scaling.
+
+  The "infer" policy is considered equivalent to a single dtype.
+
+  Args:
+    policy: A Policy.
+
+  Returns:
+    True, if the policy is equivalent to a single dtype.
+  """
+  # We use type() instead of isinstance because a subclass of Policy is never
+  # equivalent to a dtype.
+  return (type(policy) == Policy and  # pylint: disable=unidiomatic-typecheck
+          list(policy.get_config().keys()) == ['name'] and
+          (policy.name == 'infer' or _is_convertible_to_dtype(policy.name)))
+
+
+def serialize(policy):
+  if _policy_equivalent_to_dtype(policy):
+    # We return either None or the policy name for compatibility with older
+    # versions of Keras. If the policy name is returned, it is a dtype string
+    # such as 'float32'.
+    return None if policy.name == 'infer' else policy.name
+  return generic_utils.serialize_keras_object(policy)
+
+
+def deserialize(config, custom_objects=None):
+  if isinstance(config, str) and _is_convertible_to_dtype(config):
+    return Policy(config)
+  if config is None:
+    return Policy('infer')
+  module_objects = {'Policy': Policy}
+  return generic_utils.deserialize_keras_object(
+      config,
+      module_objects=module_objects,
+      custom_objects=custom_objects,
+      printable_module_name='dtype policy')
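
A rough sketch of how the module-level helpers added above behave (not part of the diff; it restates the test_serialization cases below):

    serialize(Policy('float32'))        # -> 'float32'  (plain dtype string, readable by older Keras)
    serialize(Policy('infer'))          # -> None
    serialize(Policy('mixed_float16'))  # -> {'class_name': 'Policy', 'config': {'name': 'mixed_float16'}}

    deserialize('float32')              # -> Policy('float32')
    deserialize(None)                   # -> Policy('infer')
    deserialize(serialize(Policy('mixed_bfloat16')))  # round-trips through the generic Keras object format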
@@ -227,6 +227,97 @@ class PolicyTest(test.TestCase):
                      'infer_with_float32_vars')
     self.assertEqual(mp_policy.global_policy().name, default_policy)

+  @testing_utils.enable_v2_dtype_behavior
+  def test_config(self):
+    for policy in (
+        mp_policy.Policy('float16'),
+        mp_policy.Policy('float32'),
+        mp_policy.Policy('int16'),
+        mp_policy.Policy('mixed_float16'),
+        mp_policy.Policy('mixed_bfloat16'),
+        mp_policy.Policy('infer'),
+        mp_policy.Policy('infer_float32_vars'),
+        mp_policy.Policy('float16_with_float32_vars'),
+        mp_policy.Policy('float32', loss_scale=2.),
+        mp_policy.Policy('float32', loss_scale=None),
+        mp_policy.Policy('mixed_float16', loss_scale=2.),
+        mp_policy.Policy('mixed_float16', loss_scale=None),
+        mp_policy.Policy('mixed_bfloat16', loss_scale=2.),
+        mp_policy.Policy('mixed_bfloat16', loss_scale=None),
+    ):
+      config = policy.get_config()
+      new_policy = mp_policy.Policy.from_config(config)
+      # Comparing strings is the easiest way to ensure the policies are the
+      # same, as policy does not override the == operator.
+      self.assertEqual(str(policy), str(new_policy))
+
+  @testing_utils.enable_v2_dtype_behavior
+  def test_serialization(self):
+    # Test policies that are equivalent to a single dtype
+    for policy_name in 'float16', 'float32', 'int8', 'string', 'bool':
+      policy = mp_policy.Policy(policy_name)
+      config = mp_policy.serialize(policy)
+      self.assertEqual(config, policy_name)
+      new_policy = mp_policy.deserialize(config)
+      self.assertEqual(str(policy), str(new_policy))
+
+    # Test "infer" policy
+    policy = mp_policy.Policy('infer')
+    config = mp_policy.serialize(policy)
+    self.assertIsNone(config)
+    new_policy = mp_policy.deserialize(config)
+    self.assertEqual(str(policy), str(new_policy))
+
+    class MyPolicy(mp_policy.Policy):
+      pass
+
+    # Test policies that do not override the loss scale
+    for policy in (
+        mp_policy.Policy('mixed_float16'),
+        mp_policy.Policy('mixed_bfloat16'),
+        mp_policy.Policy('infer_with_float32_vars'),
+        mp_policy.Policy('float16_with_float32_vars'),
+        MyPolicy('float32')
+    ):
+      config = mp_policy.serialize(policy)
+      self.assertEqual(config, {'class_name': policy.__class__.__name__,
+                                'config': {'name': policy.name}})
+      new_policy = mp_policy.deserialize(config,
+                                         custom_objects={'MyPolicy': MyPolicy})
+      self.assertEqual(str(policy), str(new_policy))
+
+    # Test policies that override the loss scale
+    for policy in (
+        mp_policy.Policy('float32', loss_scale=2.),
+        mp_policy.Policy('float32', loss_scale=None),
+        mp_policy.Policy('mixed_float16', loss_scale=2.),
+        mp_policy.Policy('mixed_float16', loss_scale=None),
+        mp_policy.Policy('mixed_bfloat16', loss_scale=2.),
+        mp_policy.Policy('mixed_bfloat16', loss_scale=None),
+        mp_policy.Policy('infer_with_float32_vars', loss_scale=2.),
+        mp_policy.Policy('infer_with_float32_vars', loss_scale=None),
+        mp_policy.Policy('float16_with_float32_vars', loss_scale=2.),
+        mp_policy.Policy('float16_with_float32_vars', loss_scale=None),
+    ):
+      config = mp_policy.serialize(policy)
+      expected_loss_scale_config = None
+      if policy.loss_scale:
+        expected_loss_scale_config = {
+            'class_name': 'FixedLossScale',
+            'config': {'loss_scale_value': 2.}
+        }
+      self.assertEqual(
+          config, {
+              'class_name': policy.__class__.__name__,
+              'config': {
+                  'name': policy.name,
+                  'loss_scale': expected_loss_scale_config
+              }
+          })
+      new_policy = mp_policy.deserialize(
+          config, custom_objects={'MyPolicy': MyPolicy})
+      self.assertEqual(str(policy), str(new_policy))
+
   @testing_utils.enable_v2_dtype_behavior
   def test_error_if_graph_rewrite_enabled(self):
     try:
@@ -43,7 +43,7 @@ def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None):
       certain value.
   """
   @custom_gradient.custom_gradient
-  def identity_with_grad_check(x):
+  def _identity_with_grad_check(x):
     """Function that asserts its gradient has a certain value."""
     x = array_ops.identity(x)
     def grad(dx):
@@ -57,6 +57,10 @@ def create_identity_with_grad_check_fn(expected_gradient, expected_dtype=None):
       dx = array_ops.identity(dx)
       return dx
     return x, grad
+  # Keras sometimes has trouble serializing Lambda layers with a decorated
+  # function. So we define and return a non-decorated function.
+  def identity_with_grad_check(x):
+    return _identity_with_grad_check(x)
   return identity_with_grad_check


@@ -77,7 +81,7 @@ def create_identity_with_nan_gradients_fn(have_nan_gradients):
       `have_nan_gradients` is True.
   """
   @custom_gradient.custom_gradient
-  def identity_with_nan_gradients(x):
+  def _identity_with_nan_gradients(x):
     """Function whose gradient is NaN iff `have_nan_gradients` is True."""
     x = array_ops.identity(x)
     def grad(dx):
@@ -93,4 +97,8 @@ def create_identity_with_nan_gradients_fn(have_nan_gradients):
         lambda: dx
     )
     return x, grad
+  # Keras sometimes has trouble serializing Lambda layers with a decorated
+  # function. So we define and return a non-decorated function.
+  def identity_with_nan_gradients(x):
+    return _identity_with_nan_gradients(x)
   return identity_with_nan_gradients
@@ -26,4 +26,12 @@ tf_class {
     name: "__init__"
     argspec: "args=[\'self\', \'name\', \'loss_scale\'], varargs=None, keywords=None, defaults=[\'USE_DEFAULT\'], "
   }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
@@ -26,4 +26,12 @@ tf_class {
     name: "__init__"
    argspec: "args=[\'self\', \'name\', \'loss_scale\'], varargs=None, keywords=None, defaults=[\'USE_DEFAULT\'], "
   }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }