Merge pull request from geetachavan1/cherrypicks_ZX1AI

[Cherrypick:r2.4] Fix issue when using mixed precision with RMSprop.
Mihai Maruseac 2021-01-19 09:21:17 -08:00 committed by GitHub
commit e9c0ef3064
2 changed files with 81 additions and 21 deletions
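For context, the failure the commit title refers to shows up when a Keras model trains under the mixed precision policy with the RMSprop optimizer. A minimal, hypothetical repro sketch using public TF 2.4 APIs (not code from this diff) would look like:

import numpy as np
import tensorflow as tf

# 'mixed_float16' makes Keras wrap each layer's float32 weights in an AutoCastVariable.
tf.keras.mixed_precision.set_global_policy('mixed_float16')

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, input_shape=(4,)),
    tf.keras.layers.Dense(1, dtype='float32'),  # keep the final outputs in float32
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3), loss='mse')
model.fit(np.ones((16, 4), np.float32), np.ones((16, 1), np.float32), epochs=1, verbose=0)

The diff below is the library-side fix: it changes how AutoCastVariable.assign exposes the resulting op, and the new test exercises it with RMSprop among other optimizers.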
tensorflow/python/keras/mixed_precision


@@ -57,12 +57,11 @@ class AutoCastVariable(variables.Variable, core.Tensor):
   called.
   """
-  def __init__(self, variable, op=None):
+  def __init__(self, variable):
     """Creates an AutoCastVariable instance.
     Args:
       variable: A floating-point resource variable to wrap.
-      op: Optional operation of this variable.
     Raises:
       ValueError: If `variable` is not a floating-point resource variable
@@ -74,7 +73,11 @@ class AutoCastVariable(variables.Variable, core.Tensor):
       raise ValueError('variable must be a floating point variable but has '
                        'type: %s' % variable.dtype.name)
     self._variable = variable
-    self._op = op
+    # 'delegate' means AutoCastVariable.op returns self._variable.op, which will
+    # raise an AttributeError in Eager (as intended). If set to any other value,
+    # AutoCastVariable.op returns that value instead, which is used to set the
+    # op attribute in AutoCastVariable.assign().
+    self._op = 'delegate'
   def _should_cast(self):
     """Returns True if this variable should be casted when accessed."""
@@ -199,10 +202,18 @@ class AutoCastVariable(variables.Variable, core.Tensor):
                            use_locking=None,
                            name=None,
                            read_value=True):
+    # TODO(b/146181571): This logic can be simplified once
+    # DistributedVariable.assign returns a DistributedVariable. Currently for
+    # MirroredStrategy, it returns a Mirrored value.
     if ops.executing_eagerly_outside_functions():
       assign_op = update_fn(value, use_locking, name, False)
       if read_value:
-        return create_autocast_variable(self._variable, op=assign_op)
+        # We create a new AutoCastVariable with the same underlying tf.Variable.
+        # The new AutoCastVariable is identical except the 'op' attribute is
+        # defined. This matches the behavior of tf.Variable.assign.
+        var = create_autocast_variable(self._variable)
+        var._op = assign_op  # pylint:disable=protected-access
+        return var
       return assign_op
     # Fallback to wrapping the returned variable in graph mode if possible
@@ -298,9 +309,9 @@ class AutoCastVariable(variables.Variable, core.Tensor):
   @property
   def op(self):
-    if self._op is not None:
-      return self._op
-    return self._variable.op
+    if self._op == 'delegate':
+      return self._variable.op
+    return self._op
   def _as_graph_element(self):
     graph_element = self._variable._as_graph_element()  # pylint:disable=protected-access
@@ -469,7 +480,7 @@ ops.register_tensor_conversion_function(AutoCastVariable,
                                          AutoCastVariable._dense_var_to_tensor)  # pylint:disable=protected-access
-def create_autocast_variable(variable, op=None):
+def create_autocast_variable(variable):
   """Creates an AutoCastVariable that wraps another variable.
   This typically just returns `AutoCastVariable(variable)`. But, if the variable
@@ -481,14 +492,13 @@ def create_autocast_variable(variable, op=None):
   Args:
     variable: A floating-point resource variable to wrap.
-    op: Optional operation of this variable.
   Returns:
     An AutoCastVariable that wraps the variable.
   """
   if not isinstance(variable, (distribute_values.DistributedVariable,
                                ps_distribute_values.AggregatingVariable)):
-    return AutoCastVariable(variable, op=op)
+    return AutoCastVariable(variable)
   class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
     """An AutoCastVariable that also subclasses from variable.__class__.
@@ -511,7 +521,7 @@ def create_autocast_variable(variable, op=None):
              ).format(v=self)
       # pylint: enable=missing-format-attribute
-  return AutoCastDistributedVariable(variable, op=op)
+  return AutoCastDistributedVariable(variable)
 class enable_auto_cast_variables(object):  # pylint:disable=invalid-name
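The changes above drop the op constructor argument in favor of a 'delegate' sentinel stored in self._op. As a rough standalone illustration of that pattern (hypothetical names, not TensorFlow code):

class _OpDelegatingWrapper:
  """Toy wrapper mirroring the sentinel logic in the hunks above."""

  def __init__(self, inner):
    self._inner = inner
    # 'delegate' means .op forwards to the wrapped object; assign-style methods
    # may overwrite self._op with a concrete operation instead.
    self._op = 'delegate'

  @property
  def op(self):
    if self._op == 'delegate':
      return self._inner.op  # may raise AttributeError, just as tf.Variable.op does in eager mode
    return self._op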

View File

@@ -37,7 +37,14 @@ from tensorflow.python.framework import indexed_slices
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_combinations as combinations
 from tensorflow.python.keras.mixed_precision import autocast_variable
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.keras.optimizer_v2 import adamax
+from tensorflow.python.keras.optimizer_v2 import ftrl
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2
+from tensorflow.python.keras.optimizer_v2 import nadam
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
@@ -352,11 +359,28 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
       self.assertAllClose(5., self.evaluate(run_assign()))
   @ds_combinations.generate(maybe_distribute)
-  def test_assign_op(self, distribution):
+  def test_op_attribute(self, distribution):
     with distribution.scope():
       x = get_var(0., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
+      # Variable.op raises an AttributeError in Eager mode and is an op in graph
+      # mode. Variable.assign(...).op is None in Eager mode and an op in Graph
+      # mode or a tf.function. We test this is also true of AutoCastVariable.
+      if context.executing_eagerly():
+        with self.assertRaisesRegex(
+            AttributeError,
+            'Tensor.op is meaningless when eager execution is enabled'):
+          x.op  # pylint: disable=pointless-statement
+        self.assertIsNone(x.assign(1.0).op)
+        self.assertIsNone(x.assign_add(1.0).op)
+        self.assertIsNone(x.assign_sub(1.0).op)
+      else:
+        self.assertIsNotNone(x.op)
+        self.assertIsNotNone(x.assign(1.0).op)
+        self.assertIsNotNone(x.assign_add(1.0).op)
+        self.assertIsNotNone(x.assign_sub(1.0).op)
       @def_function.function
       def func():
         self.assertIsNotNone(x.assign(1.0).op)
@@ -503,25 +527,51 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         'dtype_to_cast_to=float32 '
         'inner_variable=MirroredVariable.*>')
-  @parameterized.named_parameters(
-      ('v1', gradient_descent_v1.GradientDescentOptimizer),
-      ('v2', gradient_descent_v2.SGD))
-  def test_optimizer(self, optimizer_class):
+  @ds_combinations.generate(combinations.combine(
+      optimizer_class=[
+          adadelta.Adadelta,
+          adagrad.Adagrad,
+          adam.Adam,
+          adamax.Adamax,
+          ftrl.Ftrl,
+          gradient_descent_v2.SGD,
+          nadam.Nadam,
+          rmsprop.RMSprop,
+          gradient_descent_v1.GradientDescentOptimizer
+      ],
+      use_tf_function=[False, True]))
+  def test_optimizer(self, optimizer_class, use_tf_function):
+    if use_tf_function and not context.executing_eagerly():
+      self.skipTest('Test does not support graph mode with tf.function')
     x = get_var(1., dtypes.float32)
     x = autocast_variable.create_autocast_variable(x)
-    opt = optimizer_class(1.)
+    y = get_var(1., dtypes.float32)
+    opt = optimizer_class(learning_rate=1.)
-    @def_function.function
     def f():
-      opt.minimize(lambda: x + 1., var_list=[x])
+      # Minimize both the AutoCastVariable and the normal tf.Variable. Both
+      # variables should be updated to the same value.
+      op = opt.minimize(lambda: x + y, var_list=[x, y])
+      return None if ops.executing_eagerly_outside_functions() else op
+    if use_tf_function:
+      f = def_function.function(f)
     if context.executing_eagerly():
       f()
     else:
-      op = f()  # pylint: disable=assignment-from-no-return
+      op = f()
       self.evaluate(variables.global_variables_initializer())
       self.evaluate(op)
-    self.assertEqual(self.evaluate(x), 0)
+    # Assert the AutoCastVariable has changed from its initial value
+    self.assertNotEqual(self.evaluate(x), 1.)
+    # Assert AutoCastVariable is updated correctly by comparing it to the normal
+    # variable
+    self.assertAlmostEqual(self.evaluate(x), self.evaluate(y))
+    if optimizer_class in (gradient_descent_v2.SGD,
+                           gradient_descent_v1.GradientDescentOptimizer):
+      # With SGD, the variable decreases by exactly 1
+      self.assertEqual(self.evaluate(x), 0)
 if __name__ == '__main__':
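The new test_op_attribute case encodes the behavior its comment describes. A hedged illustration with a plain tf.Variable (TF 2.x eager APIs; assumes the behavior stated in that test comment):

import tensorflow as tf

v = tf.Variable(1.0)
print(v.assign(2.0).op)  # None: an eager assignment produces no graph op

@tf.function
def update():
  # Traced into a graph, so the assignment's .op is a real operation.
  print(v.assign(3.0).op is not None)  # True (printed once, at trace time)

update()

With this commit, AutoCastVariable matches that: assign() returns a new AutoCastVariable wrapping the same underlying tf.Variable, with its op attribute set to the assignment op.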