Merge pull request #46349 from geetachavan1/cherrypicks_ZX1AI
[Cherrypick:r2.4] Fix issue when using mixed precision with RMSprop.
commit e9c0ef3064
Changed files are under tensorflow/python/keras/mixed_precision.
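For context, a minimal sketch of the kind of setup this fix targets: a Keras model trained with the RMSprop optimizer while the mixed_float16 policy wraps its float32 variables in AutoCastVariable. The model, data, and hyperparameters below are placeholders chosen for illustration, not taken from this PR.

# Hypothetical repro-style sketch (not from this PR): RMSprop training a
# model whose float32 variables are wrapped by the mixed precision policy.
import tensorflow as tf

tf.keras.mixed_precision.set_global_policy('mixed_float16')

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    # Keep the output layer in float32 for numeric stability.
    tf.keras.layers.Dense(1, dtype='float32'),
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.01),
              loss='mse')

x = tf.random.normal((32, 4))
y = tf.random.normal((32, 1))
model.fit(x, y, epochs=1, verbose=0)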
tensorflow/python/keras/mixed_precision/autocast_variable.py

@@ -57,12 +57,11 @@ class AutoCastVariable(variables.Variable, core.Tensor):
   called.
   """

-  def __init__(self, variable, op=None):
+  def __init__(self, variable):
     """Creates an AutoCastVariable instance.

     Args:
       variable: A floating-point resource variable to wrap.
-      op: Optional operation of this variable.

     Raises:
       ValueError: If `variable` is not a floating-point resource variable
@@ -74,7 +73,11 @@ class AutoCastVariable(variables.Variable, core.Tensor):
       raise ValueError('variable must be a floating point variable but has '
                        'type: %s' % variable.dtype.name)
     self._variable = variable
-    self._op = op
+    # 'delegate' means AutoCastVariable.op return self._variable.op, which will
+    # raise an AttributeError in Eager (as intended). If set to any other value,
+    # AutoCastVariable.op returns that value instead, which is used to set the
+    # op attribute in AutoCastVariable.assign().
+    self._op = 'delegate'

   def _should_cast(self):
     """Returns True if this variable should be casted when accessed."""
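A note on the string sentinel introduced above: None cannot serve as the "no override" marker because None is itself a meaningful op value (an eager assign produces op=None, which the tests later in this diff assert). A standalone sketch of the same pattern follows, with hypothetical names that are not part of TensorFlow.

# Hypothetical standalone sketch of the sentinel pattern used above: a
# distinct marker distinguishes "no override set" from an override whose
# value happens to be None.
_DELEGATE = 'delegate'


class OpDelegatingWrapper:
  """Wraps an object and delegates .op unless an override was set."""

  def __init__(self, wrapped):
    self._wrapped = wrapped
    self._op = _DELEGATE  # no override yet: defer to the wrapped object

  @property
  def op(self):
    if self._op == _DELEGATE:
      return self._wrapped.op  # may raise, exactly as the wrapped object would
    return self._op  # explicit override, which may legitimately be None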
@@ -199,10 +202,18 @@ class AutoCastVariable(variables.Variable, core.Tensor):
                            use_locking=None,
                            name=None,
                            read_value=True):
+    # TODO(b/146181571): This logic can be simplified once
+    # DistributedVariable.assign returns a DistributedVariable. Currently for
+    # MirroredStrategy, it returns a Mirrored value.
     if ops.executing_eagerly_outside_functions():
       assign_op = update_fn(value, use_locking, name, False)
       if read_value:
-        return create_autocast_variable(self._variable, op=assign_op)
+        # We create a new AutoCastVariable with the same underlying tf.Variable.
+        # The new AutoCastVariable is identical except the 'op' attribute is
+        # defined. This matches the behavior of tf.Variable.assign.
+        var = create_autocast_variable(self._variable)
+        var._op = assign_op  # pylint:disable=protected-access
+        return var
       return assign_op

     # Fallback to wrapping the returned variable in graph mode if possible
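With the eager path above, assigning to an AutoCastVariable now returns a fresh AutoCastVariable over the same underlying tf.Variable, carrying the assign's op. A rough usage sketch of the intended post-change behavior, using the internal autocast_variable module that the test file below also imports (eager mode assumed; not part of the PR):

# Rough sketch of the post-change assign behavior (eager mode assumed).
import tensorflow as tf
from tensorflow.python.keras.mixed_precision import autocast_variable

v = tf.Variable(1.0)
ac = autocast_variable.create_autocast_variable(v)

result = ac.assign(2.0)
# 'result' wraps the same tf.Variable as 'ac' but has its own 'op' attribute,
# mirroring tf.Variable.assign: None under eager execution.
print(result.op)   # None when executing eagerly
print(v.numpy())   # 2.0; the underlying variable was updated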
@@ -298,9 +309,9 @@ class AutoCastVariable(variables.Variable, core.Tensor):

   @property
   def op(self):
-    if self._op is not None:
-      return self._op
-    return self._variable.op
+    if self._op == 'delegate':
+      return self._variable.op
+    return self._op

   def _as_graph_element(self):
     graph_element = self._variable._as_graph_element()  # pylint:disable=protected-access
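Comparing the two versions of the property: previously, an override of None (which is what an eager assign produces) fell through to self._variable.op, and the test below shows that accessing Variable.op raises an AttributeError in eager mode; with the sentinel, an explicit None is returned as-is. A quick sketch of the plain tf.Variable baseline this is meant to mirror, assuming eager execution:

# Sketch of the tf.Variable baseline that AutoCastVariable.op now mirrors
# (eager execution assumed).
import tensorflow as tf

v = tf.Variable(1.0)

# v.op raises under eager execution...
try:
  v.op  # pylint: disable=pointless-statement
except AttributeError as e:
  print('v.op raised:', e)

# ...while the result of an eager assign reports op=None.
print(v.assign(2.0).op)  # None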
@@ -469,7 +480,7 @@ ops.register_tensor_conversion_function(AutoCastVariable,
                                         AutoCastVariable._dense_var_to_tensor)  # pylint:disable=protected-access


-def create_autocast_variable(variable, op=None):
+def create_autocast_variable(variable):
   """Creates an AutoCastVariable that wraps another variable.

   This typically just returns `AutoCastVariable(variable)`. But, if the variable
@@ -481,14 +492,13 @@ def create_autocast_variable(variable, op=None):

   Args:
     variable: A floating-point resource variable to wrap.
-    op: Optional operation of this variable.

   Returns:
     An AutoCastVariable that wraps the variable.
   """
   if not isinstance(variable, (distribute_values.DistributedVariable,
                                ps_distribute_values.AggregatingVariable)):
-    return AutoCastVariable(variable, op=op)
+    return AutoCastVariable(variable)

   class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
     """An AutoCastVariable that also subclasses from variable.__class__.
@@ -511,7 +521,7 @@ def create_autocast_variable(variable, op=None):
              ).format(v=self)
       # pylint: enable=missing-format-attribute

-  return AutoCastDistributedVariable(variable, op=op)
+  return AutoCastDistributedVariable(variable)


 class enable_auto_cast_variables(object):  # pylint:disable=invalid-name
tensorflow/python/keras/mixed_precision/autocast_variable_test.py

@@ -37,7 +37,14 @@ from tensorflow.python.framework import indexed_slices
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_combinations as combinations
 from tensorflow.python.keras.mixed_precision import autocast_variable
+from tensorflow.python.keras.optimizer_v2 import adadelta
+from tensorflow.python.keras.optimizer_v2 import adagrad
+from tensorflow.python.keras.optimizer_v2 import adam
+from tensorflow.python.keras.optimizer_v2 import adamax
+from tensorflow.python.keras.optimizer_v2 import ftrl
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2
+from tensorflow.python.keras.optimizer_v2 import nadam
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
@@ -352,11 +359,28 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         self.assertAllClose(5., self.evaluate(run_assign()))

   @ds_combinations.generate(maybe_distribute)
-  def test_assign_op(self, distribution):
+  def test_op_attribute(self, distribution):
     with distribution.scope():
       x = get_var(0., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)

+      # Variable.op raises an AttributeError in Eager mode and is an op in graph
+      # mode. Variable.assign(...).op is None in Eager mode and an op in Graph
+      # mode or a tf.function. We test this is also true of AutoCastVariable.
+      if context.executing_eagerly():
+        with self.assertRaisesRegex(
+            AttributeError,
+            'Tensor.op is meaningless when eager execution is enabled'):
+          x.op  # pylint: disable=pointless-statement
+        self.assertIsNone(x.assign(1.0).op)
+        self.assertIsNone(x.assign_add(1.0).op)
+        self.assertIsNone(x.assign_sub(1.0).op)
+      else:
+        self.assertIsNotNone(x.op)
+        self.assertIsNotNone(x.assign(1.0).op)
+        self.assertIsNotNone(x.assign_add(1.0).op)
+        self.assertIsNotNone(x.assign_sub(1.0).op)
+
       @def_function.function
       def func():
         self.assertIsNotNone(x.assign(1.0).op)
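The eager expectations above are complemented by the tf.function branch of the test. For readers who want to see the contrast outside the test harness, a small hypothetical sketch with the public API (not part of the PR):

# Hypothetical sketch: assign results carry a real op when building a graph
# (e.g. while tracing a tf.function), but op is None in plain eager code.
import tensorflow as tf

v = tf.Variable(1.0)
print(v.assign(2.0).op)  # None: plain eager execution


@tf.function
def do_assign():
  result = v.assign(3.0)
  print('op while tracing:', result.op)  # a tf.Operation, not None
  return result


do_assign()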
@@ -503,25 +527,51 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         'dtype_to_cast_to=float32 '
         'inner_variable=MirroredVariable.*>')

-  @parameterized.named_parameters(
-      ('v1', gradient_descent_v1.GradientDescentOptimizer),
-      ('v2', gradient_descent_v2.SGD))
-  def test_optimizer(self, optimizer_class):
+  @ds_combinations.generate(combinations.combine(
+      optimizer_class=[
+          adadelta.Adadelta,
+          adagrad.Adagrad,
+          adam.Adam,
+          adamax.Adamax,
+          ftrl.Ftrl,
+          gradient_descent_v2.SGD,
+          nadam.Nadam,
+          rmsprop.RMSprop,
+          gradient_descent_v1.GradientDescentOptimizer
+      ],
+      use_tf_function=[False, True]))
+  def test_optimizer(self, optimizer_class, use_tf_function):
+    if use_tf_function and not context.executing_eagerly():
+      self.skipTest('Test does not support graph mode with tf.function')
     x = get_var(1., dtypes.float32)
     x = autocast_variable.create_autocast_variable(x)
-    opt = optimizer_class(1.)
+    y = get_var(1., dtypes.float32)
+    opt = optimizer_class(learning_rate=1.)

-    @def_function.function
     def f():
-      opt.minimize(lambda: x + 1., var_list=[x])
+      # Minimize both the AutoCastVariable and the normal tf.Variable. Both
+      # variables should be updated to the same value.
+      op = opt.minimize(lambda: x + y, var_list=[x, y])
+      return None if ops.executing_eagerly_outside_functions() else op
+
+    if use_tf_function:
+      f = def_function.function(f)

     if context.executing_eagerly():
       f()
     else:
-      op = f()  # pylint: disable=assignment-from-no-return
+      op = f()
       self.evaluate(variables.global_variables_initializer())
       self.evaluate(op)
-    self.assertEqual(self.evaluate(x), 0)
+    # Assert the AutoCastVariable has changed from its initial value
+    self.assertNotEqual(self.evaluate(x), 1.)
+    # Assert AutoCastVariable is updated correctly by comparing it to the normal
+    # variable
+    self.assertAlmostEqual(self.evaluate(x), self.evaluate(y))
+    if optimizer_class in (gradient_descent_v2.SGD,
+                           gradient_descent_v1.GradientDescentOptimizer):
+      # With SGD, the variables decreases by exactly 1
+      self.assertEqual(self.evaluate(x), 0)


 if __name__ == '__main__':
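Finally, an end-to-end eager check along the same lines as the widened test above, written against the public optimizer API plus the internal autocast_variable module. This is a hypothetical sanity sketch rather than part of the PR.

# Hypothetical eager-mode sanity check mirroring test_optimizer above:
# RMSprop should update an AutoCastVariable and a plain tf.Variable to the
# same value when both receive the same gradient.
import tensorflow as tf
from tensorflow.python.keras.mixed_precision import autocast_variable

x = autocast_variable.create_autocast_variable(tf.Variable(1.0))
y = tf.Variable(1.0)
opt = tf.keras.optimizers.RMSprop(learning_rate=1.0)

opt.minimize(lambda: x + y, var_list=[x, y])

# Both variables have moved off 1.0 by the same amount.
print(x.read_value().numpy(), y.numpy())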