diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 94d52a8ab06..d932899ab0d 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5220,7 +5220,6 @@ py_library(
         "//tensorflow/python/distribute:distribute_coordinator_context",
         "//tensorflow/python/distribute:distribute_lib",
         "//tensorflow/python/distribute:reduce_util",
-        "//tensorflow/python/distribute:values",
         "//tensorflow/python/eager:backprop",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/keras/optimizer_v2:learning_rate_schedule",
@@ -6591,9 +6590,6 @@ cuda_py_tests(
         ":variable_scope",
         ":variables",
         "//tensorflow/core:protos_all_py",
-        "//tensorflow/python/distribute:cross_device_ops",
-        "//tensorflow/python/distribute:mirrored_strategy",
-        "//tensorflow/python/distribute:values",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index f89dc362cf8..f1a31d01dd4 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -27,7 +26,6 @@ import six
 from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
-from tensorflow.python.distribute import values as ds_values
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
@@ -82,17 +81,10 @@ def _deduplicate_indexed_slices(values, indices):
 
 
 def _var_key(var):
-  """Returns slot key for `var`."""
-  # pylint: disable=protected-access
-  if hasattr(var, "_distributed_container"):
-    var = var._distributed_container()
-  if ops.executing_eagerly_outside_functions():
-    return var._unique_id
-  if ds_values.is_distributed_variable(var):
-    return (var.graph, var._shared_name)
-  else:
+  # TODO(ashankar): Consolidate handling for eager and graph
+  if hasattr(var, "op"):
     return (var.op.graph, var.op.name)
-  # pylint: enable=protected-access
+  return var._unique_id  # pylint: disable=protected-access
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -759,16 +751,26 @@ class Optimizer(
     Returns:
       The `Variable` for the slot if it was created, `None` otherwise.
     """
+    # pylint: disable=protected-access
     named_slots = self._slots.get(name, None)
     if not named_slots:
       return None
-    slot = named_slots.get(_var_key(var), None)
-    if (ds_values.is_distributed_variable(slot) and
-        not ds_values.is_distributed_variable(var)):
-      # Make sure var and slot are either both DistributedVariable, or both
-      # per replica variables.
-      slot = slot._get_closest()  # pylint: disable=protected-access
-    return slot
+
+    if hasattr(var, "_distributed_container"):
+      # NOTE: If this isn't patched, then there is no `handle` in
+      # `_resource_apply_dense`.
+      distributed_container = var._distributed_container()
+      assert distributed_container is not None
+      if ops.executing_eagerly_outside_functions():
+        key = distributed_container._unique_id
+      else:
+        key = (distributed_container.graph, distributed_container._shared_name)
+      # pylint: enable=protected-access
+      mirrored_slot = named_slots.get(key, None)
+      if mirrored_slot is None: return None
+      return mirrored_slot._get_closest()  # pylint: disable=protected-access
+
+    return named_slots.get(_var_key(var), None)
 
   def get_slot_names(self):
     """Return a list of the names of slots created by the `Optimizer`.
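For context on the `_var_key` change above: the restored implementation keys a slot by `(graph, op name)` when the variable exposes an `op` (graph mode) and falls back to `_unique_id` otherwise (eager resource variables have no `op`). A minimal standalone sketch of that dispatch, using hypothetical `GraphVar`/`EagerVar` stand-ins rather than real TensorFlow variable classes:

import collections

GraphOp = collections.namedtuple("GraphOp", ["graph", "name"])


class GraphVar(object):
  """Hypothetical stand-in for a graph-mode variable with an `op`."""

  def __init__(self, graph, name):
    self.op = GraphOp(graph=graph, name=name)


class EagerVar(object):
  """Hypothetical stand-in for an eager resource variable (no `op`)."""

  _next_id = 0

  def __init__(self):
    EagerVar._next_id += 1
    self._unique_id = "var_%d" % EagerVar._next_id


def _var_key(var):
  # Graph variables are keyed by (graph, op name); eager resource
  # variables have no `op` and fall back to their unique id.
  if hasattr(var, "op"):
    return (var.op.graph, var.op.name)
  return var._unique_id  # pylint: disable=protected-access


graph = object()  # stands in for a tf.Graph
print(_var_key(GraphVar(graph, "w:0")))  # -> (<graph object>, 'w:0')
print(_var_key(EagerVar()))              # -> 'var_1'

Because a `DistributedVariable` wrapper and its per-replica components produce different keys under this scheme, `get_slot` above re-derives the container's key by hand before falling back to `_var_key(var)`.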
diff --git a/tensorflow/python/training/optimizer_test.py b/tensorflow/python/training/optimizer_test.py
index 30fa5cd0388..5775d0b8091 100644
--- a/tensorflow/python/training/optimizer_test.py
+++ b/tensorflow/python/training/optimizer_test.py
@@ -18,9 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.distribute import cross_device_ops
-from tensorflow.python.distribute import mirrored_strategy
-from tensorflow.python.distribute import values as ds_values
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -32,7 +29,6 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
-from tensorflow.python.training import adam
 from tensorflow.python.training import gradient_descent
 
 
@@ -273,28 +269,6 @@ class OptimizerTest(test.TestCase):
     self.assertAllClose([-0.1, -0.1], self.evaluate(var0))
     self.assertAllClose([0., 0.], self.evaluate(var1))
 
-  @test_util.run_deprecated_v1
-  def testGetSlotUnderDistributedStrategy(self):
-    # Only run this test in graph mode so we don't need actual GPU.
-    ds = mirrored_strategy.MirroredStrategy(
-        ['CPU:0', 'GPU:0'],
-        cross_device_ops=cross_device_ops.HierarchicalCopyAllReduce())
-    # We need an optimizer that creates slots.
-    optimizer = adam.AdamOptimizer()
-
-    def f():
-      v = variables.Variable([1.0])
-      self.assertTrue(ds_values.is_distributed_variable(v))
-      # Slot variables are created in the first call to apply_gradients.
-      optimizer.apply_gradients([(ops.convert_to_tensor([1.0]), v)])
-      self.assertTrue(optimizer.get_slot_names())
-      for name in optimizer.get_slot_names():
-        slot = optimizer.get_slot(v, name)
-        self.assertIsNotNone(slot)
-        self.assertTrue(ds_values.is_distributed_variable(slot))
-
-    ds.experimental_run_v2(f)
-
 
 if __name__ == '__main__':
   test.main()
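The deleted test was the only coverage of `get_slot` under `MirroredStrategy`; the surviving tests exercise the ordinary, non-distributed path. A minimal usage sketch of that path, assuming a TF 1.x-style graph environment via `tf.compat.v1` (the variable name `w` and the constant gradient are illustrative only):

# Usage sketch for Optimizer.get_slot on an ordinary variable;
# tf.train.AdamOptimizer creates its "m" and "v" slots on the first
# apply_gradients call.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

var = tf.Variable([1.0], name="w")
opt = tf.train.AdamOptimizer()
# Slot variables are created here, keyed internally via _var_key(var).
train_op = opt.apply_gradients([(tf.constant([1.0]), var)])

for name in opt.get_slot_names():  # ["m", "v"] for Adam
  slot = opt.get_slot(var, name)
  print(name, slot.shape)  # each slot mirrors the variable's shape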