diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index 202c1e9afc0..ab47b74c65a 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -25,10 +25,12 @@ from absl.testing import parameterized
 import numpy as np

 from tensorflow.contrib.optimizer_v2 import rmsprop
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -448,5 +450,56 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
         ]), var1.eval())


+class SlotColocationTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters([True, False])
+  @test_util.run_in_graph_and_eager_modes
+  def testRunMinimizeOnGPUForCPUVariables(self, use_resource):
+    if not context.context().num_gpus():
+      self.skipTest("No GPUs found")
+
+    with ops.device("/device:CPU:0"):
+      if use_resource:
+        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
+                                                      dtype=dtypes.float32)
+        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
+                                                      dtype=dtypes.float32)
+        global_step = resource_variable_ops.ResourceVariable(
+            array_ops.zeros([], dtypes.int64), name="global_step")
+      else:
+        var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
+        var1 = variables.Variable([3.0, 4.0], dtype=dtypes.float32)
+        global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64), name="global_step")
+
+    def loss():
+      return 5 * var0 + 3 * var1
+
+    opt = rmsprop.RMSPropOptimizer(
+        learning_rate=1.0, decay=0.9, momentum=0.5, epsilon=1.0)
+
+    # Fetch params to validate initial values.
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+    self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+    # Run one step through the optimizer on the GPU.
+    # Slot variables are created the first time the optimizer is used on
+    # some variable. This tests that slot variables will be colocated with
+    # the base variable.
+    with ops.device("/device:GPU:0"):
+      # Note that for eager execution, minimize expects a function instead
+      # of a Tensor.
+      opt_op = opt.minimize(loss, global_step, [var0, var1])
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(opt_op)
+
+    # Validate updated params. All variables should have decreased.
+    self.assertTrue(all(v < 0.0 for v in self.evaluate(var0)),
+                    msg="updated variables: %s" % self.evaluate(var0))
+    self.assertTrue(all(v < 2.0 for v in self.evaluate(var1)),
+                    msg="updated variables: %s" % self.evaluate(var1))
+
+
 if __name__ == "__main__":
   test.main()
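The bounds asserted above can be checked by hand. The loss 5 * var0 + 3 * var1 has constant gradients 5 and 3, and one step of the standard RMSProp update from zero-initialized accumulators (with epsilon added inside the square root, matching the documented apply_rms_prop update; this is an assumption of the sketch, not something spelled out in the patch) moves var0 to roughly [-1.67, -0.67] and var1 to roughly [0.82, 1.82]. A minimal NumPy sketch:

import numpy as np

def one_rmsprop_step(var, grad, lr=1.0, decay=0.9, momentum=0.5, epsilon=1.0):
  # Accumulators start at zero, so a single step simplifies to:
  rms = (1.0 - decay) * grad ** 2           # rms <- decay*rms + (1-decay)*g^2
  mom = lr * grad / np.sqrt(rms + epsilon)  # mom <- momentum*mom + lr*g/sqrt(rms+eps)
  return var - mom

print(one_rmsprop_step(np.array([1.0, 2.0]), 5.0))  # ~[-1.67, -0.67], all < 0.0
print(one_rmsprop_step(np.array([3.0, 4.0]), 3.0))  # ~[ 0.82,  1.82], all < 2.0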
diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD
index b8f01249419..7fa755147a0 100644
--- a/tensorflow/python/keras/optimizer_v2/BUILD
+++ b/tensorflow/python/keras/optimizer_v2/BUILD
@@ -201,6 +201,7 @@ cuda_py_test(
     srcs = ["rmsprop_test.py"],
     additional_deps = [
         ":optimizer_v2",
+        "@absl_py//absl/testing:parameterized",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework",
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index d0f16f0b4f3..7b784fd9af7 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -443,11 +443,12 @@ class OptimizerV2(checkpointable.CheckpointableBase):
            initializer, shape=var.shape, dtype=var.dtype)
      else:
        initial_value = initializer
-      weight = tf_variables.Variable(
-          name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
-          dtype=var.dtype,
-          trainable=False,
-          initial_value=initial_value)
+      with ops._colocate_with_for_gradient(var, None):  # pylint: disable=protected-access
+        weight = tf_variables.Variable(
+            name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
+            dtype=var.dtype,
+            trainable=False,
+            initial_value=initial_value)
      backend.track_variable(weight)
      slot_dict[slot_name] = weight
      self._restore_slot_variable(
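The ops._colocate_with_for_gradient(var, None) scope added above is what pins each slot to its base variable's device: colocation overrides whatever device scope is active when the optimizer lazily creates the slot. Below is a minimal graph-mode sketch of that behavior, assuming TF 1.x semantics and using the public ops.colocate_with wrapper as a stand-in for the internal helper; it is an illustration, not code from the patch:

import tensorflow as tf
from tensorflow.python.framework import ops

with tf.Graph().as_default():
  with tf.device("/device:CPU:0"):
    var = tf.Variable([1.0, 2.0], name="var")

  with tf.device("/device:GPU:0"):
    # Colocation resets the surrounding device scope, so the "slot" is
    # placed on the CPU next to var instead of on the GPU.
    with ops.colocate_with(var):
      slot = tf.Variable(tf.zeros_like(var.initial_value),
                         trainable=False, name="var/rms")

  print(var.device)   # /device:CPU:0
  print(slot.device)  # /device:CPU:0, colocated with var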
+ with ops.device("/device:GPU:0"): + # Note that for eager execution, minimize expects a function instead of a + # Tensor. + opt_op = opt.minimize(loss, [var0, var1]) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(opt_op) + + # Validate updated params, All variables should have decreased. + self.assertTrue(all(v < 0.0 for v in self.evaluate(var0)), + msg="updated variables: %s" % self.evaluate(var0)) + self.assertTrue(all(v < 2.0 for v in self.evaluate(var1)), + msg="updated variables: %s" % self.evaluate(var1)) + + if __name__ == "__main__": test.main()