diff --git a/tensorflow/python/keras/engine/training_gpu_test.py b/tensorflow/python/keras/engine/training_gpu_test.py
index 996e281bf0c..0498a03a1ed 100644
--- a/tensorflow/python/keras/engine/training_gpu_test.py
+++ b/tensorflow/python/keras/engine/training_gpu_test.py
@@ -20,10 +20,9 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 import numpy as np
-
-from tensorflow.python.framework import test_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import combinations
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import input_layer
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers.convolutional import Conv2D
@@ -71,7 +70,7 @@ class TrainingGPUTest(test.TestCase, parameterized.TestCase):
       return simple_model
 
     if test.is_gpu_available(cuda_only=True):
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         losses_to_test = ['sparse_categorical_crossentropy',
                           'categorical_crossentropy', 'binary_crossentropy']
 
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index 18e37a9d6a4..3ab3acd0ff9 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -435,7 +435,7 @@ class GroupedConvTest(keras_parameterized.TestCase):
   )
   def disable_test_group_conv(self, layer_cls, input_shape):
     if test.is_gpu_available(cuda_only=True):
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         inputs = random_ops.random_uniform(shape=input_shape)
 
         layer = layer_cls(16, 3, groups=4, use_bias=False)
@@ -453,7 +453,7 @@ class GroupedConvTest(keras_parameterized.TestCase):
 
   def test_group_conv_depthwise(self):
     if test.is_gpu_available(cuda_only=True):
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         inputs = random_ops.random_uniform(shape=(3, 27, 27, 32))
 
         layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False)
@@ -474,7 +474,7 @@ class Conv1DTransposeTest(keras_parameterized.TestCase):
     stack_size = 3
     num_col = 6
 
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           keras.layers.Conv1DTranspose,
           kwargs=kwargs,
@@ -509,7 +509,7 @@ class Conv3DTransposeTest(keras_parameterized.TestCase):
     num_col = 6
     depth = 5
 
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           keras.layers.Conv3DTranspose,
           kwargs=kwargs,
diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD
index adf10787f1e..1fa6deb8cd9 100644
--- a/tensorflow/python/keras/layers/preprocessing/BUILD
+++ b/tensorflow/python/keras/layers/preprocessing/BUILD
@@ -473,6 +473,7 @@ cuda_py_test(
     deps = [
         ":image_preprocessing",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/keras:testing_utils",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py
index a039ec644e3..b51e948baea 100644
--- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py
+++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py
@@ -47,7 +47,7 @@ class ResizingTest(keras_parameterized.TestCase):
     orig_width = 8
     channels = 3
     kwargs.update({'height': expected_height, 'width': expected_width})
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.Resizing,
           kwargs=kwargs,
@@ -79,7 +79,7 @@ class ResizingTest(keras_parameterized.TestCase):
 
   def test_down_sampling_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype)
         layer = image_preprocessing.Resizing(
             height=2, width=2, interpolation='nearest')
@@ -95,7 +95,7 @@ class ResizingTest(keras_parameterized.TestCase):
 
   def test_up_sampling_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype)
         layer = image_preprocessing.Resizing(
             height=4, width=4, interpolation='nearest')
@@ -152,7 +152,7 @@ class CenterCropTest(keras_parameterized.TestCase):
         (num_samples, orig_height, orig_width, channels)).astype(np.float32)
     expected_output = get_numpy_center_crop(
         input_images, expected_height, expected_width)
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.CenterCrop,
           kwargs=kwargs,
@@ -209,7 +209,7 @@ class RandomCropTest(keras_parameterized.TestCase):
     orig_width = 8
     channels = 3
     kwargs = {'height': expected_height, 'width': expected_width}
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.RandomCrop,
           kwargs=kwargs,
@@ -240,7 +240,7 @@ class RandomCropTest(keras_parameterized.TestCase):
     with test.mock.patch.object(
         stateless_random_ops, 'stateless_random_uniform',
         return_value=mock_offset):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomCrop(height, width)
         inp = np.random.random((12, 5, 8, 3))
         actual_output = layer(inp, training=1)
@@ -270,7 +270,7 @@ class RandomCropTest(keras_parameterized.TestCase):
     np.random.seed(1337)
     height, width = 8, 16
     inp = np.random.random((12, 8, 16, 3))
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       layer = image_preprocessing.RandomCrop(height, width)
       actual_output = layer(inp, training=0)
       self.assertAllClose(inp, actual_output)
@@ -279,7 +279,7 @@ class RandomCropTest(keras_parameterized.TestCase):
     np.random.seed(1337)
     height, width = 3, 3
     inp = np.random.random((12, 10, 6, 3))
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       layer = image_preprocessing.RandomCrop(height, width)
       actual_output = layer(inp, training=0)
       resized_inp = image_ops.resize_images_v2(
@@ -291,7 +291,7 @@ class RandomCropTest(keras_parameterized.TestCase):
     np.random.seed(1337)
     height, width = 4, 6
     inp = np.random.random((12, 8, 16, 3))
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       layer = image_preprocessing.RandomCrop(height, width)
       actual_output = layer(inp, training=0)
       resized_inp = image_ops.resize_images_v2(inp, size=[4, 8])
@@ -359,7 +359,7 @@ class RandomFlipTest(keras_parameterized.TestCase):
         expected_output = np.flip(expected_output, axis=1)
     with test.mock.patch.object(
         random_ops, 'random_uniform', return_value=mock_random):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomFlip(mode)
         actual_output = layer(inp, training=1)
         self.assertAllClose(expected_output, actual_output)
@@ -396,7 +396,7 @@ class RandomFlipTest(keras_parameterized.TestCase):
     with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomFlip()
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
@@ -446,7 +446,7 @@ class RandomContrastTest(keras_parameterized.TestCase):
       expected_output = (inp - inp_mean) * mock_random + inp_mean
     with test.mock.patch.object(
         random_ops, 'random_uniform', return_value=mock_random):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomContrast((lower, upper))
         actual_output = layer(inp, training=True)
         self.assertAllClose(expected_output, actual_output)
@@ -467,7 +467,7 @@ class RandomContrastTest(keras_parameterized.TestCase):
     with CustomObjectScope(
         {'RandomContrast': image_preprocessing.RandomContrast}):
       input_images = np.random.random((2, 5, 8, 3))
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomContrast(amplitude)
         layer(input_images)
 
@@ -476,7 +476,7 @@ class RandomContrastTest(keras_parameterized.TestCase):
         {'RandomContrast': image_preprocessing.RandomContrast}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomContrast((0.1, 0.2))
         actual_output = layer(input_images, training=False)
         self.assertAllClose(expected_output, actual_output)
@@ -485,7 +485,7 @@ class RandomContrastTest(keras_parameterized.TestCase):
     with CustomObjectScope(
         {'RandomContrast': image_preprocessing.RandomContrast}):
       input_images = np.random.randint(low=0, high=255, size=(2, 5, 8, 3))
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomContrast((0.1, 0.2))
         layer(input_images)
 
@@ -517,7 +517,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
     orig_width = 8
     channels = 3
     kwargs = {'height_factor': height_factor, 'width_factor': width_factor}
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.RandomTranslation,
           kwargs=kwargs,
@@ -532,7 +532,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_up_numeric_reflect(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by -.2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -552,7 +552,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_up_numeric_constant(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by -.2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -572,7 +572,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_down_numeric_reflect(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by .2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -592,7 +592,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_asymmetric_size_numeric_reflect(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype)
         # Shifting by .5 * 8 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -615,7 +615,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_down_numeric_constant(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by -.2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -635,7 +635,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_left_numeric_reflect(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by .2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -655,7 +655,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
 
   def test_random_translation_left_numeric_constant(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
         # Shifting by -.2 * 5 = 1 pixel.
         layer = image_preprocessing.RandomTranslation(
@@ -678,7 +678,7 @@ class RandomTranslationTest(keras_parameterized.TestCase):
         {'RandomTranslation': image_preprocessing.RandomTranslation}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomTranslation(.5, .5)
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
@@ -996,7 +996,7 @@ class RandomRotationTest(keras_parameterized.TestCase):
     orig_width = 8
     channels = 3
     kwargs = {'factor': factor}
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.RandomRotation,
           kwargs=kwargs,
@@ -1014,7 +1014,7 @@ class RandomRotationTest(keras_parameterized.TestCase):
         {'RandomTranslation': image_preprocessing.RandomRotation}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomRotation(.5)
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
@@ -1025,7 +1025,7 @@ class RandomRotationTest(keras_parameterized.TestCase):
     And that replicas got the same random result.
     """
     input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       strat = MirroredStrategy(devices=['cpu', 'gpu'])
       with strat.scope():
         layer = image_preprocessing.RandomRotation(.5)
@@ -1052,7 +1052,7 @@ class RandomZoomTest(keras_parameterized.TestCase):
     orig_width = 8
     channels = 3
     kwargs = {'height_factor': height_factor, 'width_factor': width_factor}
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       testing_utils.layer_test(
           image_preprocessing.RandomZoom,
           kwargs=kwargs,
@@ -1073,7 +1073,7 @@ class RandomZoomTest(keras_parameterized.TestCase):
 
   def test_random_zoom_in_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
         layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5),
                                                interpolation='nearest')
@@ -1092,7 +1092,7 @@ class RandomZoomTest(keras_parameterized.TestCase):
 
   def test_random_zoom_out_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
         layer = image_preprocessing.RandomZoom((.5, .5), (.8, .8),
                                                fill_mode='constant',
@@ -1112,7 +1112,7 @@ class RandomZoomTest(keras_parameterized.TestCase):
 
   def test_random_zoom_out_numeric_preserve_aspect_ratio(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
         layer = image_preprocessing.RandomZoom((.5, .5),
                                                fill_mode='constant',
@@ -1135,7 +1135,7 @@ class RandomZoomTest(keras_parameterized.TestCase):
         {'RandomZoom': image_preprocessing.RandomZoom}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomZoom(.5, .5)
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
@@ -1157,7 +1157,7 @@ class RandomHeightTest(keras_parameterized.TestCase):
     orig_height = 5
     orig_width = 8
     channels = 3
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       img = np.random.random((num_samples, orig_height, orig_width, channels))
       layer = image_preprocessing.RandomHeight(factor)
       img_out = layer(img, training=True)
@@ -1176,7 +1176,7 @@ class RandomHeightTest(keras_parameterized.TestCase):
     mock_factor = 0
     with test.mock.patch.object(
         gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         img = np.random.random((12, 5, 8, 3))
         layer = image_preprocessing.RandomHeight(.4)
         img_out = layer(img, training=True)
@@ -1184,7 +1184,7 @@ class RandomHeightTest(keras_parameterized.TestCase):
 
   def test_random_height_longer_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype(dtype)
         layer = image_preprocessing.RandomHeight(factor=(1., 1.))
         # Return type of RandomHeight() is float32 if `interpolation` is not
@@ -1204,7 +1204,7 @@ class RandomHeightTest(keras_parameterized.TestCase):
 
   def test_random_height_shorter_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype(dtype)
         layer = image_preprocessing.RandomHeight(
             factor=(-.5, -.5), interpolation='nearest')
@@ -1226,7 +1226,7 @@ class RandomHeightTest(keras_parameterized.TestCase):
     with CustomObjectScope({'RandomHeight': image_preprocessing.RandomHeight}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomHeight(.5)
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
@@ -1248,7 +1248,7 @@ class RandomWidthTest(keras_parameterized.TestCase):
     orig_height = 5
     orig_width = 8
     channels = 3
-    with tf_test_util.use_gpu():
+    with testing_utils.use_gpu():
       img = np.random.random((num_samples, orig_height, orig_width, channels))
       layer = image_preprocessing.RandomWidth(factor)
       img_out = layer(img, training=True)
@@ -1267,7 +1267,7 @@ class RandomWidthTest(keras_parameterized.TestCase):
     mock_factor = 0
     with test.mock.patch.object(
         gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         img = np.random.random((12, 8, 5, 3))
         layer = image_preprocessing.RandomWidth(.4)
         img_out = layer(img, training=True)
@@ -1275,7 +1275,7 @@ class RandomWidthTest(keras_parameterized.TestCase):
 
   def test_random_width_longer_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype(dtype)
         layer = image_preprocessing.RandomWidth(factor=(1., 1.))
         # Return type of RandomWidth() is float32 if `interpolation` is not
@@ -1294,7 +1294,7 @@ class RandomWidthTest(keras_parameterized.TestCase):
 
   def test_random_width_shorter_numeric(self):
     for dtype in (np.int64, np.float32):
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype(dtype)
         layer = image_preprocessing.RandomWidth(
             factor=(-.5, -.5), interpolation='nearest')
@@ -1316,7 +1316,7 @@ class RandomWidthTest(keras_parameterized.TestCase):
     with CustomObjectScope({'RandomWidth': image_preprocessing.RandomWidth}):
       input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
       expected_output = input_images
-      with tf_test_util.use_gpu():
+      with testing_utils.use_gpu():
         layer = image_preprocessing.RandomWidth(.5)
         actual_output = layer(input_images, training=0)
         self.assertAllClose(expected_output, actual_output)
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
index 1e242256c7b..e994a6e1e44 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -73,7 +73,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testBasic(self):
     for dtype in _DATA_TYPES:
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
@@ -138,7 +138,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testPrecomputedGradient(self):
     for dtype in _DATA_TYPES:
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
@@ -162,7 +162,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testNoGradients(self):
     for dtype in _DATA_TYPES:
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0  # pylint: disable=cell-var-from-loop
@@ -174,7 +174,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testNoGradientsForAnyVariables_Minimize(self):
     for dtype in _DATA_TYPES:
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: constant_op.constant(5.0)
@@ -187,7 +187,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testNoGradientsForAnyVariables_ApplyGradients(self):
     for dtype in _DATA_TYPES:
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         sgd_op = gradient_descent.SGD(3.0)
@@ -198,7 +198,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testGradientsAsVariables(self):
     for i, dtype in enumerate(_DATA_TYPES):
-      with test_util.use_gpu():
+      with testing_utils.use_gpu():
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
@@ -236,7 +236,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testComputeGradientsWithTensors(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       x = ops.convert_to_tensor_v2(1.0)
 
       def f():
@@ -256,7 +256,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
   def testConstraint(self):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       var0 = variables.Variable([1.0, 2.0],
                                 constraint=constraint_01)
       var1 = variables.Variable([3.0, 4.0],
@@ -278,14 +278,14 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testIterationWithoutMinimize(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       sgd = gradient_descent.SGD(3.0)
       self.evaluate(sgd.iterations.initializer)
       self.assertEqual(0, self.evaluate(sgd.iterations))
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testConfig(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       opt = gradient_descent.SGD(learning_rate=1.0)
       config = opt.get_config()
       opt2 = gradient_descent.SGD.from_config(config)
@@ -305,7 +305,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testConfigWithLearningRateDecay(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       var0 = variables.Variable([[1.0], [2.0]], dtype=dtypes.float32)
       for decay_schedule in [
           learning_rate_schedule.InverseTimeDecay(
@@ -336,7 +336,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testGradClipValue(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       var = variables.Variable([1.0, 2.0])
       loss = lambda: 3 * var
       opt = gradient_descent.SGD(learning_rate=1.0, clipvalue=1.0)
@@ -347,7 +347,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testGradClipNorm(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       var = variables.Variable([1.0])
       loss = lambda: 3 * var
       opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0)
@@ -368,7 +368,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testWeights(self):
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       opt1 = adam.Adam(learning_rate=1.0)
       var1 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
       loss1 = lambda: 3 * var1
@@ -671,7 +671,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase):
       self.skipTest(
           'v1 optimizer does not run in eager mode')
     np.random.seed(1331)
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       train_samples = 20
       input_dim = 3
       num_classes = 2
@@ -757,7 +757,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase):
       self.skipTest(
           'v1 optimizer does not run in eager mode')
     np.random.seed(1331)
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       train_samples = 20
       input_dim = 3
       num_classes = 2
@@ -814,7 +814,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase):
       self.skipTest(
           'v1 optimizer does not run in eager mode')
     np.random.seed(1331)
-    with test_util.use_gpu():
+    with testing_utils.use_gpu():
       train_samples = 20
       input_dim = 3
       num_classes = 2
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
index 5fd91588227..35f795edb53 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import combinations
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule
 from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import embedding_ops
@@ -104,7 +105,7 @@ class RMSpropOptimizerTest(test.TestCase):
   def testDense(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS:
-      with ops.get_default_graph().as_default(), test_util.use_gpu():
+      with ops.get_default_graph().as_default(), testing_utils.use_gpu():
         # Initialize variables for numpy implementation.
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
         grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
@@ -379,7 +380,7 @@ class RMSpropOptimizerTest(test.TestCase):
   def testSparse(self):
     # TODO(tanzheny, omalleyt): Fix test in eager mode.
     for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS:
-      with ops.get_default_graph().as_default(), test_util.use_gpu():
+      with ops.get_default_graph().as_default(), testing_utils.use_gpu():
         # Initialize variables for numpy implementation.
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
         grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index cceaabe37a5..550ff664823 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
 import functools
 import threading
 
@@ -26,6 +27,7 @@ import numpy as np
 from tensorflow.python import tf2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
@@ -917,3 +919,21 @@ def _set_v2_dtype_behavior(fn, enabled):
       base_layer_utils.V2_DTYPE_BEHAVIOR = v2_dtype_behavior
 
   return tf_decorator.make_decorator(fn, wrapper)
+
+
+@contextlib.contextmanager
+def device(should_use_gpu):
+  """Uses GPU:0 when requested and available; otherwise falls back to CPU:0."""
+  if should_use_gpu and test_util.is_gpu_available():
+    dev = '/device:GPU:0'
+  else:
+    dev = '/device:CPU:0'
+  with ops.device(dev):
+    yield
+
+
+@contextlib.contextmanager
+def use_gpu():
+  """Uses GPU:0 when available; otherwise falls back to CPU:0."""
+  with device(should_use_gpu=True):
+    yield