diff --git a/tensorflow/compiler/tests/categorical_op_test.py b/tensorflow/compiler/tests/categorical_op_test.py
index f4918e50dc8..5d5e486f616 100644
--- a/tensorflow/compiler/tests/categorical_op_test.py
+++ b/tensorflow/compiler/tests/categorical_op_test.py
@@ -57,11 +57,11 @@ class CategoricalTest(xla_test.XLATestCase):
     Returns:
       Frequencies from sampled classes; shape [batch_size, num_classes].
     """
-    with self.cached_session() as sess, self.test_scope():
+    with self.cached_session(), self.test_scope():
       random_seed.set_random_seed(1618)
       op = random_ops.multinomial(logits, num_samples,
                                   output_dtype=dtypes.int32)
-      d = sess.run(op)
+      d = self.evaluate(op)
 
     batch_size, num_classes = logits.shape
     freqs_mat = []
@@ -80,15 +80,15 @@ class CategoricalTest(xla_test.XLATestCase):
 
   def _testRngIsNotConstant(self, rng, dtype, output_dtype):
     # Tests that 'rng' does not always return the same value.
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         x = rng(dtype, output_dtype)
 
       # The random-number generator, if working correctly, should produce the
       # same output multiple times with low probability.
-      y = sess.run(x)
-      z = sess.run(x)
-      w = sess.run(x)
+      y = self.evaluate(x)
+      z = self.evaluate(x)
+      w = self.evaluate(x)
 
       # We use exact equality here. If the random-number generator is producing
       # deterministic output, all three outputs will be bitwise identical.
@@ -108,12 +108,12 @@ class CategoricalTest(xla_test.XLATestCase):
   def testCategoricalIsInRange(self):
     for dtype in self.float_types:
       for output_dtype in self.output_dtypes():
-        with self.cached_session() as sess:
+        with self.cached_session():
           with self.test_scope():
             x = random_ops.multinomial(
                 array_ops.ones(shape=[1, 20], dtype=dtype), 1000,
                 output_dtype=output_dtype)
-          y = sess.run(x)
+          y = self.evaluate(x)
           self.assertTrue((y >= 0).sum() == 1000)
           self.assertTrue((y < 20).sum() == 1000)
 
@@ -170,11 +170,11 @@ class CategoricalTest(xla_test.XLATestCase):
             self.assertEqual(s0 == s1, np.all(v0 == v1))
 
   def testEmpty(self):
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         x = random_ops.multinomial(
             array_ops.zeros([42, 40]), 0, output_dtype=dtypes.int32)
-        y = sess.run(x)
+        y = self.evaluate(x)
         self.assertEqual(y.shape, (42, 0))
 
   def testEmptyStateless(self):
diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py
index 30fbe6f701f..2187f57960f 100644
--- a/tensorflow/compiler/tests/concat_ops_test.py
+++ b/tensorflow/compiler/tests/concat_ops_test.py
@@ -254,7 +254,7 @@ class ConcatTest(xla_test.XLATestCase):
   def DISABLED_testZeroSize(self):
     # Verify that concat doesn't crash and burn for zero size inputs
     np.random.seed(7)
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         for shape0 in (), (2,):
           axis = len(shape0)
@@ -270,7 +270,7 @@ class ConcatTest(xla_test.XLATestCase):
                 self.assertAllEqual(c.eval(), correct)
                 # Check gradients
                 dc = np.random.randn(*c.get_shape().as_list())
-                dxs = sess.run(gradients_impl.gradients(c, xs, dc))
+                dxs = self.evaluate(gradients_impl.gradients(c, xs, dc))
                 self.assertAllEqual(dc, np.concatenate(dxs, axis=axis))
 
   def testConcatTuple(self):
@@ -330,47 +330,47 @@ class ConcatTest(xla_test.XLATestCase):
 class ConcatOffsetTest(xla_test.XLATestCase):
 
   def testBasic(self):
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         cdim = constant_op.constant(1, dtypes.int32)
         s0 = constant_op.constant([2, 3, 5], dtypes.int32)
         s1 = constant_op.constant([2, 7, 5], dtypes.int32)
         s2 = constant_op.constant([2, 20, 5], dtypes.int32)
         off = gen_array_ops.concat_offset(cdim, [s0, s1, s2])
-        ans = sess.run(off)
+        ans = self.evaluate(off)
         self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]])
 
 
 class PackTest(xla_test.XLATestCase):
 
   def testBasic(self):
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         s0 = constant_op.constant([2, 3, 5], dtypes.int32)
         s1 = constant_op.constant([2, 7, 5], dtypes.int32)
         s2 = constant_op.constant([2, 20, 5], dtypes.int32)
         packed = array_ops.stack([s0, s1, s2])
-        ans = sess.run(packed)
+        ans = self.evaluate(packed)
         self.assertAllEqual(ans, [[2, 3, 5], [2, 7, 5], [2, 20, 5]])
 
   def testScalars(self):
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         s0 = constant_op.constant(2, dtypes.int32)
         s1 = constant_op.constant(3, dtypes.int32)
         s2 = constant_op.constant(5, dtypes.int32)
         packed = array_ops.stack([s0, s1, s2])
-        ans = sess.run(packed)
+        ans = self.evaluate(packed)
         self.assertAllEqual(ans, [2, 3, 5])
 
   def testEmpty(self):
-    with self.cached_session() as sess:
+    with self.cached_session():
       with self.test_scope():
         s0 = constant_op.constant([[]], dtypes.int32)
         s1 = constant_op.constant([[]], dtypes.int32)
         s2 = constant_op.constant([[]], dtypes.int32)
         packed = array_ops.stack([s0, s1, s2])
-        ans = sess.run(packed)
+        ans = self.evaluate(packed)
         self.assertAllEqual(ans, [[[]], [[]], [[]]])
 
 
diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py
index 23c94cf2451..bf5ea7b1fb6 100644
--- a/tensorflow/compiler/tests/dense_layer_test.py
+++ b/tensorflow/compiler/tests/dense_layer_test.py
@@ -72,7 +72,7 @@ class DenseLayerTest(test.TestCase):
       x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32)
       y = layers.dense(x, 3)
 
-      sess.run(variables.initialize_all_variables())
+      self.evaluate(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
       test_utils.RunWithWarmup(
           sess,
@@ -97,7 +97,7 @@ class DenseLayerTest(test.TestCase):
       with jit_scope():
         y = layers.dense(x, 3)
 
-      sess.run(variables.initialize_all_variables())
+      self.evaluate(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
       test_utils.RunWithWarmup(
           sess,
@@ -126,7 +126,7 @@ class DenseLayerTest(test.TestCase):
       with jit_scope():
         y = layers.dense(x, 3)
 
-      sess.run(variables.initialize_all_variables())
+      self.evaluate(variables.initialize_all_variables())
       run_metadata = config_pb2.RunMetadata()
       test_utils.RunWithWarmup(
           sess,
diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py
index 63cee550fde..2af32b537ba 100644
--- a/tensorflow/compiler/tests/eager_test.py
+++ b/tensorflow/compiler/tests/eager_test.py
@@ -101,12 +101,12 @@ class EagerTest(xla_test.XLATestCase):
       self.assertAllEqual(15, product)
 
     # Run some ops graphly
-    with context.graph_mode(), self.cached_session() as sess:
+    with context.graph_mode(), self.cached_session():
       with self.test_scope():
         three = constant_op.constant(3)
         five = constant_op.constant(5)
         product = three * five
-        self.assertAllEqual(15, sess.run(product))
+        self.assertAllEqual(15, self.evaluate(product))
 
   def testDegenerateSlices(self):
     with self.test_scope():
diff --git a/tensorflow/compiler/tests/function_test.py b/tensorflow/compiler/tests/function_test.py
index b1891b918c6..a61827c2ae4 100644
--- a/tensorflow/compiler/tests/function_test.py
+++ b/tensorflow/compiler/tests/function_test.py
@@ -40,7 +40,7 @@ class FunctionTest(xla_test.XLATestCase):
     bval = np.array([5, 6, 7, 8]).reshape([2, 2]).astype(np.float32)
     expected = APlus2B(aval, bval)
 
-    with self.cached_session() as sess:
+    with self.cached_session():
 
       @function.Defun(dtypes.float32, dtypes.float32)
       def Foo(a, b):
@@ -50,7 +50,7 @@ class FunctionTest(xla_test.XLATestCase):
       b = constant_op.constant(bval, name="b")
       with self.test_scope():
         call_f = Foo(a, b)
-      result = sess.run(call_f)
+      result = self.evaluate(call_f)
     self.assertAllClose(result, expected, rtol=1e-3)
 
   def testNestedFunctions(self):
@@ -66,7 +66,7 @@ class FunctionTest(xla_test.XLATestCase):
     bval = np.array([4, 3, 2, 1]).reshape([2, 2]).astype(np.float32)
     expected = APlus2B(aval, bval)
 
-    with self.cached_session() as sess:
+    with self.cached_session():
 
       @function.Defun(dtypes.float32, dtypes.float32)
       def Foo(a, b):
@@ -76,7 +76,7 @@ class FunctionTest(xla_test.XLATestCase):
       b = constant_op.constant(bval, name="b")
       with self.test_scope():
         call_g = Foo(a, b)
-      result = sess.run(call_g)
+      result = self.evaluate(call_g)
     self.assertAllClose(result, expected, rtol=1e-3)
 
   def testFunctionMultipleRetvals(self):
@@ -90,7 +90,7 @@ class FunctionTest(xla_test.XLATestCase):
     bval = np.array([5, 6, 7, 8]).reshape([2, 2]).astype(np.float32)
     expected = Func(aval, bval)
 
-    with self.cached_session() as sess:
+    with self.cached_session():
 
       @function.Defun(dtypes.float32, dtypes.float32)
       def Foo(a, b):
@@ -100,7 +100,7 @@ class FunctionTest(xla_test.XLATestCase):
       b = constant_op.constant(bval, name="b")
       with self.test_scope():
         call_f = Foo(a, b)
-      result = sess.run(call_f)
+      result = self.evaluate(call_f)
     self.assertAllClose(result, expected, rtol=1e-3)
 
   def testCompileTimeConstantsInDefun(self):
diff --git a/tensorflow/compiler/tests/listdiff_op_test.py b/tensorflow/compiler/tests/listdiff_op_test.py
index 58622114e4f..0210201fa71 100644
--- a/tensorflow/compiler/tests/listdiff_op_test.py
+++ b/tensorflow/compiler/tests/listdiff_op_test.py
@@ -33,13 +33,13 @@ class ListDiffTest(xla_test.XLATestCase):
   def _testListDiff(self, x, y, out, idx):
     for dtype in [dtypes.int32, dtypes.int64]:
       for index_dtype in [dtypes.int32, dtypes.int64]:
-        with self.cached_session() as sess:
+        with self.cached_session():
           x_tensor = ops.convert_to_tensor(x, dtype=dtype)
           y_tensor = ops.convert_to_tensor(y, dtype=dtype)
           with self.test_scope():
             out_tensor, idx_tensor = array_ops.listdiff(
                 x_tensor, y_tensor, out_idx=index_dtype)
-            tf_out, tf_idx = sess.run([out_tensor, idx_tensor])
+            tf_out, tf_idx = self.evaluate([out_tensor, idx_tensor])
         self.assertAllEqual(out, tf_out)
         self.assertAllEqual(idx, tf_idx)
         self.assertEqual(1, out_tensor.get_shape().ndims)
diff --git a/tensorflow/compiler/tests/lstm_test.py b/tensorflow/compiler/tests/lstm_test.py
index 265c0b6d141..776ed899e68 100644
--- a/tensorflow/compiler/tests/lstm_test.py
+++ b/tensorflow/compiler/tests/lstm_test.py
@@ -88,8 +88,8 @@ class LSTMTest(test.TestCase):
                  (basename, m_prev_scalar, c_prev_scalar, pad_scalar))
 
       # Initialize variables and run the unrolled LSTM step.
-      sess.run(variables.global_variables_initializer())
-      return sess.run([m, c])
+      self.evaluate(variables.global_variables_initializer())
+      return self.evaluate([m, c])
 
   def testLSTMCell(self):
     # Run with all-0 weights, no padding.
@@ -173,8 +173,8 @@ class LSTMTest(test.TestCase):
                  (basename, m_init_scalar, c_init_scalar, pad_scalar))
 
       # Initialize variables and run the unrolled LSTM layer.
-      sess.run(variables.global_variables_initializer())
-      return sess.run(out_seq)
+      self.evaluate(variables.global_variables_initializer())
+      return self.evaluate(out_seq)
 
   def testLSTMLayer(self):
     # Run with all-0 weights, no padding.
diff --git a/tensorflow/compiler/tests/placeholder_test.py b/tensorflow/compiler/tests/placeholder_test.py
index 77bb839409f..9671ae0ae97 100644
--- a/tensorflow/compiler/tests/placeholder_test.py
+++ b/tensorflow/compiler/tests/placeholder_test.py
@@ -33,7 +33,7 @@ class PlaceholderTest(xla_test.XLATestCase):
       ph = array_ops.placeholder_with_default(v, shape=[])
       out = ph * 2
       sess.run(variables.variables_initializer([v]))
-      self.assertEqual(8.0, sess.run(out))
+      self.assertEqual(8.0, self.evaluate(out))
 
   def test_placeholder_with_default_fed(self):
     with self.cached_session() as sess, self.test_scope():
diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py
index 36ef6ed5fee..97ffad34c00 100644
--- a/tensorflow/compiler/tests/random_ops_test.py
+++ b/tensorflow/compiler/tests/random_ops_test.py
@@ -46,9 +46,9 @@ class RandomOpsTest(xla_test.XLATestCase):
 
       # The random-number generator, if working correctly, should produce the
       # same output multiple times with low probability.
-      y = sess.run(x)
-      z = sess.run(x)
-      w = sess.run(x)
+      y = self.evaluate(x)
+      z = self.evaluate(x)
+      w = self.evaluate(x)
 
       # We use exact equality here. If the random-number generator is producing
       # deterministic output, all three outputs will be bitwise identical.
@@ -83,7 +83,7 @@ class RandomOpsTest(xla_test.XLATestCase):
         with self.test_scope():
           x = random_ops.random_uniform(
               shape=[1000], dtype=dtype, minval=-2, maxval=33)
-        y = sess.run(x)
+        y = self.evaluate(x)
         self.assertTrue((y >= -2).sum() == 1000)
         self.assertTrue((y < 33).sum() == 1000)
 
@@ -102,7 +102,7 @@ class RandomOpsTest(xla_test.XLATestCase):
       with self.cached_session() as sess:
         with self.test_scope():
           x = random_ops.truncated_normal(shape=[count], dtype=dtype)
-        y = sess.run(x)
+        y = self.evaluate(x)
 
         def normal_cdf(x):
           return .5 * math.erfc(-x / math.sqrt(2))
@@ -111,7 +111,7 @@ class RandomOpsTest(xla_test.XLATestCase):
           return math.exp(-(x**2) / 2.) / math.sqrt(2 * math.pi)
 
         def probit(x, sess=sess):
-          return sess.run(special_math.ndtri(x))
+          return self.evaluate(special_math.ndtri(x))
 
         a = -2.
         b = 2.
@@ -148,7 +148,7 @@ class RandomOpsTest(xla_test.XLATestCase):
       with self.test_scope():
         x = math_ops.range(1 << 16)
         shuffle = random_ops.random_shuffle(x)
-      result = sess.run(shuffle)
+      result = self.evaluate(shuffle)
       expected = range(1 << 16)
       # Compare sets to avoid randomness behavior changes but make sure still
       # have all the values.
@@ -159,7 +159,7 @@ class RandomOpsTest(xla_test.XLATestCase):
       with self.test_scope():
         x = array_ops.diag(math_ops.range(20))
         shuffle = random_ops.random_shuffle(x)
-      result = sess.run(shuffle)
+      result = self.evaluate(shuffle)
       expected = np.diag(range(20)).flatten()
       # Compare sets to avoid randomness behavior changes but make sure still
       # have all the values.
diff --git a/tensorflow/compiler/tests/stateless_random_ops_test.py b/tensorflow/compiler/tests/stateless_random_ops_test.py
index 21708aa1587..ee7ca7e6f19 100644
--- a/tensorflow/compiler/tests/stateless_random_ops_test.py
+++ b/tensorflow/compiler/tests/stateless_random_ops_test.py
@@ -156,7 +156,7 @@ class StatelessRandomOpsTest(xla_test.XLATestCase):
           return math.exp(-(x**2) / 2.) / math.sqrt(2 * math.pi)
 
         def probit(x, sess=sess):
-          return sess.run(special_math.ndtri(x))
+          return self.evaluate(special_math.ndtri(x))
 
         a = -2.
         b = 2.
diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py
index c8208adb587..d7e26d79c4c 100644
--- a/tensorflow/compiler/tests/tensor_array_ops_test.py
+++ b/tensorflow/compiler/tests/tensor_array_ops_test.py
@@ -505,7 +505,7 @@ class TensorArrayTest(xla_test.XLATestCase):
                 [-0.5, 1.5],  # read(0) gradient
                 [20.0, 30.0, 40.0, 50.0],  # concat gradient
             ])
-      grad_vals = sess.run(grad_r)  # 2 + 2 entries
+      grad_vals = self.evaluate(grad_r)  # 2 + 2 entries
 
       self.assertAllClose([2.0 - 0.5 + 20.0, 3.0 + 1.5 + 30.0], grad_vals[0])
       self.assertAllEqual([4.0 + 40.0, 5.0 + 50.0], grad_vals[1])
diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py
index 77cdeac8168..fcd7ac5ba1c 100644
--- a/tensorflow/compiler/tests/variable_ops_test.py
+++ b/tensorflow/compiler/tests/variable_ops_test.py
@@ -77,7 +77,7 @@ class VariableOpsTest(xla_test.XLATestCase):
         sess.run(variables.variables_initializer([v]))
         x = v.sparse_read(2)
         self.assertAllClose(
-            np.array([8j, 9, 10, 11]).astype(dtype), sess.run(x))
+            np.array([8j, 9, 10, 11]).astype(dtype), self.evaluate(x))
 
   def testSparseRead1DIndices(self):
     for dtype in self.numeric_types:
@@ -89,7 +89,7 @@ class VariableOpsTest(xla_test.XLATestCase):
         x = v.sparse_read([2, 1])
         self.assertAllClose(
             np.array([[8, 9, 10, 11], [4, 5, 6j, 7]]).astype(dtype),
-            sess.run(x))
+            self.evaluate(x))
 
   def testSparseRead2DIndices(self):
     for dtype in self.numeric_types:
@@ -102,7 +102,7 @@ class VariableOpsTest(xla_test.XLATestCase):
         self.assertAllClose(
             np.array([[[8, 9, 10, 11], [4, 5, 6, 7]],
                       [[0, 1, 2j, 3], [8, 9, 10, 11]]]).astype(dtype),
-            sess.run(x))
+            self.evaluate(x))
 
   def testSparseRead2DIndices3DTensor(self):
     for dtype in self.numeric_types:
@@ -115,9 +115,9 @@ class VariableOpsTest(xla_test.XLATestCase):
         x = v.sparse_read([[2, 1], [3, 0]])
         self.assertAllClose(
             np.array(
-                [[[[20, 21, 22], [23, 24j, 25]], [[10, 11, 12], [13, 14, 15]]
-                 ], [[[30, 31, 32], [33, 34, 35]], [[0, 1, 2], [3, 4, 5]]]
-                ],).astype(dtype), sess.run(x))
+                [[[[20, 21, 22], [23, 24j, 25]], [[10, 11, 12], [13, 14, 15]]],
+                 [[[30, 31, 32], [33, 34, 35]], [[0, 1, 2], [3, 4, 5]]]
+                ],).astype(dtype), self.evaluate(x))
 
   def testShape(self):
     for dtype in self.numeric_types:
@@ -229,7 +229,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_add(
               handle, [0], constant_op.constant([[2]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertAllEqual(sess.run(read), [[3], [7]])
+      self.assertAllEqual(self.evaluate(read), [[3], [7]])
 
   def testScatterSub(self):
     with self.test_session() as sess, self.test_scope():
@@ -242,7 +242,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_sub(
               handle, [1], constant_op.constant([[2]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertAllEqual(sess.run(read), [[4], [-1]])
+      self.assertAllEqual(self.evaluate(read), [[4], [-1]])
 
   def testScatterMul(self):
     with self.test_session() as sess, self.test_scope():
@@ -255,7 +255,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_mul(
               handle, [0], constant_op.constant([[5]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[5]])
+      self.assertEqual(self.evaluate(read), [[5]])
 
   def testScatterDiv(self):
     with self.test_session() as sess, self.test_scope():
@@ -268,7 +268,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_div(
               handle, [0], constant_op.constant([[3]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertAllEqual(sess.run(read), [[2]])
+      self.assertAllEqual(self.evaluate(read), [[2]])
 
   def testScatterMin(self):
     with self.test_session() as sess, self.test_scope():
@@ -281,7 +281,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_min(
               handle, [0], constant_op.constant([[3]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[3]])
+      self.assertEqual(self.evaluate(read), [[3]])
 
   def testScatterMax(self):
     with self.test_session() as sess, self.test_scope():
@@ -294,7 +294,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_max(
               handle, [0], constant_op.constant([[3]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[6]])
+      self.assertEqual(self.evaluate(read), [[6]])
 
   def testScatterUpdate(self):
     with self.test_session() as sess, self.test_scope():
@@ -307,7 +307,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_update(
               handle, [0], constant_op.constant([[3]], dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[3]])
+      self.assertEqual(self.evaluate(read), [[3]])
 
   def testScatterAddScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -320,7 +320,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_add(
               handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[3]])
+      self.assertEqual(self.evaluate(read), [[3]])
 
   def testScatterSubScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -333,7 +333,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_sub(
               handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[-1]])
+      self.assertEqual(self.evaluate(read), [[-1]])
 
   def testScatterMulScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -346,7 +346,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_mul(
               handle, [0], constant_op.constant(5, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[5]])
+      self.assertEqual(self.evaluate(read), [[5]])
 
   def testScatterDivScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -359,7 +359,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_div(
               handle, [0], constant_op.constant(3, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[2]])
+      self.assertEqual(self.evaluate(read), [[2]])
 
   def testScatterMinScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -372,7 +372,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_min(
               handle, [0], constant_op.constant(3, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[3]])
+      self.assertEqual(self.evaluate(read), [[3]])
 
   def testScatterMaxScalar(self):
     with self.test_session() as sess, self.test_scope():
@@ -385,7 +385,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           resource_variable_ops.resource_scatter_max(
               handle, [0], constant_op.constant(3, dtype=dtypes.int32)))
       read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
-      self.assertEqual(sess.run(read), [[6]])
+      self.assertEqual(self.evaluate(read), [[6]])
 
   def testScatterNdAddOps(self):
     with self.test_session() as sess, self.test_scope():
@@ -400,7 +400,7 @@ class VariableOpsTest(xla_test.XLATestCase):
       sess.run(gen_state_ops.resource_scatter_nd_add(handle, indices, updates))
       read = resource_variable_ops.read_variable_op(
           handle, dtype=dtypes.float32)
-      self.assertAllClose(expected, sess.run(read))
+      self.assertAllClose(expected, self.evaluate(read))
 
   def testScatterNdUpdateAddOps(self):
     with self.test_session() as sess, self.test_scope():
@@ -416,7 +416,7 @@ class VariableOpsTest(xla_test.XLATestCase):
           gen_state_ops.resource_scatter_nd_update(handle, indices, updates))
       read = resource_variable_ops.read_variable_op(
           handle, dtype=dtypes.float32)
-      self.assertAllClose(expected, sess.run(read))
+      self.assertAllClose(expected, self.evaluate(read))
 
 
 class StridedSliceAssignChecker(object):
diff --git a/tensorflow/compiler/tests/xla_device_test.py b/tensorflow/compiler/tests/xla_device_test.py
index 28d61fb07dc..ef55292b1be 100644
--- a/tensorflow/compiler/tests/xla_device_test.py
+++ b/tensorflow/compiler/tests/xla_device_test.py
@@ -81,7 +81,7 @@ class XlaDeviceTest(xla_test.XLATestCase):
     with self.cached_session() as sess:
       with self.test_scope():
         x = gen_control_flow_ops.control_trigger()
-      sess.run(x)
+      self.evaluate(x)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/examples/autograph/integration_tests/keras_test.py b/tensorflow/examples/autograph/integration_tests/keras_test.py
index dca7c07b470..fc0b0736965 100644
--- a/tensorflow/examples/autograph/integration_tests/keras_test.py
+++ b/tensorflow/examples/autograph/integration_tests/keras_test.py
@@ -93,10 +93,10 @@ class KerasTest(tf.test.TestCase):
         init = tf.global_variables_initializer()
 
         with tf.Session() as sess:
-          sess.run(init)
+          self.evaluate(init)
           sample_input = tf.random_uniform((1, 10, 10, 1))
           output = model(sample_input)  # pylint: disable=not-callable
-          self.assertEqual(sess.run(output).shape, (1, 3))
+          self.assertEqual(self.evaluate(output).shape, (1, 3))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/examples/autograph/integration_tests/list_literals_test.py b/tensorflow/examples/autograph/integration_tests/list_literals_test.py
index 917f5ff9d84..e85d4abcfc9 100644
--- a/tensorflow/examples/autograph/integration_tests/list_literals_test.py
+++ b/tensorflow/examples/autograph/integration_tests/list_literals_test.py
@@ -34,7 +34,7 @@ class ListLiteralsTest(tf.test.TestCase):
     result = converted()
 
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(result), [1, 2, 3])
+      self.assertAllEqual(self.evaluate(result), [1, 2, 3])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/examples/speech_commands/input_data_test.py b/tensorflow/examples/speech_commands/input_data_test.py
index b766ba6de0d..33b58b9d09b 100644
--- a/tensorflow/examples/speech_commands/input_data_test.py
+++ b/tensorflow/examples/speech_commands/input_data_test.py
@@ -35,7 +35,7 @@ class InputDataTest(test.TestCase):
     with self.cached_session() as sess:
       sample_data = tf.zeros([32000, 2])
       wav_encoder = contrib_audio.encode_wav(sample_data, 16000)
-      wav_data = sess.run(wav_encoder)
+      wav_data = self.evaluate(wav_encoder)
     return wav_data
 
   def _saveTestWavFile(self, filename, wav_data):
diff --git a/tensorflow/examples/speech_commands/label_wav_test.py b/tensorflow/examples/speech_commands/label_wav_test.py
index f0af2a47987..77a88f98e16 100644
--- a/tensorflow/examples/speech_commands/label_wav_test.py
+++ b/tensorflow/examples/speech_commands/label_wav_test.py
@@ -33,7 +33,7 @@ class LabelWavTest(test.TestCase):
     with self.cached_session() as sess:
       sample_data = tf.zeros([1000, 2])
       wav_encoder = contrib_audio.encode_wav(sample_data, 16000)
-      wav_data = sess.run(wav_encoder)
+      wav_data = self.evaluate(wav_encoder)
     return wav_data
 
   def _saveTestWavFile(self, filename, wav_data):
diff --git a/tensorflow/examples/speech_commands/wav_to_features_test.py b/tensorflow/examples/speech_commands/wav_to_features_test.py
index 87f29876939..cb8ea912fa2 100644
--- a/tensorflow/examples/speech_commands/wav_to_features_test.py
+++ b/tensorflow/examples/speech_commands/wav_to_features_test.py
@@ -33,7 +33,7 @@ class WavToFeaturesTest(test.TestCase):
     with self.cached_session() as sess:
       sample_data = tf.zeros([32000, 2])
       wav_encoder = contrib_audio.encode_wav(sample_data, 16000)
-      wav_data = sess.run(wav_encoder)
+      wav_data = self.evaluate(wav_encoder)
     return wav_data
 
   def _saveTestWavFile(self, filename, wav_data):
diff --git a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
index eeb48d12311..9c00d0501ab 100644
--- a/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
+++ b/tensorflow/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py
@@ -111,7 +111,7 @@ class UnidirectionalSequenceLstmTest(test_util.TensorFlowTestCase):
 
     # Initialize variables
     init = tf.global_variables_initializer()
-    sess.run(init)
+    self.evaluate(init)
     for _ in range(TRAIN_STEPS):
       batch_x, batch_y = self.mnist.train.next_batch(
           batch_size=self.batch_size, shuffle=False)
diff --git a/tensorflow/python/autograph/converters/asserts_test.py b/tensorflow/python/autograph/converters/asserts_test.py
index eef628aeb6f..803b6a06dab 100644
--- a/tensorflow/python/autograph/converters/asserts_test.py
+++ b/tensorflow/python/autograph/converters/asserts_test.py
@@ -41,7 +41,7 @@ class AssertsTest(converter_testing.TestCase):
         op = result.test_fn(constant_op.constant(False))
         with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                      'test message'):
-          sess.run(op)
+          self.evaluate(op)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/converters/call_trees_test.py b/tensorflow/python/autograph/converters/call_trees_test.py
index 916c736fb4b..9d760167a9d 100644
--- a/tensorflow/python/autograph/converters/call_trees_test.py
+++ b/tensorflow/python/autograph/converters/call_trees_test.py
@@ -94,7 +94,7 @@ class CallTreesTest(converter_testing.TestCase):
                         dtypes.int64) as result:
       with self.cached_session() as sess:
         self.assertTrue(isinstance(result.test_fn(), ops.Tensor))
-        self.assertIn(sess.run(result.test_fn()), (0, 1, 2))
+        self.assertIn(self.evaluate(result.test_fn()), (0, 1, 2))
 
   def test_uncompiled_modules(self):
 
@@ -113,7 +113,7 @@ class CallTreesTest(converter_testing.TestCase):
     with self.compiled(node, ns) as result:
       with self.cached_session() as sess:
         result_tensor = result.test_fn(constant_op.constant(1))
-        self.assertEquals(sess.run(result_tensor), 3)
+        self.assertEquals(self.evaluate(result_tensor), 3)
 
   def test_call_to_decorated_function(self):
 
diff --git a/tensorflow/python/autograph/converters/lists_test.py b/tensorflow/python/autograph/converters/lists_test.py
index f6da845fcc3..39843c7d74f 100644
--- a/tensorflow/python/autograph/converters/lists_test.py
+++ b/tensorflow/python/autograph/converters/lists_test.py
@@ -68,7 +68,7 @@ class ListTest(converter_testing.TestCase):
       with self.cached_session() as sess:
         tl = result.test_fn()
         r = list_ops.tensor_list_stack(tl, dtypes.int32)
-        self.assertAllEqual(sess.run(r), [1, 2, 3])
+        self.assertAllEqual(self.evaluate(r), [1, 2, 3])
 
   def test_list_pop(self):
 
@@ -91,8 +91,8 @@ class ListTest(converter_testing.TestCase):
       with self.cached_session() as sess:
         ts, tl = result.test_fn()
         r = list_ops.tensor_list_stack(tl, dtypes.int32)
-        self.assertAllEqual(sess.run(r), [1, 2])
-        self.assertAllEqual(sess.run(ts), 3)
+        self.assertAllEqual(self.evaluate(r), [1, 2])
+        self.assertAllEqual(self.evaluate(ts), 3)
 
   def test_double_list_pop(self):
 
@@ -123,7 +123,7 @@ class ListTest(converter_testing.TestCase):
 
     with self.compiled(node, {}, array_ops.stack, dtypes.int32) as result:
       with self.cached_session() as sess:
-        self.assertAllEqual(sess.run(result.test_fn()), [1, 2, 3])
+        self.assertAllEqual(self.evaluate(result.test_fn()), [1, 2, 3])
 
   # TODO(mdan): Add a test with tf.stack with axis kwarg.
 
diff --git a/tensorflow/python/autograph/converters/side_effect_guards_test.py b/tensorflow/python/autograph/converters/side_effect_guards_test.py
index cef3199169c..f6d0f73b5b9 100644
--- a/tensorflow/python/autograph/converters/side_effect_guards_test.py
+++ b/tensorflow/python/autograph/converters/side_effect_guards_test.py
@@ -48,12 +48,12 @@ class SideEffectGuardsTest(converter_testing.TestCase):
     with self.compiled(node, {}, state_ops.assign) as result:
       with self.cached_session() as sess:
         v = variable_scope.get_variable('test', initializer=2)
-        sess.run(v.initializer)
-        sess.run(result.test_fn(v))
+        self.evaluate(v.initializer)
+        self.evaluate(result.test_fn(v))
         # TODO(mdan): Add support for this use case.
         # Right now the variable `a` is not conditioned on the `assign` because
         # there's no way to add control dependencies to a variable object.
-        self.assertEqual(2, sess.run(v))
+        self.assertEqual(2, self.evaluate(v))
 
   def test_side_effect_on_used_variable(self):
 
@@ -69,11 +69,11 @@ class SideEffectGuardsTest(converter_testing.TestCase):
     with self.compiled(node, {}, state_ops.assign) as result:
       with self.cached_session() as sess:
         v = variable_scope.get_variable('test', initializer=2)
-        sess.run(v.initializer)
-        sess.run(result.test_fn(v))
+        self.evaluate(v.initializer)
+        self.evaluate(result.test_fn(v))
         # TODO(mdan): Ensure the result of test_fn(v) is also deterministic.
         # Right now it's 3 or 4 based on whether the read is synchronized.
-        self.assertEqual(3, sess.run(v))
+        self.assertEqual(3, self.evaluate(v))
 
   def test_side_effect_on_tensor(self):
 
@@ -109,10 +109,10 @@ class SideEffectGuardsTest(converter_testing.TestCase):
     with self.compiled(node, {}, state_ops.assign_add) as result:
       with self.cached_session() as sess:
         v = variable_scope.get_variable('test', initializer=2)
-        sess.run(v.initializer)
-        sess.run(result.test_fn(v))
+        self.evaluate(v.initializer)
+        self.evaluate(result.test_fn(v))
         # TODO(mdan): Ensure the result of test_fn(v) is also deterministic.
-        self.assertEqual(4, sess.run(v))
+        self.assertEqual(4, self.evaluate(v))
 
   def test_multiline_nested_block(self):
 
@@ -130,10 +130,10 @@ class SideEffectGuardsTest(converter_testing.TestCase):
     with self.compiled(node, {}, state_ops.assign, ops.name_scope) as result:
       with self.cached_session() as sess:
         v = variable_scope.get_variable('test', initializer=2)
-        sess.run(v.initializer)
-        sess.run(result.test_fn(v))
+        self.evaluate(v.initializer)
+        self.evaluate(result.test_fn(v))
         # TODO(mdan): Ensure the result of test_fn(v) is also deterministic.
-        self.assertEqual(3, sess.run(v))
+        self.assertEqual(3, self.evaluate(v))
 
   def test_multiline_block_unsafe(self):
 
@@ -153,10 +153,10 @@ class SideEffectGuardsTest(converter_testing.TestCase):
                        state_ops.assign_add) as result:
       with self.cached_session() as sess:
         v = variable_scope.get_variable('test', initializer=2)
-        sess.run(v.initializer)
-        sess.run(result.test_fn(v))
+        self.evaluate(v.initializer)
+        self.evaluate(result.test_fn(v))
         # TODO(mdan): Ensure the result of test_fn(v) is also deterministic.
-        self.assertEqual(4, sess.run(v))
+        self.assertEqual(4, self.evaluate(v))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/converters/slices_test.py b/tensorflow/python/autograph/converters/slices_test.py
index e190a7cfe84..bd049afdfce 100644
--- a/tensorflow/python/autograph/converters/slices_test.py
+++ b/tensorflow/python/autograph/converters/slices_test.py
@@ -49,7 +49,7 @@ class SliceTest(converter_testing.TestCase):
         tl = list_ops.tensor_list_from_tensor(
             [1, 2], element_shape=constant_op.constant([], dtype=dtypes.int32))
         y = result.test_fn(tl)
-        self.assertEqual(2, sess.run(y))
+        self.assertEqual(2, self.evaluate(y))
 
   def test_index_access_multiple_definitions(self):
 
diff --git a/tensorflow/python/autograph/core/errors_test.py b/tensorflow/python/autograph/core/errors_test.py
index aa6c293268c..00c8a726eda 100644
--- a/tensorflow/python/autograph/core/errors_test.py
+++ b/tensorflow/python/autograph/core/errors_test.py
@@ -55,7 +55,7 @@ class RuntimeErrorsTest(test.TestCase):
     with self.assertRaises(errors.TfRuntimeError) as cm:
       with errors.improved_errors(zero_div_caller):
         with self.cached_session() as sess:
-          sess.run(ops)
+          self.evaluate(ops)
 
     for frame in cm.exception.custom_traceback:
       _, _, function_name, _ = frame
@@ -70,7 +70,7 @@ class RuntimeErrorsTest(test.TestCase):
     with self.assertRaises(errors.TfRuntimeError) as cm:
       with errors.improved_errors(zero_div_caller):
         with self.cached_session() as sess:
-          sess.run(ops)
+          self.evaluate(ops)
 
     all_function_names = set()
     for frame in cm.exception.custom_traceback:
@@ -87,7 +87,7 @@ class RuntimeErrorsTest(test.TestCase):
     with self.assertRaises(tf_errors.InvalidArgumentError):
       with errors.improved_errors(zero_div_caller):
         with self.cached_session() as sess:
-          sess.run(ops)
+          self.evaluate(ops)
 
   def test_improved_errors_validation(self):
     with self.assertRaisesRegexp(
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index ef577568c4e..44cb99d657f 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -63,7 +63,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_decorator_does_not_recurse(self):
 
@@ -83,7 +83,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_decorator_calls_unconverted_graph(self):
 
@@ -104,7 +104,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_decorator_calls_unconverted_py_func(self):
 
@@ -130,7 +130,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_decorator_calls_decorated(self):
 
@@ -153,7 +153,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_decorator_preserves_argspec(self):
 
@@ -192,7 +192,7 @@ class ApiTest(test.TestCase):
       x = tc.test_method(
           constant_op.constant([2, 4]), constant_op.constant(1),
           constant_op.constant(-2))
-      self.assertListEqual([0, 1], sess.run(x).tolist())
+      self.assertListEqual([0, 1], self.evaluate(x).tolist())
 
   def test_converted_call_builtin(self):
     x = api.converted_call(range, None, converter.ConversionOptions(), 3)
@@ -208,7 +208,7 @@ class ApiTest(test.TestCase):
     with self.cached_session() as sess:
       x = api.converted_call(test_fn, None, converter.ConversionOptions(),
                              constant_op.constant(-1))
-      self.assertEqual(1, sess.run(x))
+      self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_method_explicit_owner(self):
     # TODO(mdan): Implement.
@@ -234,7 +234,7 @@ class ApiTest(test.TestCase):
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(tc.test_method, None,
                              converter.ConversionOptions(), tc)
-      self.assertEqual(1, sess.run(x))
+      self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_method_by_class(self):
 
@@ -252,7 +252,7 @@ class ApiTest(test.TestCase):
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(TestClass.test_method, None,
                              converter.ConversionOptions(), tc)
-      self.assertEqual(1, sess.run(x))
+      self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_callable_object(self):
 
@@ -269,7 +269,7 @@ class ApiTest(test.TestCase):
     with self.cached_session() as sess:
       tc = TestClass(constant_op.constant(-1))
       x = api.converted_call(tc, None, converter.ConversionOptions())
-      self.assertEqual(1, sess.run(x))
+      self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_constructor(self):
 
@@ -288,7 +288,7 @@ class ApiTest(test.TestCase):
                               constant_op.constant(-1))
       # tc is now a converted object.
       x = tc.test_method()
-      self.assertEqual(1, sess.run(x))
+      self.assertEqual(1, self.evaluate(x))
 
   def test_converted_call_already_converted(self):
 
@@ -298,12 +298,12 @@ class ApiTest(test.TestCase):
     with self.cached_session() as sess:
       x = api.converted_call(f, None, converter.ConversionOptions(),
                              constant_op.constant(0))
-      self.assertTrue(sess.run(x))
+      self.assertTrue(self.evaluate(x))
 
       converted_f = api.to_graph(f)
       x = api.converted_call(converted_f, None, converter.ConversionOptions(),
                              constant_op.constant(0))
-      self.assertTrue(sess.run(x))
+      self.assertTrue(self.evaluate(x))
 
   def test_converted_call_no_user_code(self):
 
@@ -334,8 +334,8 @@ class ApiTest(test.TestCase):
                            constant_op.constant([[0.0]]), training=True)
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], sess.run(x))
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_whitelisted_method_extra_self(self):
 
@@ -349,8 +349,8 @@ class ApiTest(test.TestCase):
                            model, constant_op.constant([[0.0]]), training=True)
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], sess.run(x))
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_whitelisted_method_via_owner(self):
 
@@ -364,8 +364,8 @@ class ApiTest(test.TestCase):
                            constant_op.constant([[0.0]]), training=True)
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      self.assertAllEqual([[0.0, 0.0]], sess.run(x))
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
   def test_converted_call_lambda(self):
 
@@ -376,8 +376,8 @@ class ApiTest(test.TestCase):
     x = api.converted_call(l, None, opts, constant_op.constant(0))
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      self.assertAllEqual(True, sess.run(x))
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual(True, self.evaluate(x))
 
   def test_to_graph_basic(self):
 
@@ -390,7 +390,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       x = compiled_fn(constant_op.constant([4, 8]), 4)
-      self.assertListEqual([1, 2], sess.run(x).tolist())
+      self.assertListEqual([1, 2], self.evaluate(x).tolist())
 
   def test_to_graph_with_defaults(self):
 
@@ -405,7 +405,7 @@ class ApiTest(test.TestCase):
 
     with self.cached_session() as sess:
       x = compiled_fn(constant_op.constant([4, 8]))
-      self.assertListEqual([1, 2], sess.run(x).tolist())
+      self.assertListEqual([1, 2], self.evaluate(x).tolist())
 
   def test_to_code_basic(self):
 
diff --git a/tensorflow/python/autograph/lang/special_functions_test.py b/tensorflow/python/autograph/lang/special_functions_test.py
index 123ee65b326..8d40f4036c5 100644
--- a/tensorflow/python/autograph/lang/special_functions_test.py
+++ b/tensorflow/python/autograph/lang/special_functions_test.py
@@ -36,7 +36,7 @@ class SpecialFunctionsTest(test.TestCase):
     python_one = special_functions.match_staging_level(1, 1)
     with self.cached_session() as sess:
       self.assertTrue(tensor_util.is_tensor(tensor_one))
-      self.assertAllEqual(sess.run(tensor_one), 1)
+      self.assertAllEqual(self.evaluate(tensor_one), 1)
       self.assertEqual(python_one, 1)
 
   def test_tensor_list_empty_list(self):
@@ -45,21 +45,21 @@ class SpecialFunctionsTest(test.TestCase):
                                       element_shape=())
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(sl), [])
+      self.assertAllEqual(self.evaluate(sl), [])
 
     l = special_functions.tensor_list((),
                                       element_dtype=dtypes.int32,
                                       element_shape=())
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(sl), [])
+      self.assertAllEqual(self.evaluate(sl), [])
 
   def test_tensor_list_tensor(self):
     l = special_functions.tensor_list(
         constant_op.constant([], dtype=dtypes.int32))
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(sl), [])
+      self.assertAllEqual(self.evaluate(sl), [])
 
   def test_tensor_list_unsupported_initializer(self):
     with self.assertRaisesRegexp(ValueError, 'unknown type'):
@@ -76,7 +76,7 @@ class SpecialFunctionsTest(test.TestCase):
     l = special_functions.tensor_list(elements)
     sl = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
+      self.assertAllEqual(self.evaluate(sl), [[1, 2], [3, 4]])
 
   def test_tensor_list_array_from_elements(self):
     elements = [constant_op.constant([1, 2]), constant_op.constant([3, 4])]
@@ -84,7 +84,7 @@ class SpecialFunctionsTest(test.TestCase):
     l = special_functions.tensor_list(elements, use_tensor_array=True)
     sl = l.stack()
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(sl), [[1, 2], [3, 4]])
+      self.assertAllEqual(self.evaluate(sl), [[1, 2], [3, 4]])
 
   def test_stack(self):
     self.assertEqual(special_functions.stack(1, strict=False), 1)
diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py
index 2dea18dc5fa..05b5660941d 100644
--- a/tensorflow/python/autograph/operators/control_flow_test.py
+++ b/tensorflow/python/autograph/operators/control_flow_test.py
@@ -35,7 +35,7 @@ class ForLoopTest(test.TestCase):
         body=lambda i, s: (s + i,),
         init_state=(0,))
     with self.cached_session() as sess:
-      self.assertEqual((10,), sess.run(s))
+      self.assertEqual((10,), self.evaluate(s))
 
   def test_python(self):
     s = control_flow.for_stmt(
@@ -53,7 +53,7 @@ class ForLoopTest(test.TestCase):
         body=lambda i, s: (s + i,),
         init_state=(0,))
     with self.cached_session() as sess:
-      self.assertEqual((10,), sess.run(s))
+      self.assertEqual((10,), self.evaluate(s))
 
 
 class WhileLoopTest(test.TestCase):
@@ -66,7 +66,7 @@ class WhileLoopTest(test.TestCase):
         init_state=(0, 0),
         extra_deps=(n,))
     with self.cached_session() as sess:
-      self.assertEqual((5, 10), sess.run(results))
+      self.assertEqual((5, 10), self.evaluate(results))
 
   def test_python(self):
     n = 5
@@ -90,9 +90,9 @@ class IfStmtTest(test.TestCase):
   def test_tensor(self):
     with self.cached_session() as sess:
       t = self.single_return_if_stmt(constant_op.constant(True))
-      self.assertEqual(1, sess.run(t))
+      self.assertEqual(1, self.evaluate(t))
       t = self.single_return_if_stmt(constant_op.constant(False))
-      self.assertEqual(-1, sess.run(t))
+      self.assertEqual(-1, self.evaluate(t))
 
   def test_python(self):
     self.assertEqual(1, self.single_return_if_stmt(True))
@@ -101,9 +101,9 @@ class IfStmtTest(test.TestCase):
   def test_tensor_multiple_returns(self):
     with self.cached_session() as sess:
       t = self.multi_return_if_stmt(constant_op.constant(True))
-      self.assertAllEqual([1, 2], sess.run(t))
+      self.assertAllEqual([1, 2], self.evaluate(t))
       t = self.multi_return_if_stmt(constant_op.constant(False))
-      self.assertAllEqual([-1, -2], sess.run(t))
+      self.assertAllEqual([-1, -2], self.evaluate(t))
 
   def test_python_multiple_returns(self):
     self.assertEqual((1, 2), self.multi_return_if_stmt(True))
diff --git a/tensorflow/python/autograph/operators/data_structures_test.py b/tensorflow/python/autograph/operators/data_structures_test.py
index 72476ccb6bf..0433e3f130c 100644
--- a/tensorflow/python/autograph/operators/data_structures_test.py
+++ b/tensorflow/python/autograph/operators/data_structures_test.py
@@ -43,7 +43,7 @@ class ListTest(test.TestCase):
     l = data_structures.tf_tensor_list_new([3, 4, 5])
     t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [3, 4, 5])
+      self.assertAllEqual(self.evaluate(t), [3, 4, 5])
 
   def test_tf_tensor_list_new_empty(self):
     l = data_structures.tf_tensor_list_new([],
@@ -51,13 +51,13 @@ class ListTest(test.TestCase):
                                            element_shape=())
     t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [])
+      self.assertAllEqual(self.evaluate(t), [])
 
   def test_tf_tensor_list_new_from_tensor(self):
     l = data_structures.tf_tensor_list_new(constant_op.constant([3, 4, 5]))
     t = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [3, 4, 5])
+      self.assertAllEqual(self.evaluate(t), [3, 4, 5])
 
   def test_tf_tensor_list_new_illegal_input(self):
     with self.assertRaises(ValueError):
@@ -77,7 +77,7 @@ class ListTest(test.TestCase):
     l = data_structures.tf_tensor_array_new([3, 4, 5])
     t = l.stack()
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [3, 4, 5])
+      self.assertAllEqual(self.evaluate(t), [3, 4, 5])
 
   def test_tf_tensor_array_new_illegal_input(self):
     with self.assertRaises(ValueError):
@@ -102,15 +102,15 @@ class ListTest(test.TestCase):
 
     t = list_ops.tensor_list_stack(l, element_dtype=x.dtype)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [[1, 2, 3]])
+      self.assertAllEqual(self.evaluate(t), [[1, 2, 3]])
 
   def test_append_tensorarray(self):
     l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True)
     l1 = data_structures.list_append(l, 1)
     l2 = data_structures.list_append(l1, 2)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(l1.stack()), [1])
-      self.assertAllEqual(sess.run(l2.stack()), [1, 2])
+      self.assertAllEqual(self.evaluate(l1.stack()), [1])
+      self.assertAllEqual(self.evaluate(l2.stack()), [1, 2])
 
   def test_append_python(self):
     l = []
@@ -131,10 +131,10 @@ class ListTest(test.TestCase):
 
     with self.cached_session() as sess:
       l, x = data_structures.list_pop(l, None, opts)
-      self.assertAllEqual(sess.run(x), [3, 4])
+      self.assertAllEqual(self.evaluate(x), [3, 4])
 
       t = list_ops.tensor_list_stack(l, element_dtype=initial_list.dtype)
-      self.assertAllEqual(sess.run(t), [[1, 2]])
+      self.assertAllEqual(self.evaluate(t), [[1, 2]])
 
   def test_pop_python(self):
     l = [1, 2, 3]
@@ -152,7 +152,7 @@ class ListTest(test.TestCase):
 
     with self.cached_session() as sess:
       t = data_structures.list_stack(l, opts)
-      self.assertAllEqual(sess.run(t), sess.run(initial_list))
+      self.assertAllEqual(self.evaluate(t), self.evaluate(initial_list))
 
   def test_stack_tensor_list_empty(self):
     l = list_ops.empty_tensor_list(
diff --git a/tensorflow/python/autograph/operators/exceptions_test.py b/tensorflow/python/autograph/operators/exceptions_test.py
index 186535d05b5..24d3f1bd35f 100644
--- a/tensorflow/python/autograph/operators/exceptions_test.py
+++ b/tensorflow/python/autograph/operators/exceptions_test.py
@@ -30,7 +30,7 @@ class ExceptionsTest(test.TestCase):
     with self.cached_session() as sess:
       t = exceptions.assert_stmt(
           constant_op.constant(True), lambda: constant_op.constant('ignored'))
-      sess.run(t)
+      self.evaluate(t)
 
   def test_assert_tf_triggered(self):
     with self.cached_session() as sess:
@@ -40,7 +40,7 @@ class ExceptionsTest(test.TestCase):
 
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    'test message'):
-        sess.run(t)
+        self.evaluate(t)
 
   def test_assert_tf_multiple_printed_values(self):
     two_tensors = [
@@ -53,7 +53,7 @@ class ExceptionsTest(test.TestCase):
 
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                    'test message.*another message'):
-        sess.run(t)
+        self.evaluate(t)
 
   def test_assert_python_untriggered(self):
     side_effect_trace = []
diff --git a/tensorflow/python/autograph/operators/logical_test.py b/tensorflow/python/autograph/operators/logical_test.py
index d6649f7b2bf..ebf6458f01e 100644
--- a/tensorflow/python/autograph/operators/logical_test.py
+++ b/tensorflow/python/autograph/operators/logical_test.py
@@ -45,11 +45,11 @@ class LogicalOperatorsTest(test.TestCase):
   def test_and_tf(self):
     with self.cached_session() as sess:
       t = logical.and_(self._tf_true, self._tf_true)
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
       t = logical.and_(self._tf_true, lambda: True)
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
       t = logical.and_(self._tf_false, lambda: True)
-      self.assertEqual(sess.run(t), False)
+      self.assertEqual(self.evaluate(t), False)
       # TODO(mdan): Add a test for ops with side effects.
 
   def test_or_python(self):
@@ -63,11 +63,11 @@ class LogicalOperatorsTest(test.TestCase):
   def test_or_tf(self):
     with self.cached_session() as sess:
       t = logical.or_(self._tf_false, self._tf_true)
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
       t = logical.or_(self._tf_false, lambda: True)
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
       t = logical.or_(self._tf_true, lambda: True)
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
       # TODO(mdan): Add a test for ops with side effects.
 
   def test_not_python(self):
@@ -78,7 +78,7 @@ class LogicalOperatorsTest(test.TestCase):
   def test_not_tf(self):
     with self.cached_session() as sess:
       t = logical.not_(self._tf_false())
-      self.assertEqual(sess.run(t), True)
+      self.assertEqual(self.evaluate(t), True)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py
index 443e30a475d..4d9eec77c38 100644
--- a/tensorflow/python/autograph/operators/py_builtins_test.py
+++ b/tensorflow/python/autograph/operators/py_builtins_test.py
@@ -38,29 +38,29 @@ class PyBuiltinsTest(test.TestCase):
     self.assertEqual(py_builtins.abs_(-1), 1)
     with self.cached_session() as sess:
       t = py_builtins.abs_(constant_op.constant(-1))
-      self.assertEqual(sess.run(t), 1)
+      self.assertEqual(self.evaluate(t), 1)
       t = py_builtins.abs_(constant_op.constant([-1, 2, -3]))
-      self.assertAllEqual(sess.run(t), [1, 2, 3])
+      self.assertAllEqual(self.evaluate(t), [1, 2, 3])
 
   def test_float(self):
     self.assertEqual(py_builtins.float_(10), 10.0)
     self.assertEqual(py_builtins.float_('10.0'), 10.0)
     with self.cached_session() as sess:
       t = py_builtins.float_(constant_op.constant(1, dtype=dtypes.int64))
-      self.assertEqual(sess.run(t), 1.0)
+      self.assertEqual(self.evaluate(t), 1.0)
       st = py_builtins.float_(constant_op.constant('1.0'))
-      self.assertEqual(sess.run(st), 1.0)
+      self.assertEqual(self.evaluate(st), 1.0)
 
   def test_int(self):
     self.assertEqual(py_builtins.int_(10.0), 10)
     self.assertEqual(py_builtins.int_('11', 2), 3)
     with self.cached_session() as sess:
       t = py_builtins.int_(constant_op.constant(1, dtype=dtypes.float64))
-      self.assertEqual(sess.run(t), 1)
+      self.assertEqual(self.evaluate(t), 1)
       st = py_builtins.int_(constant_op.constant('1'))
-      self.assertEqual(sess.run(st), 1)
+      self.assertEqual(self.evaluate(st), 1)
       st = py_builtins.int_(constant_op.constant('1'), 10)
-      self.assertEqual(sess.run(st), 1)
+      self.assertEqual(self.evaluate(st), 1)
 
   def test_int_unsupported_base(self):
     t = constant_op.constant(1, dtype=dtypes.float64)
@@ -73,9 +73,9 @@ class PyBuiltinsTest(test.TestCase):
       t = py_builtins.len_(constant_op.constant([[1], [2], [3]]))
       self.assertEqual(t, 3)
       ta = py_builtins.len_(tensor_array_ops.TensorArray(dtypes.int32, size=5))
-      self.assertEqual(sess.run(ta), 5)
+      self.assertEqual(self.evaluate(ta), 5)
       tl = py_builtins.len_(data_structures.tf_tensor_list_new([3, 4, 5]))
-      self.assertEqual(sess.run(tl), 3)
+      self.assertEqual(self.evaluate(tl), 3)
 
   def test_len_scalar(self):
     with self.assertRaises(ValueError):
@@ -120,18 +120,18 @@ class PyBuiltinsTest(test.TestCase):
   def test_range_tensor(self):
     with self.cached_session() as sess:
       r = py_builtins.range_(constant_op.constant(3))
-      self.assertAllEqual(sess.run(r), [0, 1, 2])
+      self.assertAllEqual(self.evaluate(r), [0, 1, 2])
       r = py_builtins.range_(1, constant_op.constant(3))
-      self.assertAllEqual(sess.run(r), [1, 2])
+      self.assertAllEqual(self.evaluate(r), [1, 2])
       r = py_builtins.range_(2, 0, constant_op.constant(-1))
-      self.assertAllEqual(sess.run(r), [2, 1])
+      self.assertAllEqual(self.evaluate(r), [2, 1])
 
   def test_range_tensor_empty_range(self):
     with self.session() as sess:
       r = py_builtins.range_(constant_op.constant(-3))
-      self.assertAllEqual(sess.run(r), [])
+      self.assertAllEqual(self.evaluate(r), [])
       r = py_builtins.range_(5, constant_op.constant(2))
-      self.assertAllEqual(sess.run(r), [])
+      self.assertAllEqual(self.evaluate(r), [])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/operators/slices_test.py b/tensorflow/python/autograph/operators/slices_test.py
index 9e4865b3c66..d444054fd77 100644
--- a/tensorflow/python/autograph/operators/slices_test.py
+++ b/tensorflow/python/autograph/operators/slices_test.py
@@ -34,7 +34,7 @@ class SlicesTest(test.TestCase):
 
     with self.cached_session() as sess:
       t = list_ops.tensor_list_stack(l, element_dtype=initial_list.dtype)
-      self.assertAllEqual(sess.run(t), [[5, 6], [3, 4]])
+      self.assertAllEqual(self.evaluate(t), [[5, 6], [3, 4]])
 
   def test_get_item_tensor_list(self):
     initial_list = constant_op.constant([[1, 2], [3, 4]])
@@ -44,7 +44,7 @@ class SlicesTest(test.TestCase):
         l, 1, slices.GetItemOpts(element_dtype=initial_list.dtype))
 
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(t), [3, 4])
+      self.assertAllEqual(self.evaluate(t), [3, 4])
 
   def test_get_item_tensor_string(self):
     initial_str = constant_op.constant('abcd')
@@ -52,14 +52,14 @@ class SlicesTest(test.TestCase):
                         slices.GetItemOpts(element_dtype=initial_str.dtype))
 
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(t), b'b')
+      self.assertEqual(self.evaluate(t), b'b')
 
     initial_list_str = constant_op.constant(['abcd', 'bcde'])
     t = slices.get_item(initial_list_str, 1,
                         slices.GetItemOpts(element_dtype=initial_str.dtype))
 
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(t), b'bcde')
+      self.assertEqual(self.evaluate(t), b'bcde')
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/utils/misc_test.py b/tensorflow/python/autograph/utils/misc_test.py
index 8d2b0d6e138..c813e0f5c96 100644
--- a/tensorflow/python/autograph/utils/misc_test.py
+++ b/tensorflow/python/autograph/utils/misc_test.py
@@ -32,7 +32,7 @@ class MiscTest(test.TestCase):
     new_a = alias_tensors(a)
     self.assertFalse(new_a is a)
     with self.cached_session() as sess:
-      self.assertEqual(1, sess.run(new_a))
+      self.assertEqual(1, self.evaluate(new_a))
 
   def test_alias_tensors(self):
     a = constant(1)
@@ -47,7 +47,7 @@ class MiscTest(test.TestCase):
     self.assertTrue(new_s is s)
     self.assertTrue(new_l is l)
     with self.cached_session() as sess:
-      self.assertEqual(1, sess.run(new_a))
+      self.assertEqual(1, self.evaluate(new_a))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/autograph/utils/py_func_test.py b/tensorflow/python/autograph/utils/py_func_test.py
index 1c220d94922..28cefd8c3ed 100644
--- a/tensorflow/python/autograph/utils/py_func_test.py
+++ b/tensorflow/python/autograph/utils/py_func_test.py
@@ -34,13 +34,13 @@ class PyFuncTest(test.TestCase):
     with self.cached_session() as sess:
       result = py_func.wrap_py_func(test_fn, dtypes.int64,
                                     (1, constant_op.constant(1), 1))
-      self.assertEqual(3, sess.run(result))
+      self.assertEqual(3, self.evaluate(result))
       result = py_func.wrap_py_func(test_fn, dtypes.int64, (1, 1, 1))
-      self.assertEqual(3, sess.run(result))
+      self.assertEqual(3, self.evaluate(result))
       result = py_func.wrap_py_func(
           test_fn, dtypes.int64,
           (constant_op.constant(1), 1, constant_op.constant(1)))
-      self.assertEqual(3, sess.run(result))
+      self.assertEqual(3, self.evaluate(result))
 
   def test_wrap_py_func_complex_args(self):
 
@@ -54,10 +54,10 @@ class PyFuncTest(test.TestCase):
 
     with self.cached_session() as sess:
       result = py_func.wrap_py_func(test_fn, dtypes.int64, (7, TestClass()))
-      self.assertEqual(35, sess.run(result))
+      self.assertEqual(35, self.evaluate(result))
       result = py_func.wrap_py_func(test_fn, dtypes.int64,
                                     (constant_op.constant(7), TestClass()))
-      self.assertEqual(35, sess.run(result))
+      self.assertEqual(35, self.evaluate(result))
 
   def test_wrap_py_func_kwargs(self):
 
@@ -74,13 +74,13 @@ class PyFuncTest(test.TestCase):
           'c': 11,
           'd': TestClass(13)
       })
-      self.assertEqual(178, sess.run(result))
+      self.assertEqual(178, self.evaluate(result))
       result = py_func.wrap_py_func(test_fn, dtypes.int64,
                                     (constant_op.constant(7), TestClass(5)), {
                                         'c': constant_op.constant(11),
                                         'd': TestClass(13)
                                     })
-      self.assertEqual(178, sess.run(result))
+      self.assertEqual(178, self.evaluate(result))
 
   def test_wrap_py_func_dummy_return(self):
 
@@ -91,11 +91,11 @@ class PyFuncTest(test.TestCase):
 
     with self.cached_session() as sess:
       result = py_func.wrap_py_func(test_fn, None, (5,), use_dummy_return=True)
-      self.assertEqual(1, sess.run(result))
+      self.assertEqual(1, self.evaluate(result))
       self.assertEqual([1], side_counter)
       result = py_func.wrap_py_func(
           test_fn, None, (constant_op.constant(5),), use_dummy_return=True)
-      self.assertEqual(1, sess.run(result))
+      self.assertEqual(1, self.evaluate(result))
       self.assertEqual([2], side_counter)
 
 
diff --git a/tensorflow/python/autograph/utils/tensor_list_test.py b/tensorflow/python/autograph/utils/tensor_list_test.py
index 697c166eb12..c655f773b00 100644
--- a/tensorflow/python/autograph/utils/tensor_list_test.py
+++ b/tensorflow/python/autograph/utils/tensor_list_test.py
@@ -43,13 +43,13 @@ class TensorListTest(test.TestCase):
     l = tl.dynamic_list_append(l, 1)
     s = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(s), [1])
+      self.assertAllEqual(self.evaluate(s), [1])
 
     l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True)
     l = tl.dynamic_list_append(l, 1)
     s = l.stack()
     with self.cached_session() as sess:
-      self.assertAllEqual(sess.run(s), [1])
+      self.assertAllEqual(self.evaluate(s), [1])
 
     l = tl.TensorList(self._shape(()), dtypes.int32)
     l = tl.dynamic_list_append(l, 1)
@@ -92,7 +92,7 @@ class TensorListTest(test.TestCase):
     a2 = l.pop()
     c4 = l.count()
     with Session() as sess:
-      c1, c2, c3, c4, a, a2 = sess.run([c1, c2, c3, c4, a, a2])
+      c1, c2, c3, c4, a, a2 = self.evaluate([c1, c2, c3, c4, a, a2])
       self.assertEqual(c1, 1)
       self.assertEqual(c2, 2)
       self.assertEqual(c3, 1)
@@ -108,7 +108,7 @@ class TensorListTest(test.TestCase):
     l[0] = b
     l1 = l[0]
     with self.cached_session() as sess:
-      l0, l1, a, b = sess.run([l0, l1, a, b])
+      l0, l1, a, b = self.evaluate([l0, l1, a, b])
       self.assertEqual(l0, a)
       self.assertEqual(l1, b)
 
diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py
index df020f88a88..224f880ed15 100644
--- a/tensorflow/python/client/session_clusterspec_prop_test.py
+++ b/tensorflow/python/client/session_clusterspec_prop_test.py
@@ -62,7 +62,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
 
     const = constant_op.constant(17)
     sess = session.Session(server1.target, config=config)
-    output = sess.run(const)
+    output = self.evaluate(const)
     self.assertEqual(17, output)
 
   def testClusterSpecPropagationWorker2Placement(self):
@@ -106,7 +106,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
     with ops.Graph().as_default() as g, ops.device('/job:worker/task:0'):
       const = constant_op.constant(17)
     sess = session.Session(server1.target, config=config, graph=g)
-    output = sess.run(const)
+    output = self.evaluate(const)
     self.assertEqual(17, output)
 
   def testCanonicalDeviceNames(self):
@@ -208,7 +208,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
       with ops.device('/job:worker/task:0/cpu:0'):
         sum3 = sum1 + sum2
     sess = session.Session(server1.target, config=config, graph=g)
-    output = sess.run(sum3)
+    output = self.evaluate(sum3)
     self.assertEqual(40, output)
 
   def testLegacyDeviceNames(self):
diff --git a/tensorflow/python/client/session_partial_run_test.py b/tensorflow/python/client/session_partial_run_test.py
index 92ca47efa93..a9bd5ab7e08 100644
--- a/tensorflow/python/client/session_partial_run_test.py
+++ b/tensorflow/python/client/session_partial_run_test.py
@@ -117,7 +117,7 @@ class PartialRunTest(test_util.TensorFlowTestCase):
     a = constant_op.constant(2.0, dtypes.float32)
     b = a * 2
     c = b * 3
-    r1 = sess.run([b, c])
+    r1 = self.evaluate([b, c])
     h = sess.partial_run_setup([b, c], [])
     r2 = sess.partial_run(h, [b, c])
     self.assertEqual(r1, r2)
diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py
index dfd01476430..f9bd50957a7 100644
--- a/tensorflow/python/client/timeline_test.py
+++ b/tensorflow/python/client/timeline_test.py
@@ -147,7 +147,7 @@ class TimelineTest(test.TestCase):
         num2 = variables.Variable(2.0, name='num2')
       with ops.device('/cpu:2'):
         result = num1 + num2 + num1 * num2
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       sess.run(result, options=run_options, run_metadata=run_metadata)
 
     self.assertTrue(run_metadata.HasField('step_stats'))
@@ -176,7 +176,7 @@ class TimelineTest(test.TestCase):
         num2 = variables.Variable(2.0, name='num2')
       with ops.device('/cpu:2'):
         result = num1 + num2 + num1 * num2
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       sess.run(result, options=run_options, run_metadata=run_metadata)
     self.assertTrue(run_metadata.HasField('step_stats'))
     step_stats = run_metadata.step_stats
diff --git a/tensorflow/python/client/virtual_gpu_test.py b/tensorflow/python/client/virtual_gpu_test.py
index 5892e0fc845..e82ee0666c3 100644
--- a/tensorflow/python/client/virtual_gpu_test.py
+++ b/tensorflow/python/client/virtual_gpu_test.py
@@ -216,7 +216,7 @@ class VirtualGpuTest(test_util.TensorFlowTestCase):
       for d in self._util.devices:
         with ops.device(d):
           var = variables.Variable(random_ops.random_uniform(mat_shape))
-          sess.run(var.initializer)
+          self.evaluate(var.initializer)
           data.append(var)
       s = data[0]
       for i in range(1, len(data)):
diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
index 3903ec49b98..af20e50fb9b 100644
--- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
@@ -110,9 +110,9 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
       with self.cached_session() as sess:
         batches = []
         for _ in range(4):
-          batches.append(sess.run(batch))
+          batches.append(self.evaluate(batch))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(batch)
+          self.evaluate(batch)
       batch_sizes_val = []
       lengths_val = []
       for batch in batches:
@@ -160,9 +160,9 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       batches = []
       for _ in range(3):
-        batches.append(sess.run(batch))
+        batches.append(self.evaluate(batch))
       with self.assertRaisesOpError("bucket_boundaries"):
-        sess.run(batch)
+        self.evaluate(batch)
     batch_sizes_val = []
     lengths_val = []
     for batch in batches:
@@ -197,9 +197,9 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       batches = []
       for _ in range(5):
-        batches.append(sess.run(batch))
+        batches.append(self.evaluate(batch))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(batch)
+        self.evaluate(batch)
 
     self.assertAllEqual(batches[0], [[1, 0],
                                      [1, 1]])
@@ -300,7 +300,7 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
       with self.cached_session() as sess:
         with self.assertRaises(errors.OutOfRangeError):
           while True:
-            output = sess.run(batch)
+            output = self.evaluate(batch)
             sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
                            tuple(output.values))
             all_sparse_tensors.add(sprs_tensor)
diff --git a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
index cea8bd6f0b7..7edaab81f4c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
@@ -57,9 +57,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceInt32(self):
     host_dataset = dataset_ops.Dataset.from_tensors([0, 1, 2, 3])
@@ -82,9 +82,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
 
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
-      self.assertAllEqual([0, 1, 2, 3], sess.run(next_element))
+      self.assertAllEqual([0, 1, 2, 3], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToSameDevice(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -108,9 +108,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceWithPrefetch(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -134,9 +134,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyDictToDevice(self):
     host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
@@ -160,9 +160,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual({"a": i}, sess.run(next_element))
+        self.assertEqual({"a": i}, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyDictToDeviceWithPrefetch(self):
     host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
@@ -186,9 +186,9 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual({"a": i}, sess.run(next_element))
+        self.assertEqual({"a": i}, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopySparseTensorsToDevice(self):
 
@@ -217,12 +217,12 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        actual = sess.run(next_element)
+        actual = self.evaluate(next_element)
         self.assertAllEqual([i], actual.values)
         self.assertAllEqual([[0, 0]], actual.indices)
         self.assertAllEqual([2, 2], actual.dense_shape)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopySparseTensorsToDeviceWithPrefetch(self):
 
@@ -251,12 +251,12 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        actual = sess.run(next_element)
+        actual = self.evaluate(next_element)
         self.assertAllEqual([i], actual.values)
         self.assertAllEqual([[0, 0]], actual.indices)
         self.assertAllEqual([2, 2], actual.dense_shape)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpu(self):
     if not test_util.is_gpu_available():
@@ -271,11 +271,11 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuWithPrefetch(self):
     if not test_util.is_gpu_available():
@@ -290,11 +290,11 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuWithMap(self):
     if not test_util.is_gpu_available():
@@ -323,14 +323,14 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        x, y, z = sess.run(next_element)
+        x, y, z = self.evaluate(next_element)
         self.assertEqual(i**2, x)
         self.assertEqual(float(i**2), y)
         self.assertEqual(util_compat.as_bytes(str(i)), z)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuInt32(self):
     if not test_util.is_gpu_available():
@@ -345,10 +345,10 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      self.assertAllEqual([0, 1, 2, 3], sess.run(next_element))
+      self.evaluate(iterator.initializer)
+      self.assertAllEqual([0, 1, 2, 3], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuInt32AndPrefetch(self):
     if not test_util.is_gpu_available():
@@ -363,10 +363,10 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      self.assertAllEqual([0, 1, 2, 3], sess.run(next_element))
+      self.evaluate(iterator.initializer)
+      self.assertAllEqual([0, 1, 2, 3], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuStrings(self):
     if not test_util.is_gpu_available():
@@ -381,10 +381,10 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      self.assertAllEqual([b"a", b"b", b"c"], sess.run(next_element))
+      self.evaluate(iterator.initializer)
+      self.assertAllEqual([b"a", b"b", b"c"], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuStringsAndPrefetch(self):
     if not test_util.is_gpu_available():
@@ -399,10 +399,10 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      self.assertAllEqual([b"a", b"b", b"c"], sess.run(next_element))
+      self.evaluate(iterator.initializer)
+      self.assertAllEqual([b"a", b"b", b"c"], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDevicePingPongCPUGPU(self):
     if not test_util.is_gpu_available():
@@ -420,11 +420,11 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
         next_element = iterator.get_next()
 
       with self.cached_session() as sess:
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         for i in range(10):
-          self.assertEqual(i, sess.run(next_element))
+          self.assertEqual(i, self.evaluate(next_element))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
+          self.evaluate(next_element)
 
   def testCopyToDeviceWithReInit(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -447,14 +447,14 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
 
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceWithReInitAndPrefetch(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -477,14 +477,14 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
 
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuWithReInit(self):
     if not test_util.is_gpu_available():
@@ -499,14 +499,14 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testCopyToDeviceGpuWithReInitAndPrefetch(self):
     if not test_util.is_gpu_available():
@@ -521,14 +521,14 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testIteratorGetNextAsOptionalOnGPU(self):
     if not test_util.is_gpu_available():
@@ -547,24 +547,25 @@ class CopyToDeviceTest(test_base.DatasetTestBase):
       # Before initializing the iterator, evaluating the optional fails with
       # a FailedPreconditionError.
       with self.assertRaises(errors.FailedPreconditionError):
-        sess.run(elem_has_value_t)
+        self.evaluate(elem_has_value_t)
       with self.assertRaises(errors.FailedPreconditionError):
-        sess.run(elem_value_t)
+        self.evaluate(elem_value_t)
 
       # For each element of the dataset, assert that the optional evaluates to
       # the expected value.
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(3):
-        elem_has_value, elem_value = sess.run([elem_has_value_t, elem_value_t])
+        elem_has_value, elem_value = self.evaluate(
+            [elem_has_value_t, elem_value_t])
         self.assertTrue(elem_has_value)
         self.assertEqual(i, elem_value)
 
       # After exhausting the iterator, `next_elem.has_value()` will evaluate to
       # false, and attempting to get the value will fail.
       for _ in range(2):
-        self.assertFalse(sess.run(elem_has_value_t))
+        self.assertFalse(self.evaluate(elem_has_value_t))
         with self.assertRaises(errors.InvalidArgumentError):
-          sess.run(elem_value_t)
+          self.evaluate(elem_value_t)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/counter_test.py b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
index 4e114ac4791..d1dd07a8794 100644
--- a/tensorflow/python/data/experimental/kernel_tests/counter_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
@@ -38,13 +38,13 @@ class CounterTest(test_base.DatasetTestBase):
     negative_get_next = negative_iterator.get_next()
 
     with self.cached_session() as sess:
-      self.assertEqual(3, sess.run(get_next))
-      self.assertEqual(3 + 4, sess.run(get_next))
-      self.assertEqual(3 + 2 * 4, sess.run(get_next))
+      self.assertEqual(3, self.evaluate(get_next))
+      self.assertEqual(3 + 4, self.evaluate(get_next))
+      self.assertEqual(3 + 2 * 4, self.evaluate(get_next))
 
-      self.assertEqual(0, sess.run(negative_get_next))
-      self.assertEqual(-1, sess.run(negative_get_next))
-      self.assertEqual(-2, sess.run(negative_get_next))
+      self.assertEqual(0, self.evaluate(negative_get_next))
+      self.assertEqual(-1, self.evaluate(negative_get_next))
+      self.assertEqual(-2, self.evaluate(negative_get_next))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
index 73be6cbcca8..d9bbfb9c994 100644
--- a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
@@ -41,10 +41,10 @@ class DenseToSparseBatchTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
 
       for start in range(0, len(components), 4):
-        results = sess.run(get_next)
+        results = self.evaluate(get_next)
         self.assertAllEqual([[i, j]
                              for i, c in enumerate(components[start:start + 4])
                              for j in range(c)], results.indices)
@@ -56,7 +56,7 @@ class DenseToSparseBatchTest(test_base.DatasetTestBase):
                             results.dense_shape)
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testDenseToSparseBatchDatasetWithUnknownShape(self):
     components = np.random.randint(5, size=(40,)).astype(np.int32)
@@ -69,10 +69,10 @@ class DenseToSparseBatchTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
 
       for start in range(0, len(components), 4):
-        results = sess.run(get_next)
+        results = self.evaluate(get_next)
         self.assertAllEqual([[i, j, z]
                              for i, c in enumerate(components[start:start + 4])
                              for j in range(c)
@@ -89,7 +89,7 @@ class DenseToSparseBatchTest(test_base.DatasetTestBase):
         ], results.dense_shape)
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testDenseToSparseBatchDatasetWithInvalidShape(self):
     input_tensor = array_ops.constant([[1]])
@@ -111,13 +111,13 @@ class DenseToSparseBatchTest(test_base.DatasetTestBase):
       sess.run(init_op, feed_dict={input_tensor: [[1]]})
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    "incompatible with the row shape"):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
       # Initialize with an input tensor that is larger than `row_shape`.
       sess.run(init_op, feed_dict={input_tensor: range(13)})
       with self.assertRaisesRegexp(errors.DataLossError,
                                    "larger than the row shape"):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
index 796a692c56f..768a8d774b1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
@@ -40,12 +40,12 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for _ in range(100):
         for i in range(10):
-          self.assertEqual(i, sess.run(next_element))
+          self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def _normalize(self, vec):
     return vec / vec.sum()
@@ -71,9 +71,9 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       freqs = np.zeros([num_datasets])
       for _ in range(num_samples):
-        freqs[sess.run(next_element)] += 1
+        freqs[self.evaluate(next_element)] += 1
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
     return freqs
 
@@ -107,9 +107,9 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       for i in choice_array:
-        self.assertEqual(words[i], sess.run(next_element))
+        self.assertEqual(words[i], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testErrors(self):
     with self.assertRaisesRegexp(ValueError,
diff --git a/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
index e54235d9f80..f32d1d0a6fc 100644
--- a/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
@@ -44,12 +44,12 @@ class EnumerateDatasetTest(test_base.DatasetTestBase):
                      [t.shape for t in get_next[1]])
 
     with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertEqual((20, (b"a", 1, 37.0)), sess.run(get_next))
-      self.assertEqual((21, (b"b", 2, 38.0)), sess.run(get_next))
+      self.evaluate(init_op)
+      self.assertEqual((20, (b"a", 1, 37.0)), self.evaluate(get_next))
+      self.assertEqual((21, (b"b", 2, 38.0)), self.evaluate(get_next))
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
index c6ee88c676d..4f8cb1246f3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
@@ -52,12 +52,12 @@ class FilterBenchmark(test.Benchmark):
 
       with session.Session() as sess:
         for _ in range(10):
-          sess.run(next_element.op)
+          self.evaluate(next_element.op)
         deltas = []
         for _ in range(100):
           start = time.time()
           for _ in range(100):
-            sess.run(next_element.op)
+            self.evaluate(next_element.op)
           end = time.time()
           deltas.append(end - start)
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
index d38452e265a..860442571eb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/function_buffering_resource_test.py
@@ -94,18 +94,18 @@ class FunctionBufferingResourceTest(test_base.DatasetTestBase):
                                                   device0, device1)
 
     with self.test_session(config=worker_config) as sess:
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [4.0])
       self._event.wait()
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
+      self.evaluate(destroy_op)
 
   def testSameDeviceCPU(self):
     self._prefetch_fn_helper_one_shot("same_device_cpu",
@@ -135,35 +135,35 @@ class FunctionBufferingResourceTest(test_base.DatasetTestBase):
         ds, ds_iterator, "reinit", device0, device1)
 
     with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
+      self.evaluate(ds_iterator.initializer)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [4.0])
       self._event.wait()
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [5.0])
       # Lets reset the function buffering resource and reinitialize the
       # iterator. Should be able to go through this again.
       self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
-      elem = sess.run(prefetch_op)
+      self.evaluate(reset_op)
+      self.evaluate(ds_iterator.initializer)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [1.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [2.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [3.0])
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [4.0])
       self._event.wait()
-      elem = sess.run(prefetch_op)
+      elem = self.evaluate(prefetch_op)
       self.assertEqual(elem, [5.0])
-      sess.run(destroy_op)
+      self.evaluate(destroy_op)
 
   def testReinitializationOutOfRange(self):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
@@ -175,30 +175,30 @@ class FunctionBufferingResourceTest(test_base.DatasetTestBase):
         ds, ds_iterator, "reinit", device0, device1)
 
     with self.test_session(config=worker_config) as sess:
-      sess.run(ds_iterator.initializer)
+      self.evaluate(ds_iterator.initializer)
       for i in range(1, 10):
-        elem = sess.run(prefetch_op)
+        elem = self.evaluate(prefetch_op)
         self.assertEqual(elem, [float(i)])
       # Try fetching after its over twice to test out end of sequence.
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
+        self.evaluate(prefetch_op)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
+        self.evaluate(prefetch_op)
 
       # Now reset everything and try it out again.
       self._event.clear()
-      sess.run(reset_op)
-      sess.run(ds_iterator.initializer)
+      self.evaluate(reset_op)
+      self.evaluate(ds_iterator.initializer)
       for i in range(1, 10):
-        elem = sess.run(prefetch_op)
+        elem = self.evaluate(prefetch_op)
         self.assertEqual(elem, [float(i)])
       # Try fetching after its over twice to test out end of sequence.
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
+        self.evaluate(prefetch_op)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
+        self.evaluate(prefetch_op)
 
-      sess.run(destroy_op)
+      self.evaluate(destroy_op)
 
   def testStringsGPU(self):
     if not test_util.is_gpu_available():
@@ -235,13 +235,13 @@ class FunctionBufferingResourceTest(test_base.DatasetTestBase):
           buffer_resource_handle, ignore_lookup_error=True)
 
     with self.cached_session() as sess:
-      self.assertEqual([b"a"], sess.run(prefetch_op))
-      self.assertEqual([b"b"], sess.run(prefetch_op))
-      self.assertEqual([b"c"], sess.run(prefetch_op))
+      self.assertEqual([b"a"], self.evaluate(prefetch_op))
+      self.assertEqual([b"b"], self.evaluate(prefetch_op))
+      self.assertEqual([b"c"], self.evaluate(prefetch_op))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(prefetch_op)
+        self.evaluate(prefetch_op)
 
-      sess.run(destroy_op)
+      self.evaluate(destroy_op)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
index 90303285931..f9856500c5c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
@@ -39,10 +39,10 @@ class GroupByReducerTest(test_base.DatasetTestBase):
     get_next = dataset.make_one_shot_iterator().get_next()
     with self.cached_session() as sess:
       for expected in values:
-        got = sess.run(get_next)
+        got = self.evaluate(get_next)
         self.assertEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testSum(self):
     reducer = grouping.Reducer(
@@ -127,11 +127,11 @@ class GroupByReducerTest(test_base.DatasetTestBase):
       iterator = dataset.make_one_shot_iterator()
       get_next = iterator.get_next()
       with self.cached_session() as sess:
-        x, y = sess.run(get_next)
+        x, y = self.evaluate(get_next)
         self.assertAllEqual([0] * (2**i), x)
         self.assertAllEqual(np.array(1, ndmin=i), y)
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+          self.evaluate(get_next)
 
   def testTypeMismatch(self):
     reducer = grouping.Reducer(
@@ -190,7 +190,7 @@ class GroupByReducerTest(test_base.DatasetTestBase):
             grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
     get_next = dataset.make_one_shot_iterator().get_next()
     with self.cached_session() as sess:
-      x, y = sess.run(get_next)
+      x, y = self.evaluate(get_next)
       self.assertAllEqual(x, np.asarray([x for x in range(10)]))
       self.assertEqual(y, 45)
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
index 557d56e8b9a..d5a36e7cb15 100644
--- a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
@@ -68,9 +68,9 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
 
-      which_bucket, bucketed_values = sess.run(get_next)
+      which_bucket, bucketed_values = self.evaluate(get_next)
 
       self.assertEqual(0, which_bucket)
 
@@ -103,11 +103,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
 
       # Get two minibatches (one containing even values, one containing odds)
-      which_bucket_even, bucketed_values_even = sess.run(get_next)
-      which_bucket_odd, bucketed_values_odd = sess.run(get_next)
+      which_bucket_even, bucketed_values_even = self.evaluate(get_next)
+      which_bucket_odd, bucketed_values_odd = self.evaluate(get_next)
 
       # Count number of bucket_tensors.
       self.assertEqual(3, len(bucketed_values_even))
@@ -174,11 +174,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
 
       # Get two minibatches ([0, 2, ...] and [64, 66, ...])
-      which_bucket0, bucketed_values_even0 = sess.run(get_next)
-      which_bucket1, bucketed_values_even1 = sess.run(get_next)
+      which_bucket0, bucketed_values_even0 = self.evaluate(get_next)
+      which_bucket1, bucketed_values_even1 = self.evaluate(get_next)
 
       # Ensure that bucket 1 was completely filtered out
       self.assertAllEqual(0, which_bucket0)
@@ -207,11 +207,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       with self.assertRaises(errors.OutOfRangeError):
         batches = 0
         while True:
-          result = sess.run(get_next)
+          result = self.evaluate(get_next)
           is_even = all(x % 2 == 0 for x in result)
           is_odd = all(x % 2 == 1 for x in result)
           self.assertTrue(is_even or is_odd)
@@ -232,11 +232,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       counts = []
       with self.assertRaises(errors.OutOfRangeError):
         while True:
-          result = sess.run(get_next)
+          result = self.evaluate(get_next)
           self.assertTrue(
               all(x % 2 == 0
                   for x in result) or all(x % 2 == 1)
@@ -259,16 +259,16 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       # The input is infinite, so this test demonstrates that:
       # 1. We produce output without having to consume the entire input,
       # 2. Different buckets can produce output at different rates, and
       # 3. For deterministic input, the output is deterministic.
       for _ in range(3):
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-        self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
-        self.assertAllEqual([2, 2, 2, 2], sess.run(get_next))
-        self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
+        self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
+        self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next))
+        self.assertAllEqual([2, 2, 2, 2], self.evaluate(get_next))
+        self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
 
   def testSmallGroups(self):
     components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
@@ -280,13 +280,13 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([0, 0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1, 1, 1, 1], sess.run(get_next))
+      self.evaluate(init_op)
+      self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
+      self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next))
       # The small outputs at the end are deterministically produced in key
       # order.
-      self.assertAllEqual([0, 0, 0], sess.run(get_next))
-      self.assertAllEqual([1], sess.run(get_next))
+      self.assertAllEqual([0, 0, 0], self.evaluate(get_next))
+      self.assertAllEqual([1], self.evaluate(get_next))
 
   def testEmpty(self):
     iterator = (
@@ -297,11 +297,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "Window size must be greater than zero, but got 0."):
-        print(sess.run(get_next))
+        print(self.evaluate(get_next))
 
   def testReduceFuncError(self):
     components = np.random.randint(100, size=(200,)).astype(np.int64)
@@ -323,9 +323,9 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testConsumeWindowDatasetMoreThanOnce(self):
     components = np.random.randint(50, size=(200,)).astype(np.int64)
@@ -351,11 +351,11 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       counts = []
       with self.assertRaises(errors.OutOfRangeError):
         while True:
-          tight_result, multiple_of_10_result = sess.run(get_next)
+          tight_result, multiple_of_10_result = self.evaluate(get_next)
           self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
           self.assertAllEqual(tight_result,
                               multiple_of_10_result[:, :tight_result.shape[1]])
diff --git a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
index c0ec1486ab8..522b1960606 100644
--- a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
@@ -47,11 +47,11 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
+        self.assertEqual(x, self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testParallelMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
@@ -65,11 +65,11 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, sess.run(get_next))
+        self.assertEqual(x, self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testReadFileIgnoreError(self):
 
@@ -93,22 +93,22 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       # All of the files are present.
-      sess.run(init_op)
+      self.evaluate(init_op)
       for filename in filenames:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+        self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
       # Delete one of the files.
       os.remove(filenames[0])
 
       # Attempting to read filenames[0] will fail, but ignore_errors()
       # will catch the error.
-      sess.run(init_op)
+      self.evaluate(init_op)
       for filename in filenames[1:]:
-        self.assertEqual(compat.as_bytes(filename), sess.run(get_next))
+        self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
index c93a8353ce0..0a436034a8c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
@@ -46,14 +46,14 @@ class IndexedDatasetOpsTest(test_base.DatasetTestBase):
         handle, index, output_types=[dtypes.uint64], output_shapes=[[]])
 
     with self.cached_session() as sess:
-      sess.run(materialize)
+      self.evaluate(materialize)
       self.assertEqual([3], sess.run(get_op, feed_dict={index: 3}))
 
   def testIdentityIndexedDataset(self):
     ds = indexed_dataset_ops.IdentityIndexedDataset(16)
     materialized = ds.materialize()
     with self.cached_session() as sess:
-      sess.run(materialized.initializer)
+      self.evaluate(materialized.initializer)
       placeholder = array_ops.placeholder(dtypes.uint64, shape=[])
       for i in range(16):
         output = sess.run(
@@ -68,12 +68,13 @@ class IndexedDatasetOpsTest(test_base.DatasetTestBase):
     itr = ds.make_initializable_iterator()
     n = itr.get_next()
     with self.cached_session() as sess:
-      sess.run(itr.initializer)
+      self.evaluate(itr.initializer)
       for i in range(16):
-        output = sess.run(n)
+        output = self.evaluate(n)
         self.assertEqual(i, output)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(n)
+        self.evaluate(n)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
index 91ae8cb1bd2..109b3696b84 100644
--- a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
@@ -112,14 +112,14 @@ class MakeBatchedFeaturesDatasetTest(
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
           range(self._num_files), 2, 10):
-        actual_batch = sess.run(next_element)
+        actual_batch = self.evaluate(next_element)
         self.assertAllEqual(file_batch, actual_batch["file"])
         self.assertAllEqual(record_batch, actual_batch["record"])
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testReadWithFusedShuffleRepeatDataset(self):
     num_epochs = 5
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
index e4bf0891842..1f509384d72 100644
--- a/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_csv_dataset_test.py
@@ -90,7 +90,7 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
         batch_size,
         num_epochs,
     ):
-      actual_features = sess.run(nxt)
+      actual_features = self.evaluate(nxt)
 
       if label_name is not None:
         expected_labels = expected_features.pop(label_name)
@@ -102,7 +102,7 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
         self.assertAllEqual(expected_features[k], actual_features[k])
 
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(nxt)
+      self.evaluate(nxt)
 
   def _test_dataset(self,
                     inputs,
@@ -607,8 +607,8 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
           outputs1 = dataset1.make_one_shot_iterator().get_next()
           outputs2 = dataset2.make_one_shot_iterator().get_next()
           for _ in range(total_records // batch_size):
-            batch1 = nest.flatten(sess.run(outputs1))
-            batch2 = nest.flatten(sess.run(outputs2))
+            batch1 = nest.flatten(self.evaluate(outputs1))
+            batch2 = nest.flatten(self.evaluate(outputs2))
             for i in range(len(batch1)):
               self.assertAllEqual(batch1[i], batch2[i])
 
@@ -639,8 +639,8 @@ class MakeCsvDatasetTest(test_base.DatasetTestBase):
           outputs2 = dataset2.make_one_shot_iterator().get_next()
           all_equal = False
           for _ in range(total_records // batch_size):
-            batch1 = nest.flatten(sess.run(outputs1))
-            batch2 = nest.flatten(sess.run(outputs2))
+            batch1 = nest.flatten(self.evaluate(outputs1))
+            batch2 = nest.flatten(self.evaluate(outputs2))
             for i in range(len(batch1)):
               all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
           self.assertFalse(all_equal)
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
index 657cf3c00ee..0bb7b7c5f35 100644
--- a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
@@ -105,7 +105,7 @@ class MakeTFRecordDatasetTest(
     for expected_batch in self._next_expected_batch(
         file_indices, batch_size, num_epochs, interleave_cycle_length,
         drop_final_batch, use_parser_fn):
-      actual_batch = sess.run(outputs)
+      actual_batch = self.evaluate(outputs)
       self.assertAllEqual(expected_batch, actual_batch)
 
   def _read_test(self, batch_size, num_epochs, file_index=None,
@@ -135,7 +135,7 @@ class MakeTFRecordDatasetTest(
             interleave_cycle_length=num_parallel_reads,
             drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(outputs)
+          self.evaluate(outputs)
 
   def testRead(self):
     for batch_size in [1, 2]:
@@ -188,19 +188,19 @@ class MakeTFRecordDatasetTest(
         iterator = dataset.make_initializable_iterator()
         next_element = iterator.get_next()
 
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         first_batches = []
         try:
           while True:
-            first_batches.append(sess.run(next_element))
+            first_batches.append(self.evaluate(next_element))
         except errors.OutOfRangeError:
           pass
 
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         second_batches = []
         try:
           while True:
-            second_batches.append(sess.run(next_element))
+            second_batches.append(self.evaluate(next_element))
         except errors.OutOfRangeError:
           pass
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index 5ead6d1c754..8449c0651de 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -89,13 +89,13 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       sess.run(init_op, feed_dict={count: 28, batch_size: 14})
       num_batches = (28 * 7) // 14
       for i in range(num_batches):
-        result = sess.run(get_next)
+        result = self.evaluate(get_next)
         for component, result_component in zip(components, result):
           for j in range(14):
             self.assertAllEqual(component[(i * 14 + j) % 7]**2,
                                 result_component[j])
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
       # Batch of a finite input, where the batch_size does not
       # divide the total number of elements.
@@ -104,23 +104,23 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       # We expect (num_batches - 1) full-sized batches.
       num_batches = int(math.ceil((14 * 7) / 8))
       for i in range(num_batches - 1):
-        result = sess.run(get_next)
+        result = self.evaluate(get_next)
         for component, result_component in zip(components, result):
           for j in range(8):
             self.assertAllEqual(component[(i * 8 + j) % 7]**2,
                                 result_component[j])
-      result = sess.run(get_next)
+      result = self.evaluate(get_next)
       for component, result_component in zip(components, result):
         for j in range((14 * 7) % 8):
           self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
                               result_component[j])
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
       # Batch of an empty input should fail straight away.
       sess.run(init_op, feed_dict={count: 0, batch_size: 8})
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
       # Empty batch should be an initialization time error.
       with self.assertRaises(errors.InvalidArgumentError):
@@ -152,12 +152,12 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertEqual([None, 1], iterator.output_shapes.as_list())
     next_element = iterator.get_next()
     with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
+      self.assertAllEqual([[0], [1], [4], [9]], self.evaluate(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], self.evaluate(next_element))
       if not drop_remainder:
-        self.assertAllEqual([[64], [81]], sess.run(next_element))
+        self.assertAllEqual([[64], [81]], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -177,11 +177,11 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertEqual([None, 1], iterator.output_shapes.as_list())
     next_element = iterator.get_next()
     with self.cached_session() as sess:
-      self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
-      self.assertAllEqual([[64], [81]], sess.run(next_element))
+      self.assertAllEqual([[0], [1], [4], [9]], self.evaluate(next_element))
+      self.assertAllEqual([[16], [25], [36], [49]], self.evaluate(next_element))
+      self.assertAllEqual([[64], [81]], self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -201,14 +201,14 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       elements.append(iterator.get_next())
     with self.cached_session() as sess:
       for i in range(5):
-        got = sess.run(elements)
+        got = self.evaluate(elements)
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
           expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
+        self.evaluate(elements)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -230,14 +230,14 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       elements.append(iterator.get_next())
     with self.cached_session() as sess:
       for i in range(4):
-        got = sess.run(elements)
+        got = self.evaluate(elements)
         got.sort(key=lambda x: x[0])
         expected = []
         for j in range(100):
           expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
         self.assertAllEqual(got, expected)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elements)
+        self.evaluate(elements)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -261,9 +261,9 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       for i in range(2):
-        actual = sess.run(get_next)
+        actual = self.evaluate(get_next)
         expected = sparse_tensor.SparseTensorValue(
             indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
             values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
@@ -271,7 +271,7 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         self.assertTrue(sparse_tensor.is_sparse(actual))
         self.assertSparseValuesEqual(actual, expected)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -321,10 +321,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     init_op = iterator.initializer
     get_next = iterator.get_next()
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                    "number of elements does not match"):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -354,7 +354,7 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for _ in range(3):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   @parameterized.named_parameters(
       ("1", 0, False),
@@ -393,13 +393,14 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(threshold // 10):
-        self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
+        self.assertAllEqual([i * 10 + j for j in range(10)],
+                            self.evaluate(get_next))
       if threshold % 10 != 0:
         self.assertAllEqual(
             [threshold // 10 * 10 + j for j in range(threshold % 10)],
-            sess.run(get_next))
+            self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   @parameterized.named_parameters(
       ("1", False, dtypes.bool, False),
@@ -442,7 +443,8 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for _ in range(10):
-        self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
+        self.assertAllEqual([element for _ in range(10)],
+                            self.evaluate(get_next))
 
   @parameterized.named_parameters(
       ("Identity", None, lambda x: x, None),
@@ -462,7 +464,7 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       else:
         expected = map_fn(
             sess.run(self.structuredElement(structure, shape=[10])))
-      self.assertAllEqual(expected, sess.run(get_next))
+      self.assertAllEqual(expected, self.evaluate(get_next))
 
   def testShortCircuitCapturedInput(self):
     captured_t = array_ops.placeholder(dtypes.int64, shape=[])
@@ -473,7 +475,7 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       sess.run(iterator.initializer, feed_dict={captured_t: 42})
-      self.assertAllEqual([42] * 10, sess.run(get_next))
+      self.assertAllEqual([42] * 10, self.evaluate(get_next))
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -501,13 +503,13 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
         print("Case %d" % i)
         if i < 5:
           self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
-                              sess.run(get_next))
+                              self.evaluate(get_next))
         else:
           self.assertAllEqual(
               [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
-              sess.run(get_next))
+              self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 11694540fae..6042ca1c63f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -218,7 +218,7 @@ class MapDefunTest(test_base.DatasetTestBase):
 
   def _assert_op_cancelled(self, sess, map_defun_op):
     with self.assertRaisesRegexp(errors.CancelledError, "was cancelled"):
-      sess.run(map_defun_op)
+      self.evaluate(map_defun_op)
 
   def testMapDefunWithParentCancellation(self):
     # Checks that a cancellation of the parent graph is threaded through to
@@ -260,10 +260,10 @@ class MapDefunBenchmark(test.Benchmark):
     with session.Session() as sess:
       # Warm up the session
       for _ in range(5):
-        sess.run(op)
+        self.evaluate(op)
       start = time.time()
       for _ in range(num_iters):
-        sess.run(op)
+        self.evaluate(op)
       end = time.time()
       mean_us = (end - start) * 1e6 / num_iters
       self.report_benchmark(
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_test.py
index ea2737c3c7e..d3c121491ae 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/model_dataset_test.py
@@ -41,9 +41,9 @@ class ModelDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      self.assertEqual(0, sess.run(get_next))
+      self.assertEqual(0, self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index 510b197ddf7..df26a2c0cdf 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -51,7 +51,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       sess.run(init_op, {input_t: np.ones([512, 1024, 1025], np.int32)})
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # TODO(b/117581999): Add eager coverage for the following tests.
   def testSkipEagerOptimizationLargeInputFromTensorSlices(self):
@@ -64,7 +64,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       sess.run(init_op, {input_t: np.ones([1, 512, 1024, 1025], np.int32)})
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   def testOptimizationNestedDataset(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
index 497c011b92d..1dfe854f188 100644
--- a/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
@@ -55,11 +55,11 @@ class OverrideThreadpoolTest(test_base.DatasetTestBase,
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       thread_ids = []
       try:
         while True:
-          thread_ids.append(sess.run(next_element))
+          thread_ids.append(self.evaluate(next_element))
       except errors.OutOfRangeError:
         pass
       self.assertLen(thread_ids, len(set(thread_ids)))
diff --git a/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
index 90ac250df70..77f0dc8e813 100644
--- a/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
@@ -195,9 +195,9 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
           [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 1):
         self.write_coordination_events[expected_element].set()
         self.assertEqual(expected_element * expected_element,
-                         sess.run(self.next_element))
+                         self.evaluate(self.next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testSingleThreaded(self):
     self._testSingleThreaded()
@@ -235,10 +235,10 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       for expected_element in self._interleave(
           [[3] * 3, [7] * 7, [4] * 4] * self.repeat_count, 2, 1):
         self.write_coordination_events[expected_element].set()
-        output = sess.run(self.next_element)
+        output = self.evaluate(self.next_element)
         self.assertEqual(expected_element * expected_element, output)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def _testTwoThreadsNoContention(self, sloppy=False):
     # num_threads > 1.
@@ -262,7 +262,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         self.write_coordination_events[expected_element].set()
         if done_first_event:  # First event starts the worker threads.
           self.read_coordination_events[expected_element].acquire()
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event:
           self.read_coordination_events[expected_element].acquire()
           done_first_event = True
@@ -270,7 +270,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testTwoThreadsNoContention(self):
     self._testTwoThreadsNoContention()
@@ -309,7 +309,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         else:
           self.write_coordination_events[expected_element].set()
         time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event:
           done_first_event = True
           self.assertTrue(
@@ -318,7 +318,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testTwoThreadsNoContentionWithRaces(self):
     self._testTwoThreadsNoContentionWithRaces()
@@ -348,7 +348,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         self.write_coordination_events[expected_element].set()
         if done_first_event:  # First event starts the worker threads.
           self.read_coordination_events[expected_element].acquire()
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event:
           done_first_event = True
           self.read_coordination_events[expected_element].acquire()
@@ -356,7 +356,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testTwoThreadsNoContentionBlockLength(self):
     self._testTwoThreadsNoContentionBlockLength()
@@ -396,7 +396,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         else:
           self.write_coordination_events[expected_element].set()
         time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event:
           done_first_event = True
           self.assertTrue(
@@ -405,7 +405,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testTwoThreadsNoContentionWithRacesAndBlocking(self):
     self._testTwoThreadsNoContentionWithRacesAndBlocking()
@@ -428,7 +428,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
               self.prefetch_input_elements: 0,
           })
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testEmptyInput(self):
     self._testEmptyInput()
@@ -451,7 +451,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
               self.prefetch_input_elements: 0,
           })
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testNonEmptyInputIntoEmptyOutputs(self):
     self._testNonEmptyInputIntoEmptyOutputs()
@@ -484,7 +484,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         # presence of finishing iterators.
         if done_first_event and not (sloppy and (i in race_indices)):
           self.read_coordination_events[expected_element].acquire()
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event or (sloppy and (i in race_indices)):
           done_first_event = True
           self.read_coordination_events[expected_element].acquire()
@@ -520,10 +520,10 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       ]
       for element in mis_ordering:
         self.write_coordination_events[element].set()
-        self.assertEqual(element * element, sess.run(self.next_element))
+        self.assertEqual(element * element, self.evaluate(self.next_element))
         self.assertTrue(self.read_coordination_events[element].acquire(False))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testBlockLengthWithContentionSloppy(self):
     with self.cached_session() as sess:
@@ -549,7 +549,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
         self.write_coordination_events[expected_element].set()
         if done_first_event:  # First event starts the worker threads.
           self.read_coordination_events[expected_element].acquire()
-        actual_element = sess.run(self.next_element)
+        actual_element = self.evaluate(self.next_element)
         if not done_first_event:
           self.read_coordination_events[expected_element].acquire()
           done_first_event = True
@@ -557,7 +557,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
                          "At index %s: %s expected, got: %s" %
                          (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def _testEarlyExit(self, sloppy=False):
     # Exiting without consuming all input should not block
@@ -575,7 +575,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
           })
       for i in range(4, 7):
         self.write_coordination_events[i].set()
-      elem = sess.run(self.next_element)  # Start all workers
+      elem = self.evaluate(self.next_element)  # Start all workers
       # Allow the one successful worker to progress beyond the py_func again.
       elem = int(math.sqrt(elem))
       self.write_coordination_events[elem].set()
@@ -608,7 +608,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       output_values = []
       for _ in range(30):
-        output_values.append(sess.run(iterator.get_next()))
+        output_values.append(self.evaluate(iterator.get_next()))
 
     expected_values = self._interleave(
         [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 2)
@@ -637,13 +637,13 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     get_next = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(init_op)
+      self.evaluate(init_op)
       for i in range(10):
         for j in range(2):
           expected = [i, 0] if j % 2 == 0 else [0, -i]
-          self.assertAllEqual(expected, sess.run(get_next))
+          self.assertAllEqual(expected, self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   def testErrorsInOutputFn(self):
     with self.cached_session() as sess:
@@ -668,15 +668,15 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
           self.error = ValueError()
           self.write_coordination_events[expected_element].set()
           with self.assertRaises(errors.InvalidArgumentError):
-            sess.run(self.next_element)
+            self.evaluate(self.next_element)
         else:
           self.write_coordination_events[expected_element].set()
-          actual_element = sess.run(self.next_element)
+          actual_element = self.evaluate(self.next_element)
           self.assertEqual(expected_element * expected_element, actual_element,
                            "At index %s: %s expected, got: %s" %
                            (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testErrorsInInputFn(self):
 
@@ -720,14 +720,14 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
           self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
         if expected_element == 5:
           with self.assertRaises(errors.InvalidArgumentError):
-            sess.run(self.next_element)
+            self.evaluate(self.next_element)
         else:
-          actual_element = sess.run(self.next_element)
+          actual_element = self.evaluate(self.next_element)
           self.assertEqual(expected_element, actual_element,
                            "At index %s: %s expected, got: %s" %
                            (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testErrorsInInterleaveFn(self):
 
@@ -769,14 +769,14 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
           self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
         if expected_element == 5:
           with self.assertRaises(errors.InvalidArgumentError):
-            sess.run(self.next_element)
+            self.evaluate(self.next_element)
         else:
-          actual_element = sess.run(self.next_element)
+          actual_element = self.evaluate(self.next_element)
           self.assertEqual(expected_element, actual_element,
                            "At index %s: %s expected, got: %s" %
                            (i, expected_element, actual_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(self.next_element)
+        self.evaluate(self.next_element)
 
   def testShutdownRace(self):
     dataset = dataset_ops.Dataset.range(20)
@@ -796,10 +796,10 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       for _ in range(2):
         elements = []
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         try:
           while True:
-            elements.extend(sess.run(next_element))
+            elements.extend(self.evaluate(next_element))
         except errors.OutOfRangeError:
           pass
         results.append(elements)
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
index f73725366c4..8fc18e1ccd3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
@@ -57,9 +57,9 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchToSameDevice(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -87,9 +87,9 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchDictToDevice(self):
     host_dataset = dataset_ops.Dataset.range(10).map(lambda x: {"a": x})
@@ -117,9 +117,9 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        self.assertEqual({"a": i}, sess.run(next_element))
+        self.assertEqual({"a": i}, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchSparseTensorsToDevice(self):
     def make_tensor(i):
@@ -150,12 +150,12 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
       for i in range(10):
-        actual = sess.run(next_element)
+        actual = self.evaluate(next_element)
         self.assertAllEqual([i], actual.values)
         self.assertAllEqual([[0, 0]], actual.indices)
         self.assertAllEqual([2, 2], actual.dense_shape)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchToDeviceGpu(self):
     if not test_util.is_gpu_available():
@@ -170,9 +170,9 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchToDeviceWithReInit(self):
     host_dataset = dataset_ops.Dataset.range(10)
@@ -199,14 +199,14 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
     worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=worker_config) as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testPrefetchToDeviceGpuWithReInit(self):
     if not test_util.is_gpu_available():
@@ -220,14 +220,14 @@ class PrefetchToDeviceTest(test_base.DatasetTestBase):
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(5):
-        self.assertEqual(i, sess.run(next_element))
-      sess.run(iterator.initializer)
+        self.assertEqual(i, self.evaluate(next_element))
+      self.evaluate(iterator.initializer)
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/scan_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
index 0730455431f..dc8a7bca270 100644
--- a/tensorflow/python/data/experimental/kernel_tests/scan_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
@@ -60,9 +60,9 @@ class ScanTest(test_base.DatasetTestBase):
                  feed_dict={start: start_val, step: step_val, take: take_val})
         for expected, _ in zip(
             itertools.count(start_val, step_val), range(take_val)):
-          self.assertEqual(expected, sess.run(next_element))
+          self.assertEqual(expected, self.evaluate(next_element))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
+          self.evaluate(next_element)
 
   @test_util.run_in_graph_and_eager_modes
   def testFibonacci(self):
@@ -110,9 +110,9 @@ class ScanTest(test_base.DatasetTestBase):
                  feed_dict={start: start_val, step: step_val, take: take_val})
         for expected, _ in zip(
             itertools.count(start_val, step_val), range(take_val)):
-          self.assertEqual(expected, sess.run(next_element).values[0])
+          self.assertEqual(expected, self.evaluate(next_element).values[0])
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
+          self.evaluate(next_element)
 
   def testChangingStateShape(self):
     # Test the fixed-point shape invariant calculations: start with
@@ -136,11 +136,11 @@ class ScanTest(test_base.DatasetTestBase):
 
     with self.cached_session() as sess:
       for i in range(5):
-        (longer_vector_val, larger_rank_val), _ = sess.run(next_element)
+        (longer_vector_val, larger_rank_val), _ = self.evaluate(next_element)
         self.assertAllEqual([0] * (2**i), longer_vector_val)
         self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testIncorrectStateType(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
index ef99d01c73c..aeb338dfd5e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/range_dataset_serialization_test.py
@@ -71,36 +71,36 @@ class RangeDatasetSerializationTest(
     with ops.Graph().as_default() as g:
       init_op, get_next, save_op, _ = _build_graph(start, stop)
       with self.session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
+        self.evaluate(variables.global_variables_initializer())
+        self.evaluate(init_op)
         for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
+          self.assertEqual(i, self.evaluate(get_next))
+        self.evaluate(save_op)
 
     with ops.Graph().as_default() as g:
       init_op, get_next, _, restore_op = _build_graph(start, stop)
       with self.session(graph=g) as sess:
-        sess.run(init_op)
-        sess.run(restore_op)
+        self.evaluate(init_op)
+        self.evaluate(restore_op)
         for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
+          self.assertEqual(i, self.evaluate(get_next))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+          self.evaluate(get_next)
 
     # Saving and restoring in same session.
     with ops.Graph().as_default() as g:
       init_op, get_next, save_op, restore_op = _build_graph(start, stop)
       with self.session(graph=g) as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(init_op)
+        self.evaluate(variables.global_variables_initializer())
+        self.evaluate(init_op)
         for i in range(start, break_point):
-          self.assertEqual(i, sess.run(get_next))
-        sess.run(save_op)
-        sess.run(restore_op)
+          self.assertEqual(i, self.evaluate(get_next))
+        self.evaluate(save_op)
+        self.evaluate(restore_op)
         for i in range(break_point, stop):
-          self.assertEqual(i, sess.run(get_next))
+          self.assertEqual(i, self.evaluate(get_next))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+          self.evaluate(get_next)
 
   def _build_range_dataset(self, start, stop):
     return dataset_ops.Dataset.range(start, stop)
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
index 88d5c896c9f..12fa0989d07 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/serialization_integration_test.py
@@ -60,9 +60,9 @@ class SerializationIntegrationTest(test.TestCase):
       init_ops, get_next_ops, saver = self._build_graph(num_pipelines,
                                                         num_outputs)
       with self.session(graph=g) as sess:
-        sess.run(init_ops)
+        self.evaluate(init_ops)
         for _ in range(break_point):
-          output = sess.run(get_next_ops)
+          output = self.evaluate(get_next_ops)
           for i in range(num_pipelines):
             all_outputs[i].append(output[i])
         saver.save(sess, self._ckpt_path())
@@ -73,7 +73,7 @@ class SerializationIntegrationTest(test.TestCase):
       with self.session(graph=g) as sess:
         saver.restore(sess, self._ckpt_path())
         for _ in range(num_outputs - break_point):
-          output = sess.run(get_next_ops)
+          output = self.evaluate(get_next_ops)
           for i in range(num_pipelines):
             all_outputs[i].append(output[i])
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
index a04f1ddafce..e753a7a15be 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/shuffle_dataset_serialization_test.py
@@ -138,9 +138,9 @@ class ShuffleDatasetSerializationTest(
           saver = saver_lib.Saver(allow_empty=True)
           with self.session(graph=g) as sess:
             self._save(sess, saver)
-            expected = [sess.run(get_next_ops) for _ in range(num_outputs)]
+            expected = [self.evaluate(get_next_ops) for _ in range(num_outputs)]
             self._restore(saver, sess)
-            actual = [sess.run(get_next_ops) for _ in range(num_outputs)]
+            actual = [self.evaluate(get_next_ops) for _ in range(num_outputs)]
             self.match(expected, actual)
 
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index c208963a861..2e8b93feaf0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -38,10 +38,10 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
     outputs = []
     with self.cached_session() as sess:
       for _ in range(num_outputs):
-        outputs.append(sess.run(get_next))
+        outputs.append(self.evaluate(get_next))
       if verify_exhausted:
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+          self.evaluate(get_next)
     return outputs
 
   def testCorrectOutput(self):
@@ -108,7 +108,7 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
           shuffle_ops.shuffle_and_repeat(buffer_size=21))
       get_next_op = ds.make_one_shot_iterator().get_next()
       with self.session(graph=g) as sess:
-        sess.run(get_next_op)
+        self.evaluate(get_next_op)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/sleep_test.py b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
index bf53acc82a8..1a6d5522ef4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
@@ -38,14 +38,14 @@ class SleepTest(test_base.DatasetTestBase):
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       start_time = time.time()
       for i in range(10):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       end_time = time.time()
       self.assertGreater(end_time - start_time, (10 * sleep_microseconds) / 1e6)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
index a2c11696387..eb66927ee5c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
@@ -39,10 +39,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                             "ORDER BY first_name DESC"
             })
         for _ in range(2):  # Dataset is repeated. See setUp.
-          self.assertEqual((b"John", b"Doe", b"Hi!"), sess.run(get_next))
-          self.assertEqual((b"Jane", b"Moe", b"Hi again!"), sess.run(get_next))
+          self.assertEqual((b"John", b"Doe", b"Hi!"), self.evaluate(get_next))
+          self.assertEqual((b"Jane", b"Moe", b"Hi again!"),
+                           self.evaluate(get_next))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+          self.evaluate(get_next)
 
   # Test that SqlDataset works on a join query.
   def testReadResultSetJoinQuery(self):
@@ -58,9 +59,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "ON students.first_name = people.first_name "
                   "AND students.last_name = people.last_name"
           })
-      self.assertEqual((b"John", b"California", b"Hi!"), sess.run(get_next))
+      self.assertEqual((b"John", b"California", b"Hi!"),
+                       self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that SqlDataset can read a database entry with a null-terminator
   # in the middle of the text and place the entry in a `string` tensor.
@@ -75,10 +77,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "SELECT first_name, last_name, favorite_nonsense_word "
                   "FROM students ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", b"Doe", b"n\0nsense"), sess.run(get_next))
-      self.assertEqual((b"Jane", b"Moe", b"nonsense\0"), sess.run(get_next))
+      self.assertEqual((b"John", b"Doe", b"n\0nsense"), self.evaluate(get_next))
+      self.assertEqual((b"Jane", b"Moe", b"nonsense\0"),
+                       self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that SqlDataset works when used on two different queries.
   # Because the output types of the dataset must be determined at graph-creation
@@ -93,21 +96,22 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, last_name, motto FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", b"Doe", b"Hi!"), sess.run(get_next))
-      self.assertEqual((b"Jane", b"Moe", b"Hi again!"), sess.run(get_next))
+      self.assertEqual((b"John", b"Doe", b"Hi!"), self.evaluate(get_next))
+      self.assertEqual((b"Jane", b"Moe", b"Hi again!"), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
       sess.run(
           init_op,
           feed_dict={
               self.query: "SELECT first_name, last_name, state FROM people "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", b"Doe", b"California"), sess.run(get_next))
+      self.assertEqual((b"John", b"Doe", b"California"),
+                       self.evaluate(get_next))
       self.assertEqual((b"Benjamin", b"Franklin", b"Pennsylvania"),
-                       sess.run(get_next))
+                       self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that an `OutOfRangeError` is raised on the first call to
   # `get_next_str_only` if result set is empty.
@@ -122,7 +126,7 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "WHERE first_name = 'Nonexistent'"
           })
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that an error is raised when `driver_name` is invalid.
   def testReadResultSetWithInvalidDriverName(self):
@@ -151,7 +155,7 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "ORDER BY first_name DESC"
           })
       with self.assertRaises(errors.UnknownError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that an error is raised when there is a syntax error in `query`.
   def testReadResultSetOfQueryWithSyntaxError(self):
@@ -166,7 +170,7 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "ORDER BY first_name DESC"
           })
       with self.assertRaises(errors.UnknownError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that an error is raised when the number of columns in `query`
   # does not match the length of `output_types`.
@@ -181,7 +185,7 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "ORDER BY first_name DESC"
           })
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that no results are returned when `query` is an insert query rather
   # than a select query. In particular, the error refers to the number of
@@ -199,7 +203,7 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "VALUES ('Foo', 'Bar', 'Baz'), ('Fizz', 'Buzz', 'Fizzbuzz')"
           })
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int8` tensor.
@@ -212,10 +216,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int8` tensor.
@@ -230,9 +234,9 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "FROM students "
                           "WHERE first_name = 'John' ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 0, -2), sess.run(get_next))
+      self.assertEqual((b"John", 0, -2), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int8` tensor.
@@ -246,11 +250,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "SELECT desk_number, favorite_negative_number FROM students "
                   "ORDER BY first_name DESC"
           })
-      self.assertEqual((9, -2), sess.run(get_next))
+      self.assertEqual((9, -2), self.evaluate(get_next))
       # Max and min values of int8
-      self.assertEqual((127, -128), sess.run(get_next))
+      self.assertEqual((127, -128), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int16` tensor.
@@ -263,10 +267,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int16` tensor.
@@ -281,9 +285,9 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "FROM students "
                           "WHERE first_name = 'John' ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 0, -2), sess.run(get_next))
+      self.assertEqual((b"John", 0, -2), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int16` tensor.
@@ -297,11 +301,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "FROM students ORDER BY first_name DESC"
           })
       # Max value of int16
-      self.assertEqual((b"John", 32767), sess.run(get_next))
+      self.assertEqual((b"John", 32767), self.evaluate(get_next))
       # Min value of int16
-      self.assertEqual((b"Jane", -32768), sess.run(get_next))
+      self.assertEqual((b"Jane", -32768), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int32` tensor.
@@ -314,8 +318,8 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int32` tensor.
@@ -328,10 +332,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, income FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 0), sess.run(get_next))
-      self.assertEqual((b"Jane", -20000), sess.run(get_next))
+      self.assertEqual((b"John", 0), self.evaluate(get_next))
+      self.assertEqual((b"Jane", -20000), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int32` tensor.
@@ -345,11 +349,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "ORDER BY first_name DESC"
           })
       # Max value of int32
-      self.assertEqual((b"John", 2147483647), sess.run(get_next))
+      self.assertEqual((b"John", 2147483647), self.evaluate(get_next))
       # Min value of int32
-      self.assertEqual((b"Jane", -2147483648), sess.run(get_next))
+      self.assertEqual((b"Jane", -2147483648), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a numeric `varchar` from a SQLite database
   # table and place it in an `int32` tensor.
@@ -362,10 +366,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, school_id FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 123), sess.run(get_next))
-      self.assertEqual((b"Jane", 1000), sess.run(get_next))
+      self.assertEqual((b"John", 123), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 1000), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table
   # and place it in an `int64` tensor.
@@ -378,10 +382,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int64` tensor.
@@ -394,10 +398,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, income FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 0), sess.run(get_next))
-      self.assertEqual((b"Jane", -20000), sess.run(get_next))
+      self.assertEqual((b"John", 0), self.evaluate(get_next))
+      self.assertEqual((b"Jane", -20000), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int64` tensor.
@@ -412,11 +416,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "ORDER BY first_name DESC"
           })
       # Max value of int64
-      self.assertEqual((b"John", 9223372036854775807), sess.run(get_next))
+      self.assertEqual((b"John", 9223372036854775807), self.evaluate(get_next))
       # Min value of int64
-      self.assertEqual((b"Jane", -9223372036854775808), sess.run(get_next))
+      self.assertEqual((b"Jane", -9223372036854775808), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in a `uint8` tensor.
@@ -429,10 +433,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read the minimum and maximum uint8 values from a
   # SQLite database table and place them in `uint8` tensors.
@@ -446,11 +450,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "ORDER BY first_name DESC"
           })
       # Min value of uint8
-      self.assertEqual((b"John", 0), sess.run(get_next))
+      self.assertEqual((b"John", 0), self.evaluate(get_next))
       # Max value of uint8
-      self.assertEqual((b"Jane", 255), sess.run(get_next))
+      self.assertEqual((b"Jane", 255), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer from a SQLite database table
   # and place it in a `uint16` tensor.
@@ -463,10 +467,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, desk_number FROM students "
                           "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", 9), sess.run(get_next))
-      self.assertEqual((b"Jane", 127), sess.run(get_next))
+      self.assertEqual((b"John", 9), self.evaluate(get_next))
+      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read the minimum and maximum uint16 values from a
   # SQLite database table and place them in `uint16` tensors.
@@ -480,11 +484,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                           "ORDER BY first_name DESC"
           })
       # Min value of uint16
-      self.assertEqual((b"John", 0), sess.run(get_next))
+      self.assertEqual((b"John", 0), self.evaluate(get_next))
       # Max value of uint16
-      self.assertEqual((b"Jane", 65535), sess.run(get_next))
+      self.assertEqual((b"Jane", 65535), self.evaluate(get_next))
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run(get_next)
+      self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a 0-valued and 1-valued integer from a
   # SQLite database table and place them as `True` and `False` respectively
@@ -499,10 +503,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "SELECT first_name, registration_complete FROM students "
                   "ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", True), sess.run(get_next))
-      self.assertEqual((b"Jane", False), sess.run(get_next))
+      self.assertEqual((b"John", True), self.evaluate(get_next))
+      self.assertEqual((b"Jane", False), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read an integer that is not 0-valued or 1-valued
   # from a SQLite database table and place it as `True` in a `bool` tensor.
@@ -515,10 +519,10 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
               self.query: "SELECT first_name, favorite_medium_sized_number "
                           "FROM students ORDER BY first_name DESC"
           })
-      self.assertEqual((b"John", True), sess.run(get_next))
-      self.assertEqual((b"Jane", True), sess.run(get_next))
+      self.assertEqual((b"John", True), self.evaluate(get_next))
+      self.assertEqual((b"Jane", True), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a float from a SQLite database table
   # and place it in a `float64` tensor.
@@ -533,10 +537,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "SELECT first_name, last_name, victories FROM townspeople "
                   "ORDER BY first_name"
           })
-      self.assertEqual((b"George", b"Washington", 20.0), sess.run(get_next))
-      self.assertEqual((b"John", b"Adams", -19.95), sess.run(get_next))
+      self.assertEqual((b"George", b"Washington", 20.0),
+                       self.evaluate(get_next))
+      self.assertEqual((b"John", b"Adams", -19.95), self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a float from a SQLite database table beyond
   # the precision of 64-bit IEEE, without throwing an error. Test that
@@ -555,13 +560,13 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
       self.assertEqual(
           (b"George", b"Washington",
            1331241.321342132321324589798264627463827647382647382643874),
-          sess.run(get_next))
+          self.evaluate(get_next))
       self.assertEqual(
           (b"John", b"Adams",
            1331241321342132321324589798264627463827647382647382643874.0),
-          sess.run(get_next))
+          self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
   # Test that `SqlDataset` can read a float from a SQLite database table,
   # representing the largest integer representable as a 64-bit IEEE float
@@ -579,11 +584,11 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
                   "ORDER BY first_name"
           })
       self.assertNotEqual((b"George", b"Washington", 9007199254740992.0),
-                          sess.run(get_next))
+                          self.evaluate(get_next))
       self.assertNotEqual((b"John", b"Adams", 9007199254740991.0),
-                          sess.run(get_next))
+                          self.evaluate(get_next))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+        self.evaluate(get_next)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index d5f265d8a88..e8160069333 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -70,18 +70,18 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       expected_sum = 0.0
       for i in range(100):
         self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), sess.run(next_element))
-        summary_str = sess.run(summary_t)
+            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
+        summary_str = self.evaluate(summary_t)
         self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
         expected_sum += i * 8.0
         self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      summary_str = sess.run(summary_t)
+        self.evaluate(next_element)
+      summary_str = self.evaluate(summary_t)
       self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
       self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
 
@@ -95,14 +95,15 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(100):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency", float(i + 1))
+            self.evaluate(summary_t), "record_latency", float(i + 1))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)
+        self.evaluate(next_element)
+      self._assertSummaryHasCount(
+          self.evaluate(summary_t), "record_latency", 100.0)
 
   def testPrefetchBufferUtilization(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -114,11 +115,11 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(100):
         self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), sess.run(next_element))
-        summary_str = sess.run(summary_t)
+            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
+        summary_str = self.evaluate(summary_t)
         self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                     float(i + 1))
         self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
@@ -126,8 +127,8 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
                                     0, 1)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      summary_str = sess.run(summary_t)
+        self.evaluate(next_element)
+      summary_str = self.evaluate(summary_t)
       self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                   100)
 
@@ -141,17 +142,17 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(10):
         self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), sess.run(next_element))
-        summary_str = sess.run(summary_t)
+            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
+        summary_str = self.evaluate(summary_t)
         self._assertSummaryHasScalarValue(summary_str,
                                           "Prefetch::buffer_capacity", 0)
         self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size",
                                           0)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testFilteredElementsStats(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -163,20 +164,21 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.test_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(34):
-        self.assertEqual(i * 3, sess.run(next_element))
+        self.assertEqual(i * 3, self.evaluate(next_element))
         if i is not 0:
           self._assertSummaryHasScalarValue(
-              sess.run(summary_t), "Filter::dropped_elements", float(i * 2))
+              self.evaluate(summary_t), "Filter::dropped_elements",
+              float(i * 2))
         self._assertSummaryHasScalarValue(
-            sess.run(summary_t), "Filter::filtered_elements", float(i + 1))
+            self.evaluate(summary_t), "Filter::filtered_elements", float(i + 1))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
       self._assertSummaryHasScalarValue(
-          sess.run(summary_t), "Filter::dropped_elements", 67.0)
+          self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
       self._assertSummaryHasScalarValue(
-          sess.run(summary_t), "Filter::filtered_elements", 34.0)
+          self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
 
   def testMapBufferUtilization(self, dataset_transformation):
 
@@ -257,15 +259,16 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
     with self.cached_session() as sess:
       for j in range(5):
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         for i in range(100):
-          self.assertEqual(i, sess.run(next_element))
+          self.assertEqual(i, self.evaluate(next_element))
           self._assertSummaryHasCount(
-              sess.run(summary_t), "record_latency", float((j * 100) + i + 1))
+              self.evaluate(summary_t), "record_latency",
+              float((j * 100) + i + 1))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
+          self.evaluate(next_element)
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency", (j + 1) * 100.0)
+            self.evaluate(summary_t), "record_latency", (j + 1) * 100.0)
 
   def testNoAggregatorRegistered(self, dataset_transformation):
     dataset = dataset_ops.Dataset.range(100).apply(
@@ -274,11 +277,11 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     next_element = iterator.get_next()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(100):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testMultipleTags(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -291,18 +294,19 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(100):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency", float(i + 1))
+            self.evaluate(summary_t), "record_latency", float(i + 1))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency_2", float(i + 1))
+            self.evaluate(summary_t), "record_latency_2", float(i + 1))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)
+        self.evaluate(next_element)
       self._assertSummaryHasCount(
-          sess.run(summary_t), "record_latency_2", 100.0)
+          self.evaluate(summary_t), "record_latency", 100.0)
+      self._assertSummaryHasCount(
+          self.evaluate(summary_t), "record_latency_2", 100.0)
 
   def testRepeatedTags(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -315,14 +319,15 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for i in range(100):
-        self.assertEqual(i, sess.run(next_element))
+        self.assertEqual(i, self.evaluate(next_element))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency", float(2 * (i + 1)))
+            self.evaluate(summary_t), "record_latency", float(2 * (i + 1)))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
+        self.evaluate(next_element)
+      self._assertSummaryHasCount(
+          self.evaluate(summary_t), "record_latency", 200.0)
 
   def testMultipleIteratorsSameAggregator(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -335,14 +340,15 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.cached_session() as sess:
-      sess.run([iterator_0.initializer, iterator_1.initializer])
+      self.evaluate([iterator_0.initializer, iterator_1.initializer])
       for i in range(100):
-        self.assertEqual(i * 2, sess.run(next_element))
+        self.assertEqual(i * 2, self.evaluate(next_element))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "record_latency", float(2 * (i + 1)))
+            self.evaluate(summary_t), "record_latency", float(2 * (i + 1)))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)
+        self.evaluate(next_element)
+      self._assertSummaryHasCount(
+          self.evaluate(summary_t), "record_latency", 200.0)
 
   def testMultipleDatasetWithPrefixes(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
@@ -358,19 +364,19 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     summary_t = aggregator.get_summary()
 
     with self.test_session() as sess:
-      sess.run([iterator_0.initializer, iterator_1.initializer])
+      self.evaluate([iterator_0.initializer, iterator_1.initializer])
       for i in range(100):
-        self.assertEqual(i * 2, sess.run(next_element))
+        self.assertEqual(i * 2, self.evaluate(next_element))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "dataset1_record_latency", float(i + 1))
+            self.evaluate(summary_t), "dataset1_record_latency", float(i + 1))
         self._assertSummaryHasCount(
-            sess.run(summary_t), "dataset2_record_latency", float(i + 1))
+            self.evaluate(summary_t), "dataset2_record_latency", float(i + 1))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
       self._assertSummaryHasCount(
-          sess.run(summary_t), "dataset1_record_latency", 100.0)
+          self.evaluate(summary_t), "dataset1_record_latency", 100.0)
       self._assertSummaryHasCount(
-          sess.run(summary_t), "dataset2_record_latency", 100.0)
+          self.evaluate(summary_t), "dataset2_record_latency", 100.0)
 
 
 @parameterized.named_parameters(
@@ -417,20 +423,21 @@ class FeatureStatsDatasetTest(
     summary_t = aggregator.get_summary()
 
     with self.test_session() as sess:
-      sess.run(iterator.initializer)
+      self.evaluate(iterator.initializer)
       for _ in range(num_output):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
       self._assertSummaryHasCount(
-          sess.run(summary_t), "record_stats_features", total_records)
+          self.evaluate(summary_t), "record_stats_features", total_records)
       self._assertSummaryHasCount(
-          sess.run(summary_t), "record_stats_feature-values", total_records)
+          self.evaluate(summary_t), "record_stats_feature-values",
+          total_records)
       self._assertSummaryHasSum(
-          sess.run(summary_t), "record_stats_features", total_records * 4)
+          self.evaluate(summary_t), "record_stats_features", total_records * 4)
       self._assertSummaryHasSum(
-          sess.run(summary_t), "record_stats_feature-values",
+          self.evaluate(summary_t), "record_stats_feature-values",
           self._sum_keywords(1) * num_epochs + 3 * total_records)
 
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
index f9b800fe679..cb94bb41443 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
@@ -47,9 +47,9 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     with self.cached_session() as sess:
       sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
       for i in range(4):
-        self.assertEqual(i, sess.run(next_elem))
+        self.assertEqual(i, self.evaluate(next_elem))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_elem)
+        self.evaluate(next_elem)
 
   def testUnbatchScalarDataset(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
@@ -65,10 +65,10 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        self.assertEqual((i,) * 3, sess.run(op))
+        self.assertEqual((i,) * 3, self.evaluate(op))
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
+        self.evaluate(op)
 
   def testUnbatchDatasetWithStrings(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
@@ -85,10 +85,10 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        self.assertEqual((i, compat.as_bytes(str(i)), i), sess.run(op))
+        self.assertEqual((i, compat.as_bytes(str(i)), i), self.evaluate(op))
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
+        self.evaluate(op)
 
   def testUnbatchDatasetWithSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
@@ -104,12 +104,12 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        st_row = sess.run(next_element)
+        st_row = self.evaluate(next_element)
         self.assertEqual([i], st_row.indices)
         self.assertEqual([i], st_row.values)
         self.assertEqual([10], st_row.dense_shape)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testUnbatchDatasetWithDenseAndSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
@@ -125,13 +125,13 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        dense_elem, st_row = sess.run(next_element)
+        dense_elem, st_row = self.evaluate(next_element)
         self.assertEqual(i, dense_elem)
         self.assertEqual([i], st_row.indices)
         self.assertEqual([i], st_row.values)
         self.assertEqual([10], st_row.dense_shape)
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testUnbatchSingleElementTupleDataset(self):
     data = tuple([(math_ops.range(10),) for _ in range(3)])
@@ -147,10 +147,10 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       for i in range(10):
-        self.assertEqual(((i,),) * 3, sess.run(op))
+        self.assertEqual(((i,),) * 3, self.evaluate(op))
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
+        self.evaluate(op)
 
   def testUnbatchMultiElementTupleDataset(self):
     data = tuple([(math_ops.range(10 * i, 10 * i + 10),
@@ -168,10 +168,10 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     with self.cached_session() as sess:
       for i in range(10):
         self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
-                         sess.run(op))
+                         self.evaluate(op))
 
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(op)
+        self.evaluate(op)
 
   def testUnbatchEmpty(self):
     data = dataset_ops.Dataset.from_tensors(
@@ -183,7 +183,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testUnbatchStaticShapeMismatch(self):
     data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
@@ -208,7 +208,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
               ph2: np.arange(8).astype(np.int32)
           })
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
       # No 0th dimension (i.e. scalar value) for one component.
       sess.run(
@@ -218,7 +218,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
               ph2: 7
           })
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
index 847cff26b0d..91f4bc84e99 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
@@ -49,13 +49,13 @@ class UniqueTest(test_base.DatasetTestBase):
     with self.cached_session() as sess:
       for test_case, expected in test_cases:
         current_test_case = test_case
-        sess.run(iterator.initializer)
+        self.evaluate(iterator.initializer)
         for element in expected:
           if dtype == dtypes.string:
             element = compat.as_bytes(element)
-          self.assertAllEqual(element, sess.run(next_element))
+          self.assertAllEqual(element, self.evaluate(next_element))
         with self.assertRaises(errors.OutOfRangeError):
-          sess.run(next_element)
+          self.evaluate(next_element)
 
   def testSimpleInt(self):
     for dtype in [dtypes.int32, dtypes.int64]:
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 2ca9961585c..886c9acc03b 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -41,7 +41,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
 
   def testBasic(self):
     dataset = dataset_ops.Dataset.range(10)
@@ -51,13 +51,13 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testOneOnSameDevice(self):
     with ops.device("/cpu:0"):
@@ -68,13 +68,13 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testRepeatDevices(self):
     with ops.device("/cpu:0"):
@@ -86,17 +86,17 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 20, 4):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-        self.assertEqual(i + 2, sess.run(elem_on_3))
-        self.assertEqual(i + 3, sess.run(elem_on_4))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
+        self.assertEqual(i + 2, self.evaluate(elem_on_3))
+        self.assertEqual(i + 3, self.evaluate(elem_on_4))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
-        sess.run(elem_on_3)
-        sess.run(elem_on_4)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
+        self.evaluate(elem_on_3)
+        self.evaluate(elem_on_4)
 
   def testNotFullyDivisible(self):
     dataset = dataset_ops.Dataset.range(9)
@@ -106,14 +106,14 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 8, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
-      self.assertEqual(8, sess.run(elem_on_1))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
+      self.assertEqual(8, self.evaluate(elem_on_1))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testGetNextAsOptional(self):
     dataset = dataset_ops.Dataset.range(9)
@@ -127,7 +127,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 8, 2):
         elem_on_1_has_value, elem_on_1_value = sess.run(
             [elem_on_1_has_value_t, elem_on_1_t])
@@ -141,12 +141,12 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
           [elem_on_1_has_value_t, elem_on_1_t])
       self.assertTrue(elem_on_1_has_value)
       self.assertEqual(8, elem_on_1_value)
-      self.assertFalse(sess.run(elem_on_1_has_value_t))
-      self.assertFalse(sess.run(elem_on_2_has_value_t))
+      self.assertFalse(self.evaluate(elem_on_1_has_value_t))
+      self.assertFalse(self.evaluate(elem_on_2_has_value_t))
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(elem_on_1_t)
+        self.evaluate(elem_on_1_t)
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(elem_on_2_t)
+        self.evaluate(elem_on_2_t)
 
   def testUneven(self):
     dataset = dataset_ops.Dataset.range(10)
@@ -156,14 +156,14 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i, self.evaluate(elem_on_1))
       for i in range(0, 10, 2):
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testMultipleInitializations(self):
     with ops.device("/cpu:0"):
@@ -180,7 +180,8 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
     with self.test_session(config=config) as sess:
       for i in range(1000):
         sess.run(init_op, feed_dict={epoch: i})
-        self.assertEqual([(i, 0), (i, 1)], sess.run([elem_on_1, elem_on_2]))
+        self.assertEqual([(i, 0), (i, 1)], self.evaluate([elem_on_1,
+                                                          elem_on_2]))
 
   def testBasicGpu(self):
     if not test_util.is_gpu_available():
@@ -193,13 +194,13 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testUnevenGpu(self):
     if not test_util.is_gpu_available():
@@ -212,14 +213,14 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
+        self.assertEqual(i, self.evaluate(elem_on_1))
       for i in range(0, 10, 2):
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
   def testGetNextAsOptionalGpu(self):
     if not test_util.is_gpu_available():
@@ -236,7 +237,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 8, 2):
         elem_on_1_has_value, elem_on_1_value = sess.run(
             [elem_on_1_has_value_t, elem_on_1_t])
@@ -250,12 +251,12 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
           [elem_on_1_has_value_t, elem_on_1_t])
       self.assertTrue(elem_on_1_has_value)
       self.assertEqual(8, elem_on_1_value)
-      self.assertFalse(sess.run(elem_on_1_has_value_t))
-      self.assertFalse(sess.run(elem_on_2_has_value_t))
+      self.assertFalse(self.evaluate(elem_on_1_has_value_t))
+      self.assertFalse(self.evaluate(elem_on_2_has_value_t))
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(elem_on_1_t)
+        self.evaluate(elem_on_1_t)
       with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(elem_on_2_t)
+        self.evaluate(elem_on_2_t)
 
   def testOptimization(self):
     dataset = dataset_ops.Dataset.range(10)
@@ -273,13 +274,13 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
     with self.test_session(config=config) as sess:
-      sess.run(multi_device_iterator.initializer)
+      self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
-        self.assertEqual(i, sess.run(elem_on_1))
-        self.assertEqual(i + 1, sess.run(elem_on_2))
+        self.assertEqual(i, self.evaluate(elem_on_1))
+        self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(elem_on_1)
-        sess.run(elem_on_2)
+        self.evaluate(elem_on_1)
+        self.evaluate(elem_on_2)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/util/convert_test.py b/tensorflow/python/data/util/convert_test.py
index 89c3afb2969..3058e2b3f60 100644
--- a/tensorflow/python/data/util/convert_test.py
+++ b/tensorflow/python/data/util/convert_test.py
@@ -30,47 +30,52 @@ class ConvertTest(test.TestCase):
 
   def testInteger(self):
     resp = convert.optional_param_to_tensor("foo", 3)
-    with self.cached_session() as sess:
-      self.assertEqual(3, sess.run(resp))
+    self.assertEqual(3, self.evaluate(resp))
 
   def testIntegerDefault(self):
     resp = convert.optional_param_to_tensor("foo", None)
-    with self.cached_session() as sess:
-      self.assertEqual(0, sess.run(resp))
+    self.assertEqual(0, self.evaluate(resp))
 
   def testStringDefault(self):
     resp = convert.optional_param_to_tensor("bar", None, "default",
                                             dtypes.string)
-    with self.cached_session() as sess:
-      self.assertEqual(compat.as_bytes("default"), sess.run(resp))
+    self.assertEqual(compat.as_bytes("default"), self.evaluate(resp))
 
   def testString(self):
     resp = convert.optional_param_to_tensor("bar", "value", "default",
                                             dtypes.string)
-    with self.cached_session() as sess:
-      self.assertEqual(compat.as_bytes("value"), sess.run(resp))
+    self.assertEqual(compat.as_bytes("value"), self.evaluate(resp))
 
   def testPartialShapeToTensorKnownDimension(self):
-    with self.cached_session() as sess:
-      self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([1]))))
-      self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor((1,))))
-      self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor([1])))
-      self.assertAllEqual([1], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([1], dtype=dtypes.int64))))
+    self.assertAllEqual([1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([1]))))
+    self.assertAllEqual([1], self.evaluate(
+        convert.partial_shape_to_tensor((1,))))
+    self.assertAllEqual([1], self.evaluate(
+        convert.partial_shape_to_tensor([1])))
+    self.assertAllEqual([1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([1], dtype=dtypes.int64))))
 
   def testPartialShapeToTensorUnknownDimension(self):
-    with self.cached_session() as sess:
-      self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([None]))))
-      self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor(
-          (None,))))
-      self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor(
-          [None])))
-      self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor(
-          [-1])))
-      self.assertAllEqual([-1], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([-1], dtype=dtypes.int64))))
+    self.assertAllEqual([-1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([None]))))
+    self.assertAllEqual([-1],
+                        self.evaluate(convert.partial_shape_to_tensor((None,))))
+    self.assertAllEqual([-1],
+                        self.evaluate(convert.partial_shape_to_tensor([None])))
+    self.assertAllEqual([-1],
+                        self.evaluate(convert.partial_shape_to_tensor([-1])))
+    self.assertAllEqual([-1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([-1],
+                                                     dtype=dtypes.int64))))
 
     with self.assertRaisesRegexp(
         ValueError, r"The given shape .* must be a 1-D tensor of tf.int64 "
@@ -84,42 +89,63 @@ class ConvertTest(test.TestCase):
       convert.partial_shape_to_tensor(constant_op.constant([1., 1.]))
 
   def testPartialShapeToTensorMultipleDimensions(self):
-    with self.cached_session() as sess:
-      self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([3, 6]))))
-      self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor(
-          (3, 6))))
-      self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor(
-          [3, 6])))
-      self.assertAllEqual([3, 6], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([3, 6], dtype=dtypes.int64))))
+    self.assertAllEqual([3, 6],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([3, 6]))))
+    self.assertAllEqual([3, 6],
+                        self.evaluate(convert.partial_shape_to_tensor((3, 6))))
+    self.assertAllEqual([3, 6],
+                        self.evaluate(convert.partial_shape_to_tensor([3, 6])))
+    self.assertAllEqual([3, 6],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([3, 6],
+                                                     dtype=dtypes.int64))))
 
-      self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([3, None]))))
-      self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor(
-          (3, None))))
-      self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor(
-          [3, None])))
-      self.assertAllEqual([3, -1], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([3, -1], dtype=dtypes.int64))))
+    self.assertAllEqual([3, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([3, None]))))
+    self.assertAllEqual([3, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor((3, None))))
+    self.assertAllEqual([3, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor([3, None])))
+    self.assertAllEqual([3, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([3, -1],
+                                                     dtype=dtypes.int64))))
 
-      self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([None, None]))))
-      self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor(
-          (None, None))))
-      self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor(
-          [None, None])))
-      self.assertAllEqual([-1, -1], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([-1, -1], dtype=dtypes.int64))))
+    self.assertAllEqual([-1, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([None, None]))))
+    self.assertAllEqual([-1, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor((None, None))))
+    self.assertAllEqual([-1, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor([None, None])))
+    self.assertAllEqual([-1, -1],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([-1, -1],
+                                                     dtype=dtypes.int64))))
 
   def testPartialShapeToTensorScalar(self):
-    with self.cached_session() as sess:
-      self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor(
-          tensor_shape.TensorShape([]))))
-      self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor(())))
-      self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor([])))
-      self.assertAllEqual([], sess.run(convert.partial_shape_to_tensor(
-          constant_op.constant([], dtype=dtypes.int64))))
+    self.assertAllEqual([],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                tensor_shape.TensorShape([]))))
+    self.assertAllEqual([], self.evaluate(convert.partial_shape_to_tensor(())))
+    self.assertAllEqual([], self.evaluate(convert.partial_shape_to_tensor([])))
+    self.assertAllEqual([],
+                        self.evaluate(
+                            convert.partial_shape_to_tensor(
+                                constant_op.constant([], dtype=dtypes.int64))))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index f197a9e4dce..5aa7d1bb4c3 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -1583,7 +1583,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
       x = variables.VariableV1([1, 3, 3, 7], name="x")
       _, idx = array_ops.unique(x, name="x_unique")
       idx_times_two = math_ops.multiply(idx, 2, name="idx_times_two")
-      sess.run(x.initializer)
+      self.evaluate(x.initializer)
 
       run_options = config_pb2.RunOptions(output_partition_graphs=True)
       debug_utils.watch_graph(
diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
index 1f67f8a0d4e..34030c0adca 100644
--- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
+++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py
@@ -126,8 +126,8 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
       u = variables.Variable([12.0], name="u")
       v = variables.Variable([30.0], name="v")
       w = math_ops.add(u, v, name="w")
-      sess.run(u.initializer)
-      sess.run(v.initializer)
+      self.evaluate(u.initializer)
+      self.evaluate(v.initializer)
 
       self._compareOriginalAndReconstructedGraphDefs(
           sess, w, expected_output=[42.0])
@@ -139,7 +139,7 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
         b = math_ops.add(a, a, name="b")
       with ops.control_dependencies([a, b]):
         c = math_ops.multiply(b, b, name="c")
-      sess.run(a.initializer)
+      self.evaluate(a.initializer)
 
       self._compareOriginalAndReconstructedGraphDefs(
           sess, c, expected_output=400.0)
@@ -150,8 +150,8 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
       y = variables.Variable(20.0, name="y")
       cond = control_flow_ops.cond(
           x > y, lambda: math_ops.add(x, 1), lambda: math_ops.add(y, 1))
-      sess.run(x.initializer)
-      sess.run(y.initializer)
+      self.evaluate(x.initializer)
+      self.evaluate(y.initializer)
 
       self._compareOriginalAndReconstructedGraphDefs(
           sess, cond, expected_output=21.0)
@@ -173,8 +173,8 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase):
       toy_loss = x * (u - v)
       train_op = gradient_descent.GradientDescentOptimizer(
           learning_rate=0.1).minimize(toy_loss, name="train_op")
-      sess.run(u.initializer)
-      sess.run(v.initializer)
+      self.evaluate(u.initializer)
+      self.evaluate(v.initializer)
 
       self._compareOriginalAndReconstructedGraphDefs(sess, train_op)
 
diff --git a/tensorflow/python/debug/lib/session_debug_multi_gpu_test.py b/tensorflow/python/debug/lib/session_debug_multi_gpu_test.py
index b0dc25851ca..8eef45392f2 100644
--- a/tensorflow/python/debug/lib/session_debug_multi_gpu_test.py
+++ b/tensorflow/python/debug/lib/session_debug_multi_gpu_test.py
@@ -67,7 +67,7 @@ class SessionDebugMultiGPUTest(test_util.TensorFlowTestCase):
         u1 = math_ops.multiply(v, v, name="u1")
       w = math_ops.subtract(u1, u0, name="w")
 
-      sess.run(v.initializer)
+      self.evaluate(v.initializer)
 
       run_options = config_pb2.RunOptions(output_partition_graphs=True)
       debug_utils.watch_graph(run_options, sess.graph,
diff --git a/tensorflow/python/debug/lib/source_utils_test.py b/tensorflow/python/debug/lib/source_utils_test.py
index 4a8d4eaa99f..a16d68329a3 100644
--- a/tensorflow/python/debug/lib/source_utils_test.py
+++ b/tensorflow/python/debug/lib/source_utils_test.py
@@ -109,8 +109,8 @@ class SourceHelperTest(test_util.TensorFlowTestCase):
       self.w = math_ops.matmul(self.u, self.v, name="w")
       self.w_line_number = line_number_above()
 
-      sess.run(self.u.initializer)
-      sess.run(self.v.initializer)
+      self.evaluate(self.u.initializer)
+      self.evaluate(self.v.initializer)
 
       run_options = config_pb2.RunOptions(output_partition_graphs=True)
       debug_utils.watch_graph(
diff --git a/tensorflow/python/distribute/input_ops_test.py b/tensorflow/python/distribute/input_ops_test.py
index cbb93e89952..2689dbbec86 100644
--- a/tensorflow/python/distribute/input_ops_test.py
+++ b/tensorflow/python/distribute/input_ops_test.py
@@ -92,9 +92,9 @@ class AutoShardDatasetTest(test.TestCase):
     with self.cached_session() as sess:
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
-          self.assertAllEqual(record_fn(r, f), sess.run(next_element))
+          self.assertAllEqual(record_fn(r, f), self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testTFRecordDataset(self):
     dataset = readers.TFRecordDataset(self._createTFRecordFiles())
@@ -138,10 +138,10 @@ class AutoShardDatasetTest(test.TestCase):
       actual, expected = [], []
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
-          actual.append(sess.run(next_element))
+          actual.append(self.evaluate(next_element))
           expected.append(self._record(r, f))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
       self.assertAllEqual(expected, actual)
 
   def testComplexPipeline(self):
@@ -171,9 +171,9 @@ class AutoShardDatasetTest(test.TestCase):
       num_iterations = (self._num_files * self._num_records * num_epochs) // (
           self._num_shards * batch_size)
       for _ in range(num_iterations):
-        actual.extend(sess.run(next_element))
+        actual.extend(self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
       expected = []
       for f in range(0, self._num_files, self._num_shards):
@@ -205,12 +205,13 @@ class AutoShardDatasetTest(test.TestCase):
     with self.cached_session() as sess:
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
-          self.assertAllEqual(self._record(r, f), sess.run(next_element))
+          self.assertAllEqual(self._record(r, f), self.evaluate(next_element))
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
-          self.assertAllEqual(self._text_line(r, f), sess.run(next_element))
+          self.assertAllEqual(
+              self._text_line(r, f), self.evaluate(next_element))
       with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+        self.evaluate(next_element)
 
   def testTextLineReader(self):
     dataset = readers.TextLineDataset(self._createTextFiles())
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index f0f71a219e6..54991344b75 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -149,9 +149,9 @@ class DefFunctionTest(test.TestCase):
 
       result = fn(3.0)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(sess.run(state[0]), 2.0)
-      self.assertAllEqual(sess.run(result), 6.0)
+      self.assertAllEqual(self.evaluate(result), 6.0)
 
   def testLegacyGraphModeVariablesNonTrivialInitializer(self):
     with ops.Graph().as_default(), self.test_session() as sess:
@@ -168,9 +168,9 @@ class DefFunctionTest(test.TestCase):
 
       result = fn(3.0)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(sess.run(state[0]), 6.0)
-      self.assertAllEqual(sess.run(result), 18.0)
+      self.assertAllEqual(self.evaluate(result), 18.0)
 
   def testLegacyGraphModeInputDependentInitializerFails(self):
     with ops.Graph().as_default():
diff --git a/tensorflow/python/eager/function_gradients_test.py b/tensorflow/python/eager/function_gradients_test.py
index d4f8aaa7e33..1ba596573f9 100644
--- a/tensorflow/python/eager/function_gradients_test.py
+++ b/tensorflow/python/eager/function_gradients_test.py
@@ -78,7 +78,7 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase):
       c = constant_op.constant([[2.]])
       f_c = f(c)
       g, = gradients_impl.gradients(f_c, c)
-      self.assertAllEqual(sess.run(g).values, [[1.0]])
+      self.assertAllEqual(self.evaluate(g).values, [[1.0]])
 
   def testNoSymGradNestedDefun(self):
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index b58b09140de..a206b1f7911 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -564,7 +564,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       variables.global_variables_initializer().run()
       call = def_function.function(o.call)
       op = call()
-      self.assertAllEqual(sess.run(op), 2.0)
+      self.assertAllEqual(self.evaluate(op), 2.0)
 
   def testGraphModeManyFunctions(self):
     with ops.Graph().as_default(), self.cached_session():
@@ -1732,7 +1732,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
       function.register(cpu_boost, x)
       y = gpu_boost(x)
-      y_value = sess.run(y)
+      y_value = self.evaluate(y)
 
       if test.is_gpu_available():
         self.assertEqual(y_value, 5.0)
diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py
index e9b11c39604..2c70d668103 100644
--- a/tensorflow/python/feature_column/feature_column_test.py
+++ b/tensorflow/python/feature_column/feature_column_test.py
@@ -1027,7 +1027,7 @@ class CrossedColumnTest(test.TestCase):
     outputs = _transform_features(features, [price_cross_wire])
     output = outputs[price_cross_wire]
     with self.cached_session() as sess:
-      output_val = sess.run(output)
+      output_val = self.evaluate(output)
       self.assertAllEqual(
           [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]], output_val.indices)
       for val in output_val.values:
@@ -1886,7 +1886,8 @@ class LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          self.evaluate(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     price = fc._numeric_column('price')
@@ -2525,7 +2526,8 @@ class _LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          self.evaluate(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     price = fc._numeric_column('price')
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index 115763f656e..23131e22ede 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -1188,7 +1188,7 @@ class CrossedColumnTest(test.TestCase):
     outputs = fc._transform_features_v2(features, [price_cross_wire], None)
     output = outputs[price_cross_wire]
     with self.cached_session() as sess:
-      output_val = sess.run(output)
+      output_val = self.evaluate(output)
       self.assertAllEqual(
           [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]], output_val.indices)
       for val in output_val.values:
@@ -2088,7 +2088,8 @@ class LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))
+      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]],
+                          self.evaluate(net))
 
       coord.request_stop()
       coord.join(threads)
@@ -2124,7 +2125,8 @@ class LinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          self.evaluate(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     price = fc.numeric_column('price')
@@ -2843,7 +2845,8 @@ class OldLinearModelTest(test.TestCase):
       sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
       sess.run(bias.assign([5.]))
 
-      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net))
+      self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]],
+                          self.evaluate(net))
 
   def test_with_1d_unknown_shape_sparse_tensor(self):
     price = fc.numeric_column('price')
diff --git a/tensorflow/python/framework/file_system_test.py b/tensorflow/python/framework/file_system_test.py
index 6901715e5d0..066d34e781c 100644
--- a/tensorflow/python/framework/file_system_test.py
+++ b/tensorflow/python/framework/file_system_test.py
@@ -42,7 +42,7 @@ class FileSystemTest(test.TestCase):
       queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
       queue.enqueue_many([["test://foo"]]).run()
       queue.close().run()
-      key, value = sess.run(reader.read(queue))
+      key, value = self.evaluate(reader.read(queue))
     self.assertEqual(key, compat.as_bytes("test://foo"))
     self.assertEqual(value, compat.as_bytes("AAAAAAAAAA"))
 
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 971219d5b05..1a17a480502 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -102,7 +102,7 @@ class FunctionTest(test.TestCase):
       call = MyIdentityFunc([18.0])
       self.assertEqual("MyIdentity", call.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([18.0], sess.run(call))
+        self.assertAllEqual([18.0], self.evaluate(call))
 
   def testIdentityImplicitDeref(self):
 
@@ -116,8 +116,8 @@ class FunctionTest(test.TestCase):
       self.assertEqual("MyIdentity", call.op.name)
       for cfg in _OptimizerOptions():
         with session.Session(config=cfg) as sess:
-          sess.run(var.initializer)
-          self.assertAllEqual([18.0], sess.run(call))
+          self.evaluate(var.initializer)
+          self.assertAllEqual([18.0], self.evaluate(call))
 
   def testIdentityOutputName(self):
 
@@ -130,7 +130,7 @@ class FunctionTest(test.TestCase):
       call = MyIdentityFunc([18.0])
       self.assertEqual("MyIdentity", call.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([18.0], sess.run(call))
+        self.assertAllEqual([18.0], self.evaluate(call))
 
   def testTooManyOutputNames(self):
 
@@ -158,7 +158,7 @@ class FunctionTest(test.TestCase):
       call = APlus2B([1.0], [2.0])
       self.assertEqual("APlus2B", call.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([5.0], sess.run(call))
+        self.assertAllEqual([5.0], self.evaluate(call))
 
   def testFunctionWithNoOutput(self):
 
@@ -187,7 +187,7 @@ class FunctionTest(test.TestCase):
       call = APlus2B([1.0], [2.0])
       self.assertEqual("APlus2B", call.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([5.0], sess.run(call))
+        self.assertAllEqual([5.0], self.evaluate(call))
 
   def testDefineFunctionDuplicateOutputs(self):
 
@@ -224,8 +224,8 @@ class FunctionTest(test.TestCase):
       call_g = XSquarePlusOneGrad([2.0], [0.1])
 
       with session.Session() as sess:
-        self.assertAllClose([5.0], sess.run(call_f))
-        self.assertAllClose([0.4], sess.run(call_g))
+        self.assertAllClose([5.0], self.evaluate(call_f))
+        self.assertAllClose([0.4], self.evaluate(call_g))
 
   def testTanhSymGrad(self):
 
@@ -365,7 +365,7 @@ class FunctionTest(test.TestCase):
       else:
         dx, dy = gradients_impl.gradients([z], [x, y])
       with session.Session() as sess:
-        dx_val, dy_val = sess.run([dx, dy])
+        dx_val, dy_val = self.evaluate([dx, dy])
         self.assertEqual([2.0], dx_val)
         self.assertEqual([0.0], dy_val)
 
@@ -387,7 +387,7 @@ class FunctionTest(test.TestCase):
       call = AConstant()
       self.assertEqual("AConstant", call.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([42], sess.run(call))
+        self.assertAllEqual([42], self.evaluate(call))
 
   def testDefineFunctionNames(self):
 
@@ -468,7 +468,7 @@ class FunctionTest(test.TestCase):
 
       loop = control_flow_ops.while_loop(lambda x: x < 1e5, Body, [1.0])
 
-      ans = sess.run(loop)
+      ans = self.evaluate(loop)
       self.assertAllClose(ans, 131072.)
 
   def testControlFlowStrictness(self):
@@ -650,8 +650,8 @@ class FunctionTest(test.TestCase):
       # pylint: enable=unexpected-keyword-arg
       self.assertEqual("next", call2.op.name)
       with session.Session() as sess:
-        self.assertAllEqual([1], sess.run(call1))
-        self.assertAllEqual([0], sess.run(call2))
+        self.assertAllEqual([1], self.evaluate(call1))
+        self.assertAllEqual([0], self.evaluate(call2))
 
   def testNestedFunction(self):
 
@@ -794,7 +794,7 @@ class FunctionTest(test.TestCase):
       y = Foo()
 
     with self.session(graph=g) as sess:
-      self.assertEqual(sess.run(y), 10)
+      self.assertEqual(self.evaluate(y), 10)
 
   def testCaptureInCond(self):
     g = ops.Graph()
@@ -809,8 +809,8 @@ class FunctionTest(test.TestCase):
       z = Foo(False)
 
     with self.session(graph=g) as sess:
-      self.assertEqual(sess.run(y), 1)
-      self.assertEqual(sess.run(z), 2)
+      self.assertEqual(self.evaluate(y), 1)
+      self.assertEqual(self.evaluate(z), 2)
 
   def testStableName(self):
 
@@ -854,7 +854,7 @@ class FunctionTest(test.TestCase):
       z = Bar(x)
 
     with self.session(graph=g) as sess:
-      v0, v1 = sess.run([y, z])
+      v0, v1 = self.evaluate([y, z])
       self.assertAllEqual(v0, 20.)
       self.assertAllEqual(v1, 20.)
 
@@ -900,7 +900,7 @@ class FunctionTest(test.TestCase):
     self.assertEqual(global_vars[0].name, "linear/w:0")
 
     with session.Session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       output_val = sess.run(
           output_op, feed_dict={input_op: np.random.rand(32, 100)})
       self.assertEqual(output_val.shape, (32, 100))
@@ -928,7 +928,7 @@ class FunctionTest(test.TestCase):
     self.assertEqual(global_vars[0].name, "vs1/var:0")
 
     with session.Session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       out1, out2 = sess.run(
           [out1_op, out2_op], feed_dict={input_op: np.linspace(1, 10, 10)})
       self.assertAllEqual(out1, np.linspace(2, 11, 10))
@@ -991,8 +991,8 @@ class FunctionTest(test.TestCase):
     result_2 = Bar(constant_op.constant(100, dtype=dtypes.int64))
 
     with session.Session() as sess:
-      self.assertEqual(4.0, sess.run(result_1))
-      self.assertEqual(100, sess.run(result_2))
+      self.assertEqual(4.0, self.evaluate(result_1))
+      self.assertEqual(100, self.evaluate(result_2))
       self.assertEqual((4.0, 100), sess.run((result_1, result_2)))
 
   def testStatefulFunction(self):
@@ -1052,8 +1052,8 @@ class FunctionTest(test.TestCase):
     for config in _OptimizerOptions():
       config.device_count["CPU"] = 2
       with session.Session(config=config) as sess:
-        self.assertEqual(42.0, sess.run(f_0))
-        self.assertEqual(44.0, sess.run(f_1))
+        self.assertEqual(42.0, self.evaluate(f_0))
+        self.assertEqual(44.0, self.evaluate(f_1))
         self.assertEqual((42.0, 44.0), sess.run((f_0, f_1)))
 
   def testGuaranteedConstsAreCaptured(self):
@@ -1076,7 +1076,7 @@ class FunctionTest(test.TestCase):
       return output
 
     with self.session(use_gpu=False) as sess:
-      sess.run(var.initializer)
+      self.evaluate(var.initializer)
       _ = sess.run(CapturesGuaranteedConst(), {also_not_const: 1.0})
 
   def testSameFunctionDifferentGrads(self):
@@ -1127,7 +1127,7 @@ class FunctionTest(test.TestCase):
       dx2, = gradients_impl.gradients(ys=[y2], xs=[x2])
 
     with self.session(graph=g) as sess:
-      v0, v1, v2 = sess.run([dx0, dx1, dx2])
+      v0, v1, v2 = self.evaluate([dx0, dx1, dx2])
 
     self.assertAllEqual(v0, 2.)
     self.assertAllEqual(v1, 101.)
@@ -1532,7 +1532,7 @@ class UnrollLSTMTest(test.TestCase):
       tf_logging.info("time: %f txt size: %d gdef bin size: %d", finish - start,
                       len(str(gdef)), len(gdef.SerializeToString()))
       with g.as_default(), session.Session(config=cfg) as sess:
-        return sess.run(m)
+        return self.evaluate(m)
 
     mv0 = RunForward("complete")
     for cfg in _OptimizerOptions():
@@ -1561,7 +1561,7 @@ class UnrollLSTMTest(test.TestCase):
       tf_logging.info("time: %f txt size: %d gdef bin size: %d", finish - start,
                       len(str(gdef)), len(gdef.SerializeToString()))
       with g.as_default(), session.Session(config=cfg) as sess:
-        return sess.run(dw)
+        return self.evaluate(dw)
 
     d0 = RunForwardBackward("complete")
     for cfg in _OptimizerOptions():
@@ -1651,8 +1651,8 @@ class ModuleFunctionTest(test.TestCase):
       y = LinearWithCApi(a, b, c)
       z = Linear2WithCApi(a, b, c, d, e)
       with session.Session() as sess:
-        self.assertAllEqual([[1]], sess.run(y))
-        self.assertAllEqual([[5]], sess.run(z))
+        self.assertAllEqual([[1]], self.evaluate(y))
+        self.assertAllEqual([[5]], self.evaluate(z))
 
 
 class VariableHoistingTest(test.TestCase):
@@ -1704,8 +1704,8 @@ class VariableHoistingTest(test.TestCase):
     self.assertEqual("Foo/b", b.op.name)
 
     with self.session(graph=g) as sess:
-      sess.run(variables.global_variables_initializer())
-      w, b, x, y0, loss, dw, db = sess.run([w, b, x, y0, loss, dw, db])
+      self.evaluate(variables.global_variables_initializer())
+      w, b, x, y0, loss, dw, db = self.evaluate([w, b, x, y0, loss, dw, db])
 
     self.assertAllEqual(w.shape, (64, 64))
     self.assertAllClose(np.sum(w), 2050.44)
diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py
index 563a177dd06..10a01c71f2c 100644
--- a/tensorflow/python/framework/graph_util_test.py
+++ b/tensorflow/python/framework/graph_util_test.py
@@ -210,8 +210,8 @@ class DeviceFunctionsTest(test.TestCase):
 
       with session.Session() as sess:
         init = variables.variables_initializer([variable_node])
-        sess.run(init)
-        output = sess.run(output_node)
+        self.evaluate(init)
+        output = self.evaluate(output_node)
         self.assertNear(4.0, output, 0.00001)
         variable_graph_def = sess.graph.as_graph_def()
 
@@ -242,8 +242,8 @@ class DeviceFunctionsTest(test.TestCase):
         output_node = math_ops_lib.multiply(
             variable_node, 2.0, name="output_node")
         with session.Session() as sess:
-          sess.run(variable_node.initializer)
-          output = sess.run(output_node)
+          self.evaluate(variable_node.initializer)
+          output = self.evaluate(output_node)
           self.assertNear(2.0, output, 0.00001)
           variable_graph_def = sess.graph.as_graph_def()
           # First get the constant_graph_def when variable_names_whitelist is
@@ -256,7 +256,7 @@ class DeviceFunctionsTest(test.TestCase):
 
           # Then initialize the unused variable, and get another
           # constant_graph_def when variable_names_whitelist is not set.
-          sess.run(another_variable.initializer)
+          self.evaluate(another_variable.initializer)
           constant_graph_def_without_variable_whitelist = (
               graph_util.convert_variables_to_constants(
                   sess, variable_graph_def, ["output_node"]))
@@ -295,7 +295,7 @@ class DeviceFunctionsTest(test.TestCase):
             ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"])
       with session.Session() as sess:
         output_node = sess.graph.get_tensor_by_name("output_node:0")
-        output = sess.run(output_node)
+        output = self.evaluate(output_node)
         self.assertNear(2.0, output, 0.00001)
 
   def create_node_def(self, op, name, inputs):
diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py
index fc7367649e1..66e80b55852 100644
--- a/tensorflow/python/framework/importer_test.py
+++ b/tensorflow/python/framework/importer_test.py
@@ -397,11 +397,11 @@ class ImportGraphDefTest(test.TestCase):
       # Run the imported graph.
       # TODO(b/76173421): make this work (currently DCHECKS)
       # with self.cached_session() as sess:
-      #   sess.run(imported_init)
-      #   self.assertEqual(sess.run(imported_var), 1.0)
-      #   self.assertEqual(sess.run(imported_assign), 2.0)
-      #   self.assertEqual(list(sess.run(imported_shape)), [])
-      #   self.assertEqual(list(sess.run(new_var_shape)), [])
+      #   self.evaluate(imported_init)
+      #   self.assertEqual(self.evaluate(imported_var), 1.0)
+      #   self.assertEqual(self.evaluate(imported_assign), 2.0)
+      #   self.assertEqual(list(self.evaluate(imported_shape)), [])
+      #   self.assertEqual(list(self.evaluate(new_var_shape)), [])
 
   def testWhileLoop(self):
     # Produce GraphDef containing while loop.
@@ -418,7 +418,7 @@ class ImportGraphDefTest(test.TestCase):
                                               return_elements=[r.name])
       self.assertEqual(imported_r.name, "import/" + r.name)
       with self.cached_session() as sess:
-        self.assertEqual(sess.run(imported_r), 10)
+        self.assertEqual(self.evaluate(imported_r), 10)
 
   def testImportWhileLoopInCond(self):
     # Produce GraphDef containing while loop.
@@ -458,7 +458,7 @@ class ImportGraphDefTest(test.TestCase):
           lambda i: i < 2, ImportFn, [0],
           shape_invariants=[tensor_shape.TensorShape(None)])
       with self.cached_session() as sess:
-        self.assertEqual(sess.run(out), 10)
+        self.assertEqual(self.evaluate(out), 10)
 
   def testTypeMismatchInGraphDef(self):
     # TODO(skyewm): improve error message
diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py
index 84e7f361bb5..cc93f8b1b87 100644
--- a/tensorflow/python/framework/meta_graph_test.py
+++ b/tensorflow/python/framework/meta_graph_test.py
@@ -492,8 +492,8 @@ class ScopedMetaGraphTest(test.TestCase):
       init_op = variables.global_variables_initializer()
       grad = gradients_impl.gradients([output], [var])
       with session.Session() as sess:
-        sess.run(init_op)
-        expected_grad_value = sess.run(grad)
+        self.evaluate(init_op)
+        expected_grad_value = self.evaluate(grad)
 
     # Restore the MetaGraphDef into a new Graph with an import scope.
     with ops.Graph().as_default():
@@ -518,8 +518,8 @@ class ScopedMetaGraphTest(test.TestCase):
       init_op = variables.global_variables_initializer()
 
       with session.Session() as sess:
-        sess.run(init_op)
-        actual_grad_value = sess.run(grad)
+        self.evaluate(init_op)
+        actual_grad_value = self.evaluate(grad)
         self.assertEqual(expected_grad_value, actual_grad_value)
 
   def testImportWhileLoopInWhileLoop(self):
@@ -544,8 +544,8 @@ class ScopedMetaGraphTest(test.TestCase):
       _, x = control_flow_ops.while_loop(lambda i, x: i < 2, body, [0, 0.0],
                                          name="")
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
-        sess.run(x)
+        self.evaluate(variables.global_variables_initializer())
+        self.evaluate(x)
 
   def testScopedImportUnderNameScope(self):
     graph = ops.Graph()
@@ -868,8 +868,8 @@ class MetaGraphWithVariableScopeTest(test.TestCase):
       _, update_op = metrics.mean(values)
 
       initializer = variables.local_variables_initializer()
-      sess.run(initializer)
-      sess.run(update_op)
+      self.evaluate(initializer)
+      self.evaluate(update_op)
 
     meta_graph.export_scoped_meta_graph(
         filename=meta_graph_filename, graph=graph)
@@ -880,7 +880,7 @@ class MetaGraphWithVariableScopeTest(test.TestCase):
     with self.session(graph=graph) as sess:
       meta_graph.import_scoped_meta_graph(meta_graph_filename)
       initializer = variables.local_variables_initializer()
-      sess.run(initializer)
+      self.evaluate(initializer)
 
     # Verifies that importing an old meta_graph where "local_variables"
     # collection is of node_list type works, but cannot build initializer
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 3957d1de53d..32a24521ad0 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -503,7 +503,7 @@ class OperationTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(
           errors.InvalidArgumentError,
           "Graph is invalid, contains a cycle with 2 nodes"):
-        sess.run(x)
+        self.evaluate(x)
 
   def testUpdateInput(self):
     g = ops.Graph()
@@ -517,21 +517,21 @@ class OperationTest(test_util.TensorFlowTestCase):
     self.assertEquals(x.consumers(), [])
     self.assertEquals(y.consumers(), [z.op, z.op])
     with session.Session(graph=g) as sess:
-      self.assertEquals(sess.run(z), 4)
+      self.assertEquals(self.evaluate(z), 4)
 
     z.op._update_input(0, x)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
     self.assertEquals(x.consumers(), [z.op])
     self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
-      self.assertEquals(sess.run(z), 3)
+      self.assertEquals(self.evaluate(z), 3)
 
     z.op._update_input(1, y)  # pylint: disable=protected-access
     self.assertEquals(list(z.op.inputs), [x, y])
     self.assertEquals(x.consumers(), [z.op])
     self.assertEquals(y.consumers(), [z.op])
     with session.Session(graph=g) as sess:
-      self.assertEquals(sess.run(z), 3)
+      self.assertEquals(self.evaluate(z), 3)
 
   def testUpdateInputGraphError(self):
     g_0 = ops.Graph()
@@ -557,7 +557,7 @@ class OperationTest(test_util.TensorFlowTestCase):
           errors.InvalidArgumentError,
           "Input 0 of node add was passed string from Const_1:0 incompatible "
           "with expected int32"):
-        sess.run(z)
+        self.evaluate(z)
 
   def testUpdateInputShapeError(self):
     g = ops.Graph()
@@ -2390,7 +2390,7 @@ class GraphTest(test_util.TensorFlowTestCase):
       c = math_ops.add(a, b)
     # Create a session we can delete
     with session.Session(graph=g) as sess:
-      sess.run(c)
+      self.evaluate(c)
     # Delete all references and trigger gc
     del g
     del a
@@ -2406,7 +2406,7 @@ class GraphTest(test_util.TensorFlowTestCase):
         math_ops.add([1, 2], [1, 2, 3])
       a = constant_op.constant(1)
       with session.Session() as sess:
-        sess.run(a)
+        self.evaluate(a)
 
   def testRunnableAfterInvalidShapeWithKernelLabelMap(self):
     g = ops.Graph()
@@ -2416,7 +2416,7 @@ class GraphTest(test_util.TensorFlowTestCase):
           test_ops.kernel_label_required(1)
       a = constant_op.constant(1)
       with session.Session() as sess:
-        sess.run(a)
+        self.evaluate(a)
 
 
 class AttrScopeTest(test_util.TensorFlowTestCase):
diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py
index b8a9672b06d..174ada9fe11 100644
--- a/tensorflow/python/framework/smart_cond_test.py
+++ b/tensorflow/python/framework/smart_cond_test.py
@@ -109,8 +109,8 @@ class SmartCaseTest(test_util.TensorFlowTestCase):
                               exclusive=True)
     with session.Session() as sess:
       # No feed_dict necessary
-      self.assertEqual(sess.run(y), 1)
-      self.assertEqual(sess.run(z), 1)
+      self.assertEqual(self.evaluate(y), 1)
+      self.assertEqual(self.evaluate(z), 1)
 
   def testFalse(self):
     conditions = [(False, raise_exception)]
@@ -121,8 +121,8 @@ class SmartCaseTest(test_util.TensorFlowTestCase):
                               default=lambda: constant_op.constant(1),
                               exclusive=True)
     with session.Session() as sess:
-      self.assertEqual(sess.run(y), 1)
-      self.assertEqual(sess.run(z), 1)
+      self.assertEqual(self.evaluate(y), 1)
+      self.assertEqual(self.evaluate(z), 1)
 
   def testMix(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py
index 2f7591abbd0..9ee1bd75a53 100644
--- a/tensorflow/python/framework/sparse_tensor_test.py
+++ b/tensorflow/python/framework/sparse_tensor_test.py
@@ -50,7 +50,7 @@ class SparseTensorTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(indices, value.indices)
         self.assertAllEqual(values, value.values)
         self.assertAllEqual(shape, value.dense_shape)
-        sess_run_value = sess.run(sp)
+        sess_run_value = self.evaluate(sp)
         self.assertAllEqual(sess_run_value.indices, value.indices)
         self.assertAllEqual(sess_run_value.values, value.values)
         self.assertAllEqual(sess_run_value.dense_shape, value.dense_shape)
diff --git a/tensorflow/python/framework/subscribe_test.py b/tensorflow/python/framework/subscribe_test.py
index cab426844d4..5322204ce67 100644
--- a/tensorflow/python/framework/subscribe_test.py
+++ b/tensorflow/python/framework/subscribe_test.py
@@ -66,9 +66,9 @@ class SubscribeTest(test_util.TensorFlowTestCase):
     self.assertTrue(c.op in d.op.control_inputs)
 
     with self.cached_session() as sess:
-      c_out = sess.run([c])
-      n_out = sess.run([n])
-      d_out = sess.run([d])
+      c_out = self.evaluate([c])
+      n_out = self.evaluate([n])
+      d_out = self.evaluate([d])
 
     self.assertEqual(n_out, [-2])
     self.assertEqual(c_out, [2])
@@ -145,8 +145,8 @@ class SubscribeTest(test_util.TensorFlowTestCase):
                             lambda t: script_ops.py_func(sub, [t], [t.dtype]))
 
     with self.cached_session() as sess:
-      c_out = sess.run([c])
-      d_out = sess.run([d])
+      c_out = self.evaluate([c])
+      d_out = self.evaluate([d])
 
     self.assertEqual(c_out, [42])
     self.assertEqual(d_out, [11])
@@ -205,7 +205,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
 
     # Expect the three side effect graphs to have been evaluated.
     with self.cached_session() as sess:
-      sess.run([c_sub])
+      self.evaluate([c_sub])
     self.assertIn('graph1', shared)
     self.assertIn('graph2', shared)
     self.assertIn('graph3', shared)
@@ -229,20 +229,20 @@ class SubscribeTest(test_util.TensorFlowTestCase):
 
     with self.cached_session() as sess:
       # Initialize the variables first.
-      sess.run([v1.initializer])
-      sess.run([v2.initializer])
+      self.evaluate([v1.initializer])
+      self.evaluate([v2.initializer])
 
       # Expect the side effects to be triggered when evaluating the add op as
       # it will read the value of the variable.
-      sess.run([add])
+      self.evaluate([add])
       self.assertEqual(1, len(shared))
 
       # Expect the side effect not to be triggered when evaluating the assign
       # op as it will not access the 'read' output of the variable.
-      sess.run([assign_v1])
+      self.evaluate([assign_v1])
       self.assertEqual(1, len(shared))
 
-      sess.run([add])
+      self.evaluate([add])
       self.assertEqual(2, len(shared))
 
       # Make sure the values read from the variable match the expected ones.
@@ -273,7 +273,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
     self.assertFalse(subscribe._is_subscribed_identity(tensor_array.handle))
 
     with self.cached_session() as sess:
-      sess.run([reader])
+      self.evaluate([reader])
     self.assertEqual(0, len(shared))
 
   def testMultipleOutputs(self):
@@ -304,7 +304,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
                         lambda t: script_ops.py_func(sub, [t], [t.dtype]))
 
     with self.cached_session() as sess:
-      sess.run([neg])
+      self.evaluate([neg])
 
     # All three ops have been processed.
     self.assertEqual(3, len(shared))
@@ -375,7 +375,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
     self.assertIsNot(context(subscriptions[0]), context(subscriptions[1]))
 
     with self.cached_session() as sess:
-      sess.run(cond)
+      self.evaluate(cond)
 
     self.assertEqual(3, len(results))
 
diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py
index bdf759f2204..87d65c8c466 100644
--- a/tensorflow/python/framework/tensor_util_test.py
+++ b/tensorflow/python/framework/tensor_util_test.py
@@ -771,7 +771,7 @@ class TensorUtilTest(test.TestCase):
     with self.cached_session() as sess:
       ma = MockArray(np.array([10, 20, 30]))
       t = ops.convert_to_tensor(ma)
-      a = sess.run(t)
+      a = self.evaluate(t)
       self.assertEquals(np.int64, a.dtype)
       self.assertAllClose(np.array([10, 20, 30], dtype=np.int64), a)
 
diff --git a/tensorflow/python/grappler/constant_folding_test.py b/tensorflow/python/grappler/constant_folding_test.py
index ab1d0ed25b9..30c1e146814 100644
--- a/tensorflow/python/grappler/constant_folding_test.py
+++ b/tensorflow/python/grappler/constant_folding_test.py
@@ -61,7 +61,7 @@ class ConstantFoldingTest(test.TestCase):
           back_prop=False,
           parallel_iterations=1)
       with session.Session() as sess:
-        y_v = sess.run(y)
+        y_v = self.evaluate(y)
         self.assertAllEqual(np.zeros([10, 20, 30]), y_v)
 
 
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 7b68d5e80da..55ccfbb93c3 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -241,7 +241,7 @@ class LayoutOptimizerTest(test.TestCase):
       if restore:
         saver.restore(sess, checkpoint_path)
       else:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
 
       np.random.seed(0)
       for _ in range(2):
@@ -262,7 +262,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = _two_layer_model(x)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -365,7 +365,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(pad)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -396,7 +396,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -425,7 +425,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(cast)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -456,7 +456,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(squeeze)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -486,7 +486,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(squeeze)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -516,7 +516,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(squeeze)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -545,7 +545,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -574,7 +574,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -603,7 +603,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -632,7 +632,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -662,7 +662,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -691,7 +691,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reduce_sum)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -724,7 +724,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(concat)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -835,7 +835,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(reverse)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -905,7 +905,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(select)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -966,7 +966,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(select)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1179,7 +1179,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(s)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1214,7 +1214,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = array_ops.identity(s)
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1347,7 +1347,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = _loop()
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1374,7 +1374,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = _loop_with_branch()
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1398,7 +1398,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = _loop_with_vec_and_4d()
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
@@ -1422,7 +1422,7 @@ class LayoutOptimizerTest(test.TestCase):
       output = _model_with_second_port()
 
       with session.Session(config=_get_config(False)) as sess:
-        output_val_ref = sess.run(output)
+        output_val_ref = self.evaluate(output)
 
       with session.Session(config=_get_config()) as sess:
         metadata = config_pb2.RunMetadata()
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 98cbb1a4b62..d233629cbbd 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -231,10 +231,10 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
       train_op = graph.get_operation_by_name(train_op_name)
       loss_op = graph.get_tensor_by_name(loss_op_name)
       with session.Session(config=config, graph=graph) as sess:
-        sess.run(init_op)
-        sess.run(train_op)
-        sess.run(train_op)
-        return sess.run(loss_op)
+        self.evaluate(init_op)
+        self.evaluate(train_op)
+        self.evaluate(train_op)
+        return self.evaluate(loss_op)
 
   def testRecomputationRewritingNoErrors(self):
     """Tests that graph output is not significantly different with rewriting."""
@@ -295,8 +295,8 @@ class MemoryOptimizerRecomputeTest(test.TestCase):
           rewrite_options=manual_memory_config)
       session_config = config_pb2.ConfigProto(graph_options=graph_options)
       with session.Session(config=session_config) as sess:
-        sess.run(init_op)
-        sess.run(train_op)
+        self.evaluate(init_op)
+        self.evaluate(train_op)
 
   def testHintDoesRewrite(self):
     graph = self._annotated_graph()[0]
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index a727e99f66b..48fdd56e9f6 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -136,7 +136,7 @@ class BackendUtilsTest(test.TestCase):
       x = keras.Input((3,))
       y = keras.layers.BatchNormalization()(x)
       if not context.executing_eagerly():
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         sess.run(y, feed_dict={x: np.random.random((2, 3))})
 
   def test_learning_phase_scope(self):
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index 7172571f7cc..b1449069e32 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -1013,8 +1013,8 @@ class RNNTest(test.TestCase):
         inputs, _ = cell(inputs, initial_state)
         output = inputs
         if not context.executing_eagerly():
-          sess.run(variables_lib.global_variables_initializer())
-          output = sess.run(output)
+          self.evaluate(variables_lib.global_variables_initializer())
+          output = self.evaluate(output)
         return output
 
     random_seed.set_random_seed(12345)
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 74e5d4d4ce1..f049b10721a 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -322,19 +322,19 @@ class KerasMetricsTest(test.TestCase):
       m = metrics.Mean()
       v = array_ops.placeholder(dtypes.float32)
       w = array_ops.placeholder(dtypes.float32)
-      sess.run(variables.variables_initializer(m.variables))
+      self.evaluate(variables.variables_initializer(m.variables))
 
       # check __call__()
       result_t = m(v, sample_weight=w)
       result = sess.run(result_t, feed_dict=({v: 100, w: 0.5}))
-      self.assertEqual(sess.run(m.total), 50)
-      self.assertEqual(sess.run(m.count), 0.5)
+      self.assertEqual(self.evaluate(m.total), 50)
+      self.assertEqual(self.evaluate(m.count), 0.5)
       self.assertEqual(result, 50 / 0.5)
 
       # check update_state() and result()
       result = sess.run(result_t, feed_dict=({v: [1, 5], w: [1, 0.2]}))
-      self.assertAlmostEqual(sess.run(m.total), 52, 2)  # 50 + 1 + 5 * 0.2
-      self.assertAlmostEqual(sess.run(m.count), 1.7, 2)  # 0.5 + 1.2
+      self.assertAlmostEqual(self.evaluate(m.total), 52, 2)  # 50 + 1 + 5 * 0.2
+      self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2)  # 0.5 + 1.2
       self.assertAlmostEqual(result, 52 / 1.7, 2)
 
   @test_util.run_in_graph_and_eager_modes
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl_test.py b/tensorflow/python/keras/optimizer_v2/ftrl_test.py
index c14cf75c269..ca8c33dfa6d 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl_test.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl_test.py
@@ -54,7 +54,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllClose([0.0, 0.0], v0_val)
         self.assertAllClose([0.0, 0.0], v1_val)
 
@@ -62,7 +62,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(3):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-2.60260963, -4.29698515]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -90,14 +90,14 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run 3 steps FTRL
         for _ in range(3):
           update.run()
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-2.55607247, -3.98729396]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -137,14 +137,14 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run 10 steps FTRL
         for _ in range(10):
           update.run()
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-7.66718769, -10.91273689]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -166,7 +166,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
@@ -174,7 +174,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.24059935, -0.46829352]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -203,7 +203,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
@@ -211,7 +211,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.22578995, -0.44345796]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -239,7 +239,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val)
         self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val)
 
@@ -247,7 +247,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([[-0.22578995], [2.]], v0_val)
         self.assertAllCloseAccordingToType([[4.], [-0.13229476]], v1_val)
 
@@ -275,7 +275,7 @@ class FtrlOptimizerTest(test.TestCase):
         update1 = opt1.apply_gradients([(grads1, var1)])
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)
 
@@ -284,7 +284,7 @@ class FtrlOptimizerTest(test.TestCase):
           update0.run()
           update1.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         # var0 is experiencing L2 shrinkage so it should be smaller than var1
         # in magnitude.
         self.assertTrue((v0_val**2 < v1_val**2).all())
@@ -313,7 +313,7 @@ class FtrlOptimizerTest(test.TestCase):
     variables.global_variables_initializer().run()
 
     sess = ops.get_default_session()
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     if is_sparse:
       self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
       self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
@@ -325,7 +325,7 @@ class FtrlOptimizerTest(test.TestCase):
     for _ in range(steps):
       update.run()
 
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     return v0_val, v1_val
 
   # When variables are initialized with Zero, FTRL-Proximal has two properties:
diff --git a/tensorflow/python/kernel_tests/accumulate_n_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py
index ae24cf8f14a..c7f11f854d1 100644
--- a/tensorflow/python/kernel_tests/accumulate_n_test.py
+++ b/tensorflow/python/kernel_tests/accumulate_n_test.py
@@ -65,7 +65,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase):
             for _ in range(0, num_inputs)
         ]
         accum_n = math_ops.accumulate_n(input_vars)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         accum_n_grad = gradients.gradients(accum_n, input_vars)
         self.assertAllEqual(
             np.repeat(1.0, num_inputs),  # d/dx (x + y + ...) = 1
diff --git a/tensorflow/python/kernel_tests/aggregate_ops_test.py b/tensorflow/python/kernel_tests/aggregate_ops_test.py
index 0f15319cb59..874d6166580 100644
--- a/tensorflow/python/kernel_tests/aggregate_ops_test.py
+++ b/tensorflow/python/kernel_tests/aggregate_ops_test.py
@@ -61,7 +61,7 @@ class AddNTest(test.TestCase):
       for dtype in self._supported_types():
         for count in range(1, self._MAX_N + 1):
           data = [self._buildData((2, 2), dtype) for _ in range(count)]
-          actual = sess.run(math_ops.add_n(data))
+          actual = self.evaluate(math_ops.add_n(data))
           expected = np.sum(np.vstack(
               [np.expand_dims(d, 0) for d in data]), axis=0)
           tol = 5e-3 if dtype == dtypes.float16 else 5e-7
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index d345138ec76..afc158f6975 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -833,7 +833,7 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase):
       index = constant_op.constant(1, dtype=dtypes.int64)
       b = 2. * a[index]
       grad, = gradients_impl.gradients(b, a)
-      self.assertAllEqual(sess.run(grad), [0., 2., 0.])
+      self.assertAllEqual(self.evaluate(grad), [0., 2., 0.])
 
 
 class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
@@ -846,7 +846,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
               math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
               shape=(4, 1, 1)))
       varshape = variables.Variable([6, 4, 4], dtype=dtypes.int32)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       begin = constant_op.constant([0, 0, 0])
       end = constant_op.constant([4, 1, 1])
       strides = constant_op.constant([1, 1, 1])
@@ -859,7 +859,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
           math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
           shape=(4, 1, 1))
       original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       begin = constant_op.constant([0, 0, 0], dtype=dtypes.int64)
       end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
       strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
@@ -873,7 +873,7 @@ class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
           math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
           shape=(4, 1, 1))
       original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       begin = constant_op.constant([0, 0, 0], dtype=dtypes.int32)
       end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
       strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
@@ -1042,7 +1042,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase):
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = resource_variable_ops.ResourceVariable(init_val)
     with self.cached_session() as sess:
-      sess.run(v.initializer)
+      self.evaluate(v.initializer)
       with self.assertRaises(ValueError):
         sess.run(v[:].assign(too_large_val))
       with self.assertRaises(ValueError):
@@ -1269,7 +1269,7 @@ class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
             initializer=init_ops.constant_initializer(10.0),
             use_resource=use_resource)
         guarantee_a = array_ops.guarantee_const(a)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertEqual(10.0, guarantee_a.eval())
 
   def testResourceRejection(self):
@@ -1279,7 +1279,7 @@ class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
           initializer=init_ops.constant_initializer(10.0),
           use_resource=True)
       guarantee_a = array_ops.guarantee_const(a.handle)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
                                                "cannot be a resource variable"):
         guarantee_a.eval()
diff --git a/tensorflow/python/kernel_tests/attention_ops_test.py b/tensorflow/python/kernel_tests/attention_ops_test.py
index 14db06b7837..00dba9996dd 100644
--- a/tensorflow/python/kernel_tests/attention_ops_test.py
+++ b/tensorflow/python/kernel_tests/attention_ops_test.py
@@ -85,7 +85,7 @@ class ExtractGlimpseTest(test.TestCase):
 
     # Evaluate the TensorFlow Graph.
     with self.cached_session() as sess:
-      value_rows, value_cols = sess.run([glimpse_rows, glimpse_cols])
+      value_rows, value_cols = self.evaluate([glimpse_rows, glimpse_cols])
 
     # Check dimensions of returned glimpse.
     self.assertEqual(value_rows.shape[1], glimpse_sizes[0])
diff --git a/tensorflow/python/kernel_tests/barrier_ops_test.py b/tensorflow/python/kernel_tests/barrier_ops_test.py
index 4d36b3a4658..495bbe7b341 100644
--- a/tensorflow/python/kernel_tests/barrier_ops_test.py
+++ b/tensorflow/python/kernel_tests/barrier_ops_test.py
@@ -229,7 +229,7 @@ class BarrierTest(test.TestCase):
       insert_ops = [b.insert_many(0, [k], [v]) for k, v in zip(keys, values)]
       take_t = b.take_many(10)
 
-      sess.run(insert_ops)
+      self.evaluate(insert_ops)
       self.assertEquals(size_t.eval(), [10])
 
       indices_val, keys_val, values_val = sess.run(
@@ -491,9 +491,9 @@ class BarrierTest(test.TestCase):
       b = data_flow_ops.Barrier(
           (dtypes.float32, dtypes.float32), shapes=((), ()), name="B")
       take_t = b.take_many(1, allow_small_batch=True)
-      sess.run(b.close(cancel))
+      self.evaluate(b.close(cancel))
       with self.assertRaisesOpError("is closed and has insufficient elements"):
-        sess.run(take_t)
+        self.evaluate(take_t)
 
   def testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(self):
     self._testClosedEmptyBarrierTakeManyAllowSmallBatchRaises(cancel=False)
diff --git a/tensorflow/python/kernel_tests/base64_ops_test.py b/tensorflow/python/kernel_tests/base64_ops_test.py
index 1b399942efb..bb903d827f2 100644
--- a/tensorflow/python/kernel_tests/base64_ops_test.py
+++ b/tensorflow/python/kernel_tests/base64_ops_test.py
@@ -93,7 +93,7 @@ class Base64OpsTest(test_util.TensorFlowTestCase):
         decoded = string_ops.decode_base64(encoded)
 
         with self.cached_session() as sess:
-          encoded_value, decoded_value = sess.run([encoded, decoded])
+          encoded_value, decoded_value = self.evaluate([encoded, decoded])
 
         self.assertEqual(encoded_value.shape, msg.shape)
         self.assertEqual(decoded_value.shape, msg.shape)
diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py
index ac5cbc810a9..cd330481214 100644
--- a/tensorflow/python/kernel_tests/basic_gpu_test.py
+++ b/tensorflow/python/kernel_tests/basic_gpu_test.py
@@ -44,13 +44,13 @@ class GPUBinaryOpsTest(test.TestCase):
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = tf_func(inx, iny)
-      tf_gpu = sess.run(out)
+      tf_gpu = self.evaluate(out)
 
     with self.cached_session(use_gpu=False) as sess:
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       out = tf_func(inx, iny)
-      tf_cpu = sess.run(out)
+      tf_cpu = self.evaluate(out)
 
     self.assertAllClose(tf_cpu, tf_gpu)
 
@@ -96,7 +96,7 @@ class MathBuiltinUnaryTest(test.TestCase):
     with self.cached_session(use_gpu=use_gpu) as sess:
       inx = ops.convert_to_tensor(x)
       ofunc = tf_func(inx)
-      tf_out = sess.run(ofunc)
+      tf_out = self.evaluate(ofunc)
     self.assertAllClose(np_out, tf_out)
 
   def _inv(self, x):
@@ -148,7 +148,7 @@ class MathBuiltinUnaryTest(test.TestCase):
       iny = ops.convert_to_tensor(y + 0.1)
       ofunc = inx / iny
       out_func2 = math_ops.floor(ofunc)
-      tf_out = sess.run(out_func2)
+      tf_out = self.evaluate(out_func2)
 
     self.assertAllClose(np_out, tf_out)
 
diff --git a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
index 12afb6a2ad8..1a7b1a7e90e 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
@@ -98,8 +98,8 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
           quantile_accumulator_handle, num_features=2)
       quantiles = boosted_trees_ops.boosted_trees_bucketize(
           [self._feature_0, self._feature_1], buckets)
-      sess.run(summary_op)
-      sess.run(flush_op)
+      self.evaluate(summary_op)
+      self.evaluate(flush_op)
       self.assertAllClose(self._feature_0_boundaries, buckets[0].eval())
       self.assertAllClose(self._feature_1_boundaries, buckets[1].eval())
 
@@ -132,8 +132,8 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
           quantile_accumulator_handle_1, num_features=1)
       quantiles = boosted_trees_ops.boosted_trees_bucketize(
           [self._feature_0, self._feature_1], bucket_0 + bucket_1)
-      sess.run([summary_op_0, summary_op_1])
-      sess.run([flush_op_0, flush_op_1])
+      self.evaluate([summary_op_0, summary_op_1])
+      self.evaluate([flush_op_0, flush_op_1])
       self.assertAllClose(self._feature_0_boundaries, bucket_0[0].eval())
       self.assertAllClose(self._feature_1_boundaries, bucket_1[0].eval())
 
@@ -158,7 +158,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
                                             self._example_weights)
       with ops.control_dependencies([summaries]):
         flush = accumulator.flush()
-      sess.run(flush)
+      self.evaluate(flush)
       self.assertAllClose(self._feature_0_boundaries, buckets[0].eval())
       self.assertAllClose(self._feature_1_boundaries, buckets[1].eval())
       save.save(sess, save_path)
@@ -185,12 +185,12 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
 
       summaries = accumulator.add_summaries([self._feature_0, self._feature_1],
                                             self._example_weights)
-      sess.run(summaries)
+      self.evaluate(summaries)
       buckets = accumulator.get_bucket_boundaries()
       self.assertAllClose([], buckets[0].eval())
       self.assertAllClose([], buckets[1].eval())
       save.save(sess, save_path)
-      sess.run(accumulator.flush())
+      self.evaluate(accumulator.flush())
       self.assertAllClose(self._feature_0_boundaries, buckets[0].eval())
       self.assertAllClose(self._feature_1_boundaries, buckets[1].eval())
 
diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
index cc3984015da..e1036b0b754 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py
@@ -65,16 +65,16 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
           min_node_weight=0,
           max_splits=max_splits)
 
-      self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
+      self.assertAllEqual([[1, 2], [1, 2]], self.evaluate(node_ids_list))
       self.assertAllClose([[0.004775, 0.41184], [0.02823, 0.41184]],
-                          sess.run(gains_list))
-      self.assertAllEqual([[1, 1], [1, 1]], sess.run(thresholds_list))
+                          self.evaluate(gains_list))
+      self.assertAllEqual([[1, 1], [1, 1]], self.evaluate(thresholds_list))
       # The left node contrib will be later added to the previous node value to
       # make the left node value, and the same for right node contrib.
       self.assertAllClose([[[-.416667], [.568966]], [[-.6], [-.75]]],
-                          sess.run(left_node_contribs_list))
+                          self.evaluate(left_node_contribs_list))
       self.assertAllClose([[[-.592593], [-.75]], [[-.076923], [.568966]]],
-                          sess.run(right_node_contribs_list))
+                          self.evaluate(right_node_contribs_list))
 
   def testCalculateBestGainsWithL2(self):
     """Testing Gain calculation with L2."""
@@ -113,16 +113,16 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
           min_node_weight=0,
           max_splits=max_splits)
 
-      self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
+      self.assertAllEqual([[1, 2], [1, 2]], self.evaluate(node_ids_list))
       self.assertAllClose([[0., 0.33931375], [0.01879096, 0.33931375]],
-                          sess.run(gains_list))
-      self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
+                          self.evaluate(gains_list))
+      self.assertAllEqual([[0, 1], [1, 1]], self.evaluate(thresholds_list))
       # The left node contrib will be later added to the previous node value to
       # make the left node value, and the same for right node contrib.
       self.assertAllClose([[[0.], [.485294]], [[-.5], [-.6]]],
-                          sess.run(left_node_contribs_list))
+                          self.evaluate(left_node_contribs_list))
       self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]],
-                          sess.run(right_node_contribs_list))
+                          self.evaluate(right_node_contribs_list))
 
   def testCalculateBestGainsWithL1(self):
     """Testing Gain calculation with L1."""
@@ -162,18 +162,18 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
           min_node_weight=0,
           max_splits=max_splits)
 
-      self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
+      self.assertAllEqual([[0, 1], [1, 1]], self.evaluate(thresholds_list))
 
-      self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
+      self.assertAllEqual([[1, 2], [1, 2]], self.evaluate(node_ids_list))
       self.assertAllClose([[[0.0], [0.3965517]], [[-0.4], [-0.5]]],
-                          sess.run(left_node_contribs_list))
+                          self.evaluate(left_node_contribs_list))
 
       self.assertAllClose([[[-0.3333333], [-0.5]], [[0.0], [0.396552]]],
-                          sess.run(right_node_contribs_list))
+                          self.evaluate(right_node_contribs_list))
 
       # Gain should also include an adjustment of the gradient by l1.
       self.assertAllClose([[0.0, 0.191207], [0.01, 0.191207]],
-                          sess.run(gains_list))
+                          self.evaluate(gains_list))
 
   def testCalculateBestGainsWithTreeComplexity(self):
     """Testing Gain calculation with L2."""
@@ -214,18 +214,18 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
           min_node_weight=0,
           max_splits=max_splits)
 
-      self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
+      self.assertAllEqual([[1, 2], [1, 2]], self.evaluate(node_ids_list))
 
       self.assertAllClose([[-3., -2.66068625], [-2.98120904, -2.66068625]],
-                          sess.run(gains_list))
+                          self.evaluate(gains_list))
 
-      self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
+      self.assertAllEqual([[0, 1], [1, 1]], self.evaluate(thresholds_list))
       # The left node contrib will be later added to the previous node value to
       # make the left node value, and the same for right node contrib.
       self.assertAllClose([[[0.], [.485294]], [[-.5], [-.6]]],
-                          sess.run(left_node_contribs_list))
+                          self.evaluate(left_node_contribs_list))
       self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]],
-                          sess.run(right_node_contribs_list))
+                          self.evaluate(right_node_contribs_list))
 
   def testCalculateBestGainsWithMinNodeWeight(self):
     """Testing Gain calculation without any regularization."""
@@ -266,13 +266,13 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
 
       # We can't split node 1 on feature 1 and node 2 on feature 2 because of
       # the min node weight.
-      self.assertAllEqual([[2], [1]], sess.run(node_ids_list))
-      self.assertAllClose([[0.384314], [0.098013]], sess.run(gains_list))
-      self.assertAllEqual([[1], [1]], sess.run(thresholds_list))
+      self.assertAllEqual([[2], [1]], self.evaluate(node_ids_list))
+      self.assertAllClose([[0.384314], [0.098013]], self.evaluate(gains_list))
+      self.assertAllEqual([[1], [1]], self.evaluate(thresholds_list))
       self.assertAllClose([[[0.4852941]], [[-.6]]],
-                          sess.run(left_node_contribs_list))
+                          self.evaluate(left_node_contribs_list))
       self.assertAllClose([[[-0.75]], [[-0.014925]]],
-                          sess.run(right_node_contribs_list))
+                          self.evaluate(right_node_contribs_list))
 
   def testCalculateBestGainsWithMinNodeWeightNoSplitOnFeturePossible(self):
     """Testing Gain calculation without any regularization."""
@@ -311,9 +311,9 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
            max_splits=max_splits)
 
       # We can't split either of the nodes on the first feature
-      self.assertEqual(2, len(sess.run(node_ids_list)))
-      self.assertAllEqual([], sess.run(node_ids_list)[0])
-      self.assertAllEqual([1], sess.run(node_ids_list)[1])
+      self.assertEqual(2, len(self.evaluate(node_ids_list)))
+      self.assertAllEqual([], self.evaluate(node_ids_list)[0])
+      self.assertAllEqual([1], self.evaluate(node_ids_list)[1])
 
       # Now check when we can't split on any feature
       (node_ids_list, _, _, _,
@@ -325,7 +325,7 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
            tree_complexity=0.0,
            min_node_weight=10,
            max_splits=max_splits)
-      self.assertAllEqual([[], []], sess.run(node_ids_list))
+      self.assertAllEqual([[], []], self.evaluate(node_ids_list))
 
   def testMakeStatsSummarySimple(self):
     """Simple test for MakeStatsSummary."""
diff --git a/tensorflow/python/kernel_tests/bucketize_op_test.py b/tensorflow/python/kernel_tests/bucketize_op_test.py
index 57413e6af50..f40ca825270 100644
--- a/tensorflow/python/kernel_tests/bucketize_op_test.py
+++ b/tensorflow/python/kernel_tests/bucketize_op_test.py
@@ -32,7 +32,7 @@ class BucketizationOpTest(test.TestCase):
         boundaries=[0, 3, 8, 11])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
     with self.session(use_gpu=True) as sess:
-      self.assertAllEqual(expected_out, sess.run(op))
+      self.assertAllEqual(expected_out, self.evaluate(op))
 
   def testFloat(self):
     op = math_ops._bucketize(
@@ -40,7 +40,7 @@ class BucketizationOpTest(test.TestCase):
         boundaries=[0., 3., 8., 11.])
     expected_out = [0, 1, 1, 2, 2, 3, 3, 4, 4]
     with self.session(use_gpu=True) as sess:
-      self.assertAllEqual(expected_out, sess.run(op))
+      self.assertAllEqual(expected_out, self.evaluate(op))
 
   def test2DInput(self):
     op = math_ops._bucketize(
@@ -48,7 +48,7 @@ class BucketizationOpTest(test.TestCase):
         boundaries=[0, 3, 8, 11])
     expected_out = [[0, 1, 1, 2, 2], [3, 3, 4, 4, 1]]
     with self.session(use_gpu=True) as sess:
-      self.assertAllEqual(expected_out, sess.run(op))
+      self.assertAllEqual(expected_out, self.evaluate(op))
 
   def testInvalidBoundariesOrder(self):
     op = math_ops._bucketize(
@@ -56,7 +56,7 @@ class BucketizationOpTest(test.TestCase):
     with self.session(use_gpu=True) as sess:
       with self.assertRaisesRegexp(
           errors_impl.InvalidArgumentError, "Expected sorted boundaries"):
-        sess.run(op)
+        self.evaluate(op)
 
   def testBoundariesNotList(self):
     with self.assertRaisesRegexp(
diff --git a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
index 46ab71537f5..031accee553 100644
--- a/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
+++ b/tensorflow/python/kernel_tests/candidate_sampler_ops_test.py
@@ -97,7 +97,7 @@ class RangeSamplerOpsTest(test.TestCase):
           true_classes, self.NUM_TRUE, self.NUM_SAMPLED, True)
       accidental_hits = candidate_sampling_ops.compute_accidental_hits(
           true_classes, sampled_candidates, self.NUM_TRUE)
-      indices, ids, weights = sess.run(accidental_hits)
+      indices, ids, weights = self.evaluate(accidental_hits)
 
     self.assertEqual(1, accidental_hits[0].get_shape().ndims)
     self.assertEqual(1, accidental_hits[1].get_shape().ndims)
diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py
index bc49cd5a048..2cfe084d957 100644
--- a/tensorflow/python/kernel_tests/cast_op_test.py
+++ b/tensorflow/python/kernel_tests/cast_op_test.py
@@ -187,7 +187,7 @@ class CastOpTest(test.TestCase):
       y = variables.Variable(True, dtype=dtypes.bool)
       cast = math_ops.cast(y, x.dtype)
       variables.global_variables_initializer().run()
-      self.assertEqual(1.0, sess.run(cast))
+      self.assertEqual(1.0, self.evaluate(cast))
 
   def testGradients(self):
     t = [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128]
@@ -229,7 +229,7 @@ class SaturateCastTest(test.TestCase):
               [lo, lo + 1, lo // 2, hi // 2, hi - 1, hi], dtype=in_type)
           y = math_ops.saturate_cast(x, dtype=out_type)
           self.assertEqual(y.dtype, out_type)
-          x, y = sess.run([x, y])
+          x, y = self.evaluate([x, y])
           correct = np.maximum(out_type.min, np.minimum(out_type.max, x))
           self.assertAllEqual(correct, y)
 
diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
index fa41a03b546..1a509a43d1f 100644
--- a/tensorflow/python/kernel_tests/cholesky_op_test.py
+++ b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -97,7 +97,7 @@ def TriAngInvCompositeGrad(l, grad):
 class CholeskyOpTest(test.TestCase):
 
   def _verifyCholeskyBase(self, sess, x, chol, verification):
-    chol_np, verification_np = sess.run([chol, verification])
+    chol_np, verification_np = self.evaluate([chol, verification])
     self.assertAllClose(x, verification_np)
     self.assertShapeEqual(x, chol)
     # Check that the cholesky is lower triangular, and has positive diagonal
@@ -183,7 +183,7 @@ class CholeskyOpTest(test.TestCase):
       matrix2 = math_ops.matmul(matrix2, matrix2, adjoint_a=True)
       c1 = linalg_ops.cholesky(matrix1)
       c2 = linalg_ops.cholesky(matrix2)
-      c1_val, c2_val = sess.run([c1, c2])
+      c1_val, c2_val = self.evaluate([c1, c2])
       self.assertAllClose(c1_val, c2_val)
 
 
diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py
index 149302831b1..27137f76bd1 100644
--- a/tensorflow/python/kernel_tests/concat_op_test.py
+++ b/tensorflow/python/kernel_tests/concat_op_test.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradient_checker
@@ -65,7 +66,7 @@ class ConcatOpTest(test.TestCase):
     self.assertAllEqual(result[:, 4:], params[p2])
 
   def testInt32GPU(self):
-    with self.session(use_gpu=True):
+    with test_util.use_gpu():
       p1 = np.random.rand(2, 3).astype("i")
       p2 = np.random.rand(2, 3).astype("i")
       x1 = constant_op.constant(p1)
@@ -76,13 +77,13 @@ class ConcatOpTest(test.TestCase):
     self.assertAllEqual(result[2:, :], p2)
 
   def testRefType(self):
-    with self.session(use_gpu=True):
+    with test_util.use_gpu():
       p1 = np.random.rand(4, 4).astype("f")
       p2 = np.random.rand(4, 4).astype("f")
       v1 = variables.Variable(p1)
       v2 = variables.Variable(p2)
       c = array_ops.concat([v1, v2], 0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       result = self.evaluate(c)
 
     self.assertEqual(result.shape, c.get_shape())
@@ -172,7 +173,7 @@ class ConcatOpTest(test.TestCase):
     # Test both positive and negative concat axis.
     # -2 and 1 correspond to the same axis for 3-dimensional tensors.
     for axis in [-2, 1]:
-      with self.cached_session(use_gpu=True):
+      with test_util.use_gpu():
         inp = []
         inp_tensors = []
         for x in [1, 2, 6]:
@@ -203,7 +204,7 @@ class ConcatOpTest(test.TestCase):
     self._testGradientsSimple(dtypes.complex64)
 
   def testGradientsFirstDim(self):
-    with self.session(use_gpu=True):
+    with test_util.use_gpu():
       inp = []
       inp_tensors = []
       for x in [1, 2, 6]:
@@ -230,7 +231,7 @@ class ConcatOpTest(test.TestCase):
     # Test both positive and negative concat axis.
     # -1 and 2 correspond to the same axis for 3-dimensional tensors.
     for axis in [-1, 2]:
-      with self.cached_session(use_gpu=True):
+      with test_util.use_gpu():
         inp = []
         inp_tensors = []
         for x in [1, 2, 6]:
@@ -261,7 +262,7 @@ class ConcatOpTest(test.TestCase):
     # Random dim to concat on
     concat_dim = np.random.randint(5)
     concat_dim_sizes = np.random.randint(1, 5, size=num_tensors)
-    with self.cached_session(use_gpu=True):
+    with test_util.use_gpu():
       inp = []
       inp_tensors = []
       for x in concat_dim_sizes:
@@ -358,7 +359,7 @@ class ConcatOpTest(test.TestCase):
   def testZeroSize(self):
     # Verify that concat doesn't crash and burn for zero size inputs
     np.random.seed(7)
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       for shape0 in (), (2,):
         axis = len(shape0)
         for shape1 in (), (3,):
@@ -370,10 +371,10 @@ class ConcatOpTest(test.TestCase):
               # TODO(irving): Make tf.concat handle map, then drop list().
               xs = list(map(constant_op.constant, [x0, x1]))
               c = array_ops.concat(xs, axis)
-              self.assertAllEqual(c.eval(), correct)
+              self.assertAllEqual(self.evaluate(c), correct)
               # Check gradients
               dc = np.random.randn(*c.get_shape().as_list())
-              dxs = sess.run(gradients_impl.gradients(c, xs, dc))
+              dxs = self.evaluate(gradients_impl.gradients(c, xs, dc))
               self.assertAllEqual(dc, np.concatenate(dxs, axis=axis))
 
   def testTensorConcatDim0Grad(self):
@@ -473,18 +474,17 @@ class ConcatOpTest(test.TestCase):
   def testConcatTuple(self):
     c1 = np.random.rand(4, 4)
     c2 = np.random.rand(4, 4)
-    with self.cached_session():
-      concat_list_t = array_ops.concat([c1, c2], 0)
-      concat_tuple_t = array_ops.concat((c1, c2), 0)
-      self.assertAllEqual(concat_list_t.eval(), self.evaluate(concat_tuple_t))
+    concat_list_t = array_ops.concat([c1, c2], 0)
+    concat_tuple_t = array_ops.concat((c1, c2), 0)
+    self.assertAllEqual(
+        self.evaluate(concat_list_t), self.evaluate(concat_tuple_t))
 
   def testConcatNoScalars(self):
-    with self.cached_session():
-      scalar = constant_op.constant(7)
-      dim = array_ops.placeholder(dtypes.int32)
-      with self.assertRaisesRegexp(
-          ValueError, r"Can't concatenate scalars \(use tf\.stack instead\)"):
-        array_ops.concat([scalar, scalar, scalar], dim)
+    scalar = constant_op.constant(7)
+    dim = array_ops.placeholder(dtypes.int32)
+    with self.assertRaisesRegexp(
+        ValueError, r"Can't concatenate scalars \(use tf\.stack instead\)"):
+      array_ops.concat([scalar, scalar, scalar], dim)
 
   # important as gpu implementation could fail if
   # shared memory is not large for all the inputs
@@ -523,21 +523,21 @@ class ConcatOpTest(test.TestCase):
           self.assertAllEqual(result[index], params[p[i]])
 
   def testConcatEmpty(self):
-    with self.session(use_gpu=True):
+    with test_util.use_gpu():
       t1 = []
       t2 = []
-      output = gen_array_ops.concat_v2([t1, t2], 0).eval()
-      self.assertFalse(output)  # Checks that output is empty
+      output = gen_array_ops.concat_v2([t1, t2], 0)
+      self.assertFalse(self.evaluate(output))  # Checks that output is empty
 
   def testConcatInvalidAxis(self):
     with self.assertRaises(ValueError):
-      with self.session(use_gpu=True):
+      with test_util.use_gpu():
         t1 = [1]
         t2 = [2]
         gen_array_ops.concat_v2([t1, t2], 1).eval()
 
   def testConcatNegativeAxis(self):
-    with self.session(use_gpu=True):
+    with test_util.use_gpu():
       t1 = [[1, 2, 3], [4, 5, 6]]
       t2 = [[7, 8, 9], [10, 11, 12]]
 
@@ -608,7 +608,7 @@ class ConcatOpTest(test.TestCase):
 
   def testConcatAxisType(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.cached_session(use_gpu=True):
+      with test_util.use_gpu():
         t1 = [[1, 2, 3], [4, 5, 6]]
         t2 = [[7, 8, 9], [10, 11, 12]]
 
@@ -621,65 +621,61 @@ class ConcatOpTest(test.TestCase):
 class ConcatOffsetTest(test.TestCase):
 
   def testBasic(self):
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       cdim = constant_op.constant(1, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5], dtypes.int32)
       s2 = constant_op.constant([2, 20, 5], dtypes.int32)
       off = gen_array_ops.concat_offset(cdim, [s0, s1, s2])
-      ans = sess.run(off)
+      ans = self.evaluate(off)
       self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]])
 
   def testNotVector(self):
-    with self.cached_session() as sess:
-      cdim = constant_op.constant(1, dtypes.int32)
-      s0 = constant_op.constant([[2, 3, 5]], dtypes.int32)
-      s1 = constant_op.constant([[2, 7, 5]], dtypes.int32)
-      off = gen_array_ops.concat_offset(cdim, [s0, s1])
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   r"should be a vector"):
-        sess.run(off)
+    cdim = constant_op.constant(1, dtypes.int32)
+    s0 = constant_op.constant([[2, 3, 5]], dtypes.int32)
+    s1 = constant_op.constant([[2, 7, 5]], dtypes.int32)
+    off = gen_array_ops.concat_offset(cdim, [s0, s1])
+    with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                 r"should be a vector"):
+      self.evaluate(off)
 
   def testConcatDimOutOfRange(self):
-    with self.cached_session() as sess:
-      cdim = constant_op.constant(4, dtypes.int32)
-      s0 = constant_op.constant([2, 3, 5], dtypes.int32)
-      s1 = constant_op.constant([2, 7, 5], dtypes.int32)
-      off = gen_array_ops.concat_offset(cdim, [s0, s1])
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   r"Concat dim is out of range: 4 vs. 3"):
-        sess.run(off)
+    cdim = constant_op.constant(4, dtypes.int32)
+    s0 = constant_op.constant([2, 3, 5], dtypes.int32)
+    s1 = constant_op.constant([2, 7, 5], dtypes.int32)
+    off = gen_array_ops.concat_offset(cdim, [s0, s1])
+    with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                 r"Concat dim is out of range: 4 vs. 3"):
+      self.evaluate(off)
 
   def testDimMismatch(self):
-    with self.cached_session() as sess:
-      cdim = constant_op.constant(1, dtypes.int32)
-      s0 = constant_op.constant([2, 3, 5], dtypes.int32)
-      s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32)
-      off = gen_array_ops.concat_offset(cdim, [s0, s1])
-      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   r"should contain 3 elem"):
-        sess.run(off)
+    cdim = constant_op.constant(1, dtypes.int32)
+    s0 = constant_op.constant([2, 3, 5], dtypes.int32)
+    s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32)
+    off = gen_array_ops.concat_offset(cdim, [s0, s1])
+    with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
+                                 r"should contain 3 elem"):
+      self.evaluate(off)
 
   def testSizeMismatch(self):
-    with self.cached_session() as sess:
-      cdim = constant_op.constant(1, dtypes.int32)
-      s0 = constant_op.constant([2, 3, 5], dtypes.int32)
-      s1 = constant_op.constant([2, 7, 10], dtypes.int32)
-      off = gen_array_ops.concat_offset(cdim, [s0, s1])
-      with self.assertRaisesRegexp(
-          errors_impl.InvalidArgumentError,
-          r"All dimensions except 1 must match. Input 1 has shape \[2 7 10\] "
-          r"and doesn't match input 0 with shape \[2 3 5\]."):
-        sess.run(off)
+    cdim = constant_op.constant(1, dtypes.int32)
+    s0 = constant_op.constant([2, 3, 5], dtypes.int32)
+    s1 = constant_op.constant([2, 7, 10], dtypes.int32)
+    off = gen_array_ops.concat_offset(cdim, [s0, s1])
+    with self.assertRaisesRegexp(
+        errors_impl.InvalidArgumentError,
+        r"All dimensions except 1 must match. Input 1 has shape \[2 7 10\] "
+        r"and doesn't match input 0 with shape \[2 3 5\]."):
+      self.evaluate(off)
 
   def testNegativeDim(self):
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       cdim = constant_op.constant(-2, dtypes.int32)
       s0 = constant_op.constant([2, 3, 5], dtypes.int32)
       s1 = constant_op.constant([2, 7, 5], dtypes.int32)
       s2 = constant_op.constant([2, 20, 5], dtypes.int32)
       off = gen_array_ops.concat_offset(cdim, [s0, s1, s2])
-      ans = sess.run(off)
+      ans = self.evaluate(off)
       self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]])
 
       cdim = constant_op.constant(-3, dtypes.int32)
@@ -687,7 +683,7 @@ class ConcatOffsetTest(test.TestCase):
       s1 = constant_op.constant([1, 3, 5], dtypes.int32)
       s2 = constant_op.constant([3, 3, 5], dtypes.int32)
       off = gen_array_ops.concat_offset(cdim, [s0, s1, s2])
-      ans = sess.run(off)
+      ans = self.evaluate(off)
       self.assertAllEqual(ans, [[0, 0, 0], [2, 0, 0], [3, 0, 0]])
 
 
diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
index 893cb7cce33..7ee1a4bc327 100644
--- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
@@ -111,7 +111,7 @@ class ConditionalAccumulatorTest(test.TestCase):
         for e in elems:
           q.apply_grad((e,)).run()
 
-        result = sess.run(q.take_grad(1))
+        result = self.evaluate(q.take_grad(1))
 
         self.assertEqual(sum(elems) / len(elems), result)
 
@@ -424,7 +424,7 @@ class ConditionalAccumulatorTest(test.TestCase):
       takeg_t = q.take_grad(1)
 
       def apply_grad(accum_op):
-        sess.run(accum_op)
+        self.evaluate(accum_op)
 
       threads = [
           self.checkedThread(
@@ -451,14 +451,14 @@ class ConditionalAccumulatorTest(test.TestCase):
       def apply_grad():
         for accum_op in accum_ops:
           time.sleep(1.0)
-          sess.run(accum_op)
+          self.evaluate(accum_op)
 
       apply_grad_thread = self.checkedThread(target=apply_grad)
 
       results = []
 
       def take_grad():
-        results.append(sess.run(takeg_t))
+        results.append(self.evaluate(takeg_t))
 
       threads = [self.checkedThread(target=take_grad) for _ in range(10)]
 
@@ -485,12 +485,12 @@ class ConditionalAccumulatorTest(test.TestCase):
       def apply_grad():
         time.sleep(1.0)
         for accum_op in accum_ops:
-          sess.run(accum_op)
+          self.evaluate(accum_op)
 
       return_array = []
 
       def take_grad():
-        return_array.append(sess.run(takeg_t))
+        return_array.append(self.evaluate(takeg_t))
 
       accum_thread = self.checkedThread(target=apply_grad)
       takeg_thread = self.checkedThread(target=take_grad)
@@ -503,7 +503,7 @@ class ConditionalAccumulatorTest(test.TestCase):
 
   def _blocking_takeg(self, sess, takeg_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(takeg_op)
+      self.evaluate(takeg_op)
 
   def testAccumulatorCancel(self):
     with self.cached_session() as sess:
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 9a198d445f6..37654abd18e 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -593,7 +593,7 @@ class ControlFlowTest(test.TestCase):
       fn1 = lambda: [math_ops.add(x, 1), math_ops.add(x, 2)]
       fn2 = lambda: [y, y]
       r = control_flow_ops.cond(pred, fn1, fn2)
-      self.assertAllEqual([11, 12], sess.run(r))
+      self.assertAllEqual([11, 12], self.evaluate(r))
 
   def testCondListOutput(self):
     with self.cached_session() as sess:
@@ -603,7 +603,7 @@ class ControlFlowTest(test.TestCase):
       fn1 = lambda: [math_ops.add(x, y), math_ops.add(x, y)]
       fn2 = lambda: [y, y]
       r = control_flow_ops.cond(pred, fn1, fn2)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertListEqual([210, 210], test_result)
 
   def testTupleOutput(self):
@@ -614,7 +614,7 @@ class ControlFlowTest(test.TestCase):
       fn1 = lambda: (math_ops.add(x, y), math_ops.add(x, y))
       fn2 = lambda: (y, y)
       r = control_flow_ops.cond(pred, fn1, fn2)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertTupleEqual((210, 210), test_result)
 
   def testDictOutput(self):
@@ -625,7 +625,7 @@ class ControlFlowTest(test.TestCase):
       fn1 = lambda: {"a": math_ops.add(x, y), "b": math_ops.add(x, y)}
       fn2 = lambda: {"a": y, "b": y}
       r = control_flow_ops.cond(pred, fn1, fn2)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertDictEqual({"a": 210, "b": 210}, test_result)
 
   def testEmbeddedListOutput(self):
@@ -638,7 +638,7 @@ class ControlFlowTest(test.TestCase):
       # Pass strict=True flag as cond_v2 allows for tensors to be
       # in nested output structures as singletons
       r = control_flow_ops.cond(pred, fn1, fn2, strict=True)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertListEqual([[210, 210]], test_result)
 
   def testEmbeddedTupleOutput(self):
@@ -649,7 +649,7 @@ class ControlFlowTest(test.TestCase):
       fn1 = lambda: ((math_ops.add(x, y), math_ops.add(x, y)))
       fn2 = lambda: ((y, y))
       r = control_flow_ops.cond(pred, fn1, fn2)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertTupleEqual(((210, 210)), test_result)
 
   def testEmbeddedDictOutput(self):
@@ -662,7 +662,7 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: {"a": {"c": y},
                      "b": {"d": y}}
       r = control_flow_ops.cond(pred, fn1, fn2)
-      test_result = sess.run(r)
+      test_result = self.evaluate(r)
       self.assertDictEqual({"a": {"c": 210}, "b": {"d": 210}}, test_result)
 
   def testCheckNestedOutputStruct(self):
@@ -677,7 +677,7 @@ class ControlFlowTest(test.TestCase):
       with self.assertRaisesRegexp(
           ValueError, v2_msg if control_flow_ops.ENABLE_COND_V2 else v1_msg):
         r = control_flow_ops.cond(pred, fn1, fn2)
-        test_result = sess.run(r)
+        self.evaluate(r)
 
   def testCondRef(self):
 
@@ -731,7 +731,7 @@ class ControlFlowTest(test.TestCase):
       with ops.control_dependencies([v_t_op]):
         orig_v = array_ops.identity(v)
       merged_op = control_flow_ops.merge([assign_v, orig_v])
-      self.assertAllEqual([1.0], sess.run(merged_op.output))
+      self.assertAllEqual([1.0], self.evaluate(merged_op.output))
 
   def testCondSwitchIdentity(self):
     # Make sure the recv identity is not removed by optimization.
@@ -745,7 +745,7 @@ class ControlFlowTest(test.TestCase):
         return control_flow_ops.Assert(False, ["Wrong branch!!!"])
 
       r = control_flow_ops.cond(pred, fn1, fn2)
-      sess.run(r)
+      self.evaluate(r)
 
   def testCondRecvIdentity(self):
     # Make sure the switch identity is not removed by optimization.
@@ -761,7 +761,7 @@ class ControlFlowTest(test.TestCase):
           return control_flow_ops.Assert(False, ["Wrong branch!!!"])
 
       r = control_flow_ops.cond(pred, fn1, fn2)
-      sess.run(r)
+      self.evaluate(r)
 
   def testCondGrad_1(self):
     with self.cached_session():
@@ -1050,7 +1050,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(r[0].dtype, dtypes.int32)
       self.assertEqual(r[1].dtype, dtypes.int32_ref)
 
-      value_i, value_x = sess.run(r)
+      value_i, value_x = self.evaluate(r)
 
     self.assertEqual(100, value_i)
     self.assertEqual(0, value_x)
@@ -1642,7 +1642,7 @@ class ControlFlowTest(test.TestCase):
       with ops.control_dependencies([control_flow_ops.no_op()]):
         loop = control_flow_ops.while_loop(cond, body,
                                            (constant_op.constant(5),))
-      self.assertEqual(0, sess.run(loop))
+      self.assertEqual(0, self.evaluate(loop))
 
   @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testWhileCondWithControl_1(self):
@@ -2055,7 +2055,7 @@ class ControlFlowTest(test.TestCase):
         self.assertFalse(gpu_dev_name in dev)
 
     with self.session(graph=graph) as sess:
-      self.assertAllClose(1024.0, sess.run(r))
+      self.assertAllClose(1024.0, self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/116351701 (colocation)")
   def testWhileGrad_ColocateGradients(self):
@@ -2133,7 +2133,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [v], parallel_iterations=p_iters)
 
       grad_a, grad_v = gradients_impl.gradients(r, [a, v])
-      grad_a_val, grad_v_val = sess.run([grad_a, grad_v])
+      grad_a_val, grad_v_val = self.evaluate([grad_a, grad_v])
       self.assertAllClose(216.0, grad_a_val)
       self.assertAllClose(81.0, grad_v_val)
 
@@ -2264,7 +2264,7 @@ class ControlFlowTest(test.TestCase):
     i, x = control_flow_ops.while_loop(lambda i, x: i < 3, outer_body, [0, 0.0])
 
     with self.cached_session() as sess:
-      i_val, x_val = sess.run([i, x])
+      i_val, x_val = self.evaluate([i, x])
       self.assertEqual(i_val, 3)
       self.assertAllClose(x_val, 1.0)
 
@@ -2293,7 +2293,7 @@ class ControlFlowTest(test.TestCase):
 
       r_flattened = nest.flatten(r)
       self.assertEqual([100.0, 1.0, 102.0, 3.0, 4.0 + 100 * 2.0],
-                       sess.run(r_flattened))
+                       self.evaluate(r_flattened))
 
   def testWhile_NestedBadArityFails(self):
     with self.cached_session():
@@ -2547,8 +2547,8 @@ class ControlFlowTest(test.TestCase):
       res = outer_loop(inp)
       optimizer = adam.AdamOptimizer(learning_rate=0.001)
       train_op = optimizer.minimize(math_ops.reduce_mean(math_ops.square(res)))
-      sess.run(variables.global_variables_initializer())
-      sess.run(train_op)
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(train_op)
       self.assertAllClose(2.999, self.evaluate(var))
 
   def _testWhileCondGrad_Simple(self, use_gpu):
@@ -2607,11 +2607,11 @@ class ControlFlowTest(test.TestCase):
           [i0.get_shape(), tensor_shape.TensorShape([None, 2])])
       s = math_ops.reduce_sum(h)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       optimizer = gradient_descent.GradientDescentOptimizer(0.01)
       op = optimizer.minimize(s)
-      sess.run(op)
-      self.assertAllClose([[0.98000002, 1.98000002]], sess.run(x))
+      self.evaluate(op)
+      self.assertAllClose([[0.98000002, 1.98000002]], self.evaluate(x))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileWithRefsWithGradients_1(self):
@@ -2705,7 +2705,7 @@ class ControlFlowTest(test.TestCase):
 
       output_grad = control_flow_ops.while_loop(
           c, b, [i0, constant_op.constant(0.0)])
-      self.assertAllClose(600.0, sess.run(output_grad)[1])
+      self.assertAllClose(600.0, self.evaluate(output_grad)[1])
 
   def testWhileAndTensorArray(self):
     with self.cached_session() as sess:
@@ -2724,7 +2724,7 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(c, b, [n0, y0], parallel_iterations=1)
       r = gradients_impl.gradients(r, param)[0]
-      self.assertAllClose(107520.0, sess.run(r))
+      self.assertAllClose(107520.0, self.evaluate(r))
 
   def testWhileGrad_StopGrad(self):
     with self.cached_session():
@@ -2857,8 +2857,8 @@ class ControlFlowTest(test.TestCase):
     dy_dq, = gradients_impl.gradients(y, q)
     self.assertIsNotNone(dy_dq)
     with self.cached_session() as sess:
-      sess.run(q.initializer)
-      self.assertAllClose([0., 0.], sess.run(dy_dq))
+      self.evaluate(q.initializer)
+      self.assertAllClose([0., 0.], self.evaluate(dy_dq))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   def testWhileGradientWithNontrainablePath2(self):
@@ -2875,8 +2875,8 @@ class ControlFlowTest(test.TestCase):
     dy_dq, = gradients_impl.gradients(y, q)
     self.assertIsNotNone(dy_dq)
     with self.cached_session() as sess:
-      sess.run(q.initializer)
-      self.assertAllClose([1., 1.], sess.run(dy_dq))
+      self.evaluate(q.initializer)
+      self.assertAllClose([1., 1.], self.evaluate(dy_dq))
 
   @test_util.disable_control_flow_v2("b/115920078 (gradients)")
   def testIssue16504(self):
@@ -3033,19 +3033,19 @@ class ControlFlowTest(test.TestCase):
           ((x > y, a), (x > y, b)), default=c, exclusive=True)
 
       variables.global_variables_initializer().run()
-      self.assertAllEqual(sess.run([v0, v1, v2]), [-1] * 3)
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(2, self.evaluate(r2))
-      self.assertAllEqual(sess.run([v0, v1, v2]), [-1, -1, 2])
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1, -1, 2])
 
       variables.global_variables_initializer().run()
-      self.assertAllEqual(sess.run([v0, v1, v2]), [-1] * 3)
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(1, self.evaluate(r1))
-      self.assertAllEqual(sess.run([v0, v1, v2]), [-1, 1, -1])
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1, 1, -1])
 
       variables.global_variables_initializer().run()
-      self.assertAllEqual(sess.run([v0, v1, v2]), [-1] * 3)
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(0, self.evaluate(r0))
-      self.assertAllEqual(sess.run([v0, v1, v2]), [0, -1, -1])
+      self.assertAllEqual(self.evaluate([v0, v1, v2]), [0, -1, -1])
 
   @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
   def testOneOpCond(self):
@@ -3083,7 +3083,7 @@ class ControlFlowTest(test.TestCase):
 
       # Fetching v directly will result in an uninitialized error
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
-        sess.run([c, v])
+        self.evaluate([c, v])
 
       # Use a control dependency to ensure init_variable is run
       # while asking for c
@@ -3091,7 +3091,7 @@ class ControlFlowTest(test.TestCase):
           name="real_tensor",
           output_tensor=v._ref(),  # pylint: disable=protected-access
           dependencies=[v.initializer])
-      c_val, real_v_val = sess.run([c, real_v])
+      c_val, real_v_val = self.evaluate([c, real_v])
 
     # Ensure the result of 'real_c' is the same as 'c'
     self.assertAllEqual(10, c_val)
@@ -3184,7 +3184,7 @@ class ControlFlowTest(test.TestCase):
 
       # Runs "init" before fetching v1 and v2.
       init.run()
-      v1_val, v2_val = sess.run([v1, v2])
+      v1_val, v2_val = self.evaluate([v1, v2])
 
     # Ensure that v1 and v2 are initialized
     self.assertAllClose([0.0], v1_val)
@@ -3295,7 +3295,7 @@ class ControlFlowTest(test.TestCase):
 
       result = control_flow_ops.while_loop(condition, body,
                                            [constant_op.constant(4)])
-      self.assertEqual(10, sess.run(result))
+      self.assertEqual(10, self.evaluate(result))
 
       # Ensure that we cannot run a tensor that escapes the loop body
       # accidentally.
@@ -3345,7 +3345,7 @@ class ControlFlowTest(test.TestCase):
       cond = constant_op.constant(True, dtypes.bool)
       v_f, v_t = control_flow_ops.switch(constant_qint, cond)
       result = control_flow_ops.merge([v_f, v_t])
-      sess.run(result)
+      self.evaluate(result)
 
   def testQIntRefSwitchMerge(self):
     with self.cached_session(use_gpu=test.is_gpu_available()) as sess:
@@ -3353,12 +3353,12 @@ class ControlFlowTest(test.TestCase):
           shape=[1], dtype=dtypes.qint8, name="v", container="", shared_name="")
       assign_op = state_ops.assign(
           var_qint, constant_op.constant(np.array([42]), dtypes.qint8))
-      sess.run(assign_op)
+      self.evaluate(assign_op)
 
       cond = constant_op.constant(True, dtypes.bool)
       v_f, v_t = control_flow_ops.ref_switch(var_qint, cond)
       result = control_flow_ops.ref_merge([v_f, v_t])
-      sess.run(result)
+      self.evaluate(result)
 
   def testUInt64SwitchMerge(self):
     with self.cached_session(force_gpu=test.is_gpu_available()) as sess:
@@ -3366,7 +3366,7 @@ class ControlFlowTest(test.TestCase):
       cond = constant_op.constant(True, dtypes.bool)
       v_f, v_t = control_flow_ops.switch(constant_uint64, cond)
       result = control_flow_ops.merge([v_f, v_t])
-      sess.run(result)
+      self.evaluate(result)
 
   def testQIntArgAndRet(self):
 
@@ -3377,7 +3377,7 @@ class ControlFlowTest(test.TestCase):
     with self.cached_session(force_gpu=test.is_gpu_available()) as sess:
       qint = constant_op.constant(np.array([42]), dtypes.qint8)
       result = func(qint)
-      sess.run(result)
+      self.evaluate(result)
 
 
 class ControlFlowContextCheckTest(test.TestCase):
@@ -3682,7 +3682,7 @@ class WhileOpBenchmark(test.Benchmark):
     with session.Session() as sess, ops.device(default_device):
       # Get the initial id i, input x, and kernel.
       i, x, kernel = self._getInitVariables()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       if static_unroll:
         for _ in xrange(steps):
@@ -3701,11 +3701,11 @@ class WhileOpBenchmark(test.Benchmark):
 
       for _ in xrange(3):
         # exclude warm up time
-        sess.run(r)
+        self.evaluate(r)
 
       start_time = time.time()
       for _ in xrange(num_iters):
-        sess.run(r)
+        self.evaluate(r)
       return (time.time() - start_time) / num_iters
 
   def benchmarkWhileOpCrossDevicePlacement(self):
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index 3924e135752..3ec5c29df7d 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -109,7 +109,7 @@ class Conv3DTest(test.TestCase):
         results.append(result)
 
       with self.cached_session() as sess:
-        values = sess.run(results)
+        values = self.evaluate(results)
         for value in values:
           print("expected = ", expected)
           print("actual = ", value)
@@ -184,8 +184,8 @@ class Conv3DTest(test.TestCase):
         computed_results.append(computed)
         tolerance = 1e-2 if use_gpu else 1e-5
         with self.cached_session() as sess:
-          expected_values = sess.run(expected_results)
-          computed_values = sess.run(computed_results)
+          expected_values = self.evaluate(expected_results)
+          computed_values = self.evaluate(computed_results)
           for e_value, c_value in zip(expected_values, computed_values):
             print("expected = ", e_value)
             print("actual = ", c_value)
@@ -715,8 +715,8 @@ class Conv3DTest(test.TestCase):
         expected_grad = gradients_impl.gradients(expected, t1
                                                  if mode == "input" else t2)[0]
         # "values" consists of two tensors for two backprops
-        actual_value = sess.run(actual_grad)
-        expected_value = sess.run(expected_grad)
+        actual_value = self.evaluate(actual_grad)
+        expected_value = self.evaluate(expected_grad)
         self.assertShapeEqual(actual_value, actual_grad)
         self.assertShapeEqual(expected_value, expected_grad)
       print("expected = ", expected_value)
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 835cc1504d1..2f6f3bb383b 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -908,8 +908,8 @@ class Conv2DTest(test.TestCase):
         conv = gradients_impl.gradients(conv_forward, t1)[0]
         conv_2 = gradients_impl.gradients(conv_forward_2, t1)[0]
         # "values" consists of two tensors for two backprops
-        value = sess.run(conv)
-        value_2 = sess.run(conv_2)
+        value = self.evaluate(conv)
+        value_2 = self.evaluate(conv_2)
         self.assertShapeEqual(value, conv)
         self.assertShapeEqual(value_2, conv_2)
       tf_logging.info("expected = ", value_2)
@@ -961,8 +961,8 @@ class Conv2DTest(test.TestCase):
           conv_forward_2 = test_util.NCHWToNHWC(conv_forward_2)
         conv = gradients_impl.gradients(conv_forward, t2)[0]
         conv_2 = gradients_impl.gradients(conv_forward, t2)[0]
-        value = sess.run(conv)
-        value_2 = sess.run(conv_2)
+        value = self.evaluate(conv)
+        value_2 = self.evaluate(conv_2)
         self.assertShapeEqual(value, conv)
         self.assertShapeEqual(value_2, conv_2)
       tf_logging.info("expected = ", value_2)
@@ -1545,7 +1545,7 @@ class DepthwiseConv2DTest(test.TestCase):
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
       conv = nn_impl.depthwise_conv2d(
           t1, t2, strides=[1, stride, stride, 1], padding=padding)
-      value = sess.run(conv)
+      value = self.evaluate(conv)
     tf_logging.info("value = ", value)
     self.assertArrayNear(expected, np.ravel(value), 1e-5)
     self.assertShapeEqual(value, conv)
@@ -1667,7 +1667,7 @@ class SeparableConv2DTest(test.TestCase):
       if data_format == "NCHW":
         conv = array_ops.transpose(conv, [0, 2, 3, 1])
 
-      value = sess.run(conv)
+      value = self.evaluate(conv)
     tf_logging.info("value = ", value)
     self.assertArrayNear(expected, np.ravel(value), 1e-3)
     self.assertShapeEqual(value, conv)
@@ -1774,10 +1774,10 @@ class DeepConv2DTest(test.TestCase):
       conv = nn_ops.conv2d(t1, t2, strides=strides, padding=padding)
 
       os.environ["TF_USE_DEEP_CONV2D"] = "0"
-      values_expect = sess.run([conv])
+      values_expect = self.evaluate([conv])
 
       os.environ["TF_USE_DEEP_CONV2D"] = "1"
-      values_test = sess.run([conv])
+      values_test = self.evaluate([conv])
 
       self.assertAllClose(values_expect, values_test, rtol=1e-5, atol=1e-5)
 
diff --git a/tensorflow/python/kernel_tests/ctc_loss_op_test.py b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
index b38776ec5bb..36cae2846cd 100644
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@@ -98,12 +98,12 @@ class CTCLossTest(test.TestCase):
       self.assertShapeEqual(grad_truth, grad)
 
       if expected_err_re is None:
-        (tf_loss, tf_grad) = sess.run([loss, grad])
+        (tf_loss, tf_grad) = self.evaluate([loss, grad])
         self.assertAllClose(tf_loss, loss_truth, atol=1e-6)
         self.assertAllClose(tf_grad, grad_truth, atol=1e-6)
       else:
         with self.assertRaisesOpError(expected_err_re):
-          sess.run([loss, grad])
+          self.evaluate([loss, grad])
 
   def testBasic(self):
     """Test two batch entries."""
@@ -266,7 +266,7 @@ class CTCLossTest(test.TestCase):
           sequence_length=seq_lens,
           time_major=False)
 
-      (tf_loss, tf_loss_transposed) = sess.run([loss, loss_transposed])
+      (tf_loss, tf_loss_transposed) = self.evaluate([loss, loss_transposed])
       self.assertAllEqual(tf_loss, tf_loss_transposed)
 
   def testInvalidSecondGradient(self):
@@ -332,9 +332,10 @@ class CTCLossTestV2(test.TestCase):
 
     def assert_same_loss_and_grads(loss):
       with self.cached_session() as sess:
-        self.assertAllClose(*sess.run([loss, ref_loss]))
+        self.assertAllClose(*self.evaluate([loss, ref_loss]))
         grad = gradients_impl.gradients(loss, [logits])
-        self.assertAllClose(*sess.run([grad, ref_grad]), rtol=2e-06, atol=2e-06)
+        self.assertAllClose(
+            *self.evaluate([grad, ref_grad]), rtol=2e-06, atol=2e-06)
 
     assert_same_loss_and_grads(
         ctc_ops.ctc_loss_v2(
@@ -391,9 +392,11 @@ class CTCLossTestV2(test.TestCase):
 
       with self.cached_session() as sess:
         for _ in range(32):
-          self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
-          self.assertAllClose(*sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
-                              rtol=2e-06, atol=2e-06)
+          self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
+          self.assertAllClose(
+              *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
+              rtol=2e-06,
+              atol=2e-06)
 
   def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
     random_seed.set_random_seed(5)
@@ -442,9 +445,11 @@ class CTCLossTestV2(test.TestCase):
 
     with self.cached_session() as sess:
       for _ in range(32):
-        self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
-        self.assertAllClose(*sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
-                            rtol=2e-06, atol=2e-06)
+        self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
+        self.assertAllClose(
+            *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
+            rtol=2e-06,
+            atol=2e-06)
 
   def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
     random_seed.set_random_seed(5)
@@ -496,9 +501,11 @@ class CTCLossTestV2(test.TestCase):
 
     with self.cached_session() as sess:
       for _ in range(32):
-        self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
-        self.assertAllClose(*sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
-                            rtol=2e-06, atol=2e-06)
+        self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
+        self.assertAllClose(
+            *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
+            rtol=2e-06,
+            atol=2e-06)
 
   def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
     with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
@@ -542,9 +549,11 @@ class CTCLossTestV2(test.TestCase):
 
       with self.cached_session() as sess:
         for _ in range(32):
-          self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
-          self.assertAllClose(*sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
-                              rtol=2e-06, atol=2e-06)
+          self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
+          self.assertAllClose(
+              *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
+              rtol=2e-06,
+              atol=2e-06)
 
   def testCollapseRepeated(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
diff --git a/tensorflow/python/kernel_tests/cwise_ops_binary_test.py b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py
index df166b61019..fc7d4572e2d 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_binary_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_binary_test.py
@@ -83,17 +83,17 @@ class BinaryOpTest(test.TestCase):
       out = tf_func(inx, iny)
       tf_cpu = self.evaluate(out)
       # Test that the op takes precedence over numpy operators.
-      np_left = tf_func(x, iny).eval()
-      np_right = tf_func(inx, y).eval()
+      np_left = self.evaluate(tf_func(x, iny))
+      np_right = self.evaluate(tf_func(inx, y))
 
       if also_compare_variables:
         var_x = variables.Variable(x)
         var_y = variables.Variable(y)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         print(type(x), type(y), type(var_x), type(var_y))
         print(type(tf_func(x, var_y)), type(tf_func(var_x, y)))
-        np_var_left = tf_func(x, var_y).eval()
-        np_var_right = tf_func(var_x, y).eval()
+        np_var_left = self.evaluate(tf_func(x, var_y))
+        np_var_right = self.evaluate(tf_func(var_x, y))
 
     if np_ans.dtype != np.object:
       self.assertAllClose(np_ans, tf_cpu)
@@ -253,7 +253,7 @@ class BinaryOpTest(test.TestCase):
     var_x = variables.Variable(x)
     var_y = variables.Variable(y)
     with self.cached_session() as sess:
-      sess.run([var_x.initializer, var_y.initializer])
+      self.evaluate([var_x.initializer, var_y.initializer])
       left_result = (var_x * y).eval()
       right_result = (x * var_y).eval()
     np_result = x * y
@@ -385,7 +385,7 @@ class BinaryOpTest(test.TestCase):
     with self.test_session(use_gpu=False) as sess:
       cmp_eq = math_ops.equal(x, y)
       cmp_not_eq = math_ops.not_equal(x, y)
-      values = sess.run([cmp_eq, cmp_not_eq])
+      values = self.evaluate([cmp_eq, cmp_not_eq])
       self.assertAllEqual([[True, True], [False, False]], values[0])
       self.assertAllEqual([[False, False], [True, True]], values[1])
 
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index d7dbf5ab9ac..ab116c400a5 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -572,7 +572,7 @@ class MinMaxOpTest(test.TestCase):
       inx = ops.convert_to_tensor(x)
       iny = ops.convert_to_tensor(y)
       omin, omax = math_ops.minimum(inx, iny), math_ops.maximum(inx, iny)
-      tf_min, tf_max = sess.run([omin, omax])
+      tf_min, tf_max = self.evaluate([omin, omax])
     self.assertAllEqual(np_min, tf_min)
     self.assertAllEqual(np_max, tf_max)
 
@@ -662,8 +662,8 @@ class MathOpsOverloadTest(test.TestCase):
   def _compareUnary(self, x, dtype, np_func, tf_func):
     np_ans = np_func(x).astype(dtype.as_numpy_dtype)
     with self.test_session(use_gpu=False):
-      self.assertAllClose(np_ans,
-                          tf_func(ops.convert_to_tensor(x, dtype=dtype)).eval())
+      self.assertAllClose(
+          np_ans, self.evaluate(tf_func(ops.convert_to_tensor(x, dtype=dtype))))
 
   def testOverload(self):
     dtypes = [
@@ -736,7 +736,7 @@ class IsFiniteInfNanTest(test.TestCase):
       inx = ops.convert_to_tensor(x)
       ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
           inx), math_ops.is_nan(inx)
-      tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])
+      tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan])
     self.assertAllEqual(np_inf, tf_inf)
     self.assertAllEqual(np_nan, tf_nan)
     self.assertAllEqual(np_finite, tf_finite)
@@ -788,7 +788,7 @@ class RoundingTest(test.TestCase):
     y = np.rint(x) if y is None else np.asarray(y)
     with self.cached_session() as sess:
       tf_rint = math_ops.rint(x)
-      np_rint = sess.run(tf_rint)
+      np_rint = self.evaluate(tf_rint)
     self.assertAllEqual(y, np_rint)
     self.assertShapeEqual(y, tf_rint)
 
@@ -797,7 +797,7 @@ class RoundingTest(test.TestCase):
     with self.cached_session() as sess:
       inx = ops.convert_to_tensor(x)
       ofloor, oceil = math_ops.floor(inx), math_ops.ceil(inx)
-      tf_floor, tf_ceil = sess.run([ofloor, oceil])
+      tf_floor, tf_ceil = self.evaluate([ofloor, oceil])
     self.assertAllEqual(np_floor, tf_floor)
     self.assertAllEqual(np_ceil, tf_ceil)
     self.assertShapeEqual(np_floor, ofloor)
@@ -881,7 +881,7 @@ class ComplexMakeRealImagTest(test.TestCase):
         force_gpu=use_gpu and test_util.is_gpu_available()) as sess:
       inx = ops.convert_to_tensor(cplx)
       tf_angle = math_ops.angle(inx)
-      tf_angle_val = sess.run(tf_angle)
+      tf_angle_val = self.evaluate(tf_angle)
     self.assertAllEqual(np_angle, tf_angle_val)
     self.assertShapeEqual(np_angle, tf_angle)
 
diff --git a/tensorflow/python/kernel_tests/decode_image_op_test.py b/tensorflow/python/kernel_tests/decode_image_op_test.py
index 7a8743e11f0..267afdeb5e1 100644
--- a/tensorflow/python/kernel_tests/decode_image_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_image_op_test.py
@@ -40,7 +40,7 @@ class DecodeImageOpTest(test.TestCase):
       bmp0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(bmp0)
       image1 = image_ops.decode_bmp(bmp0)
-      bmp0, image0, image1 = sess.run([bmp0, image0, image1])
+      bmp0, image0, image1 = self.evaluate([bmp0, image0, image1])
       self.assertEqual(len(bmp0), 4194)
       self.assertAllEqual(image0, image1)
 
@@ -56,7 +56,7 @@ class DecodeImageOpTest(test.TestCase):
       gif0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(gif0)
       image1 = image_ops.decode_gif(gif0)
-      gif0, image0, image1 = sess.run([gif0, image0, image1])
+      gif0, image0, image1 = self.evaluate([gif0, image0, image1])
 
       self.assertEqual(image0.shape, shape)
       self.assertAllEqual(image0, image1)
@@ -85,7 +85,7 @@ class DecodeImageOpTest(test.TestCase):
       jpeg0 = io_ops.read_file(path)
       image0 = image_ops.decode_image(jpeg0)
       image1 = image_ops.decode_jpeg(jpeg0)
-      jpeg0, image0, image1 = sess.run([jpeg0, image0, image1])
+      jpeg0, image0, image1 = self.evaluate([jpeg0, image0, image1])
       self.assertEqual(len(jpeg0), 3771)
       self.assertEqual(image0.shape, (256, 128, 3))
       self.assertAllEqual(image0, image1)
@@ -104,7 +104,7 @@ class DecodeImageOpTest(test.TestCase):
           png0 = io_ops.read_file(path)
           image0 = image_ops.decode_image(png0, channels=channels)
           image1 = image_ops.decode_png(png0, channels=channels)
-          png0, image0, image1 = sess.run([png0, image0, image1])
+          png0, image0, image1 = self.evaluate([png0, image0, image1])
           self.assertEqual(image0.shape, (26, 51, channels or channels_in))
           self.assertAllEqual(image0, image1)
 
diff --git a/tensorflow/python/kernel_tests/decode_jpeg_op_test.py b/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
index 66b3e0f22fd..f8fc28062f4 100644
--- a/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
+++ b/tensorflow/python/kernel_tests/decode_jpeg_op_test.py
@@ -80,7 +80,7 @@ class DecodeJpegBenchmark(test.Benchmark):
           initializer=image_ops.encode_jpeg(tiled_image))
 
     with session.Session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       images = []
       for _ in xrange(parallelism):
         if crop_window is None:
@@ -105,11 +105,11 @@ class DecodeJpegBenchmark(test.Benchmark):
 
       for _ in xrange(3):
         # Skip warm up time.
-        sess.run(r)
+        self.evaluate(r)
 
       start_time = time.time()
       for _ in xrange(num_iters):
-        sess.run(r)
+        self.evaluate(r)
       end_time = time.time()
     return end_time - start_time
 
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
index 3ed7dba966b..0676664685d 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
@@ -43,7 +43,7 @@ class AssignOpTest(test.TestCase):
       variables.global_variables_initializer().run()
 
       def run_add(add_op):
-        sess.run(add_op)
+        self.evaluate(add_op)
 
       threads = [
           self.checkedThread(
@@ -70,7 +70,7 @@ class AssignOpTest(test.TestCase):
       variables.global_variables_initializer().run()
 
       def run_assign(assign_op):
-        sess.run(assign_op)
+        self.evaluate(assign_op)
 
       threads = [
           self.checkedThread(
@@ -103,7 +103,7 @@ class AssignOpTest(test.TestCase):
       p.initializer.run()
 
       def run_add(add_op):
-        sess.run(add_op)
+        self.evaluate(add_op)
 
       threads = [
           self.checkedThread(
@@ -131,7 +131,7 @@ class AssignOpTest(test.TestCase):
       p.initializer.run()
 
       def run_assign(assign_op):
-        sess.run(assign_op)
+        self.evaluate(assign_op)
 
       threads = [
           self.checkedThread(
diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py
index c4bed110803..19f145865fd 100644
--- a/tensorflow/python/kernel_tests/depthtospace_op_test.py
+++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py
@@ -277,7 +277,7 @@ class DepthToSpaceTest(test.TestCase):
       actual = array_ops.depth_to_space(t, block_size, data_format=data_format)
 
     with self.session(use_gpu=use_gpu) as sess:
-      actual_vals, expected_vals = sess.run([actual, expected])
+      actual_vals, expected_vals = self.evaluate([actual, expected])
       self.assertTrue(np.array_equal(actual_vals, expected_vals))
 
   def testAgainstTranspose(self):
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index f65d0be3675..f6d834c2f85 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -162,7 +162,7 @@ class DepthwiseConv2DTest(test.TestCase):
         conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])
 
       try:
-        native_result = sess.run(conv_native)
+        native_result = self.evaluate(conv_native)
       except errors.InvalidArgumentError as e:
         # Grouped convolution kernel is only registered for cuDNN 7. Silently
         # return when we are running on an earlier version or without GPU.
@@ -174,7 +174,7 @@ class DepthwiseConv2DTest(test.TestCase):
 
       conv_interface = nn_impl.depthwise_conv2d(
           t1, t2, strides=[1, stride, stride, 1], padding=padding)
-      interface_result = sess.run(conv_interface)
+      interface_result = self.evaluate(conv_interface)
 
     tf_logging.info(
         "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
@@ -269,7 +269,7 @@ class DepthwiseConv2DTest(test.TestCase):
       t2 = constant_op.constant(x2, shape=filter_in_sizes)
       conv = nn_ops.depthwise_conv2d_native(
           t1, t2, strides=[1, stride, stride, 1], padding=padding)
-      value = sess.run(conv)
+      value = self.evaluate(conv)
     tf_logging.info("value = %r", value)
     self.assertArrayNear(expected, np.ravel(value), 1e-5)
     self.assertShapeEqual(value, conv)
diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py
index 602ceb6ebd9..d6ef9e70b83 100644
--- a/tensorflow/python/kernel_tests/determinant_op_test.py
+++ b/tensorflow/python/kernel_tests/determinant_op_test.py
@@ -156,7 +156,7 @@ class DeterminantOpTest(test.TestCase):
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       det1 = linalg_ops.matrix_determinant(matrix1)
       det2 = linalg_ops.matrix_determinant(matrix2)
-      det1_val, det2_val = sess.run([det1, det2])
+      det1_val, det2_val = self.evaluate([det1, det2])
       self.assertEqual(det1_val, det2_val)
 
 
diff --git a/tensorflow/python/kernel_tests/distributions/categorical_test.py b/tensorflow/python/kernel_tests/distributions/categorical_test.py
index f116c54bd16..9c593d2737a 100644
--- a/tensorflow/python/kernel_tests/distributions/categorical_test.py
+++ b/tensorflow/python/kernel_tests/distributions/categorical_test.py
@@ -287,7 +287,7 @@ class CategoricalTest(test.TestCase, parameterized.TestCase):
     }
 
     with self.cached_session() as sess:
-      run_result = sess.run(to_run)
+      run_result = self.evaluate(to_run)
 
     self.assertAllEqual(run_result["cat_prob"].shape,
                         run_result["norm_prob"].shape)
@@ -462,7 +462,7 @@ class CategoricalTest(test.TestCase, parameterized.TestCase):
           b = categorical.Categorical(logits=b_logits)
 
           kl = kullback_leibler.kl_divergence(a, b)
-          kl_val = sess.run(kl)
+          kl_val = self.evaluate(kl)
           # Make sure KL(a||a) is 0
           kl_same = sess.run(kullback_leibler.kl_divergence(a, a))
 
diff --git a/tensorflow/python/kernel_tests/distributions/special_math_test.py b/tensorflow/python/kernel_tests/distributions/special_math_test.py
index 6b6de8b1393..0f800b95fac 100644
--- a/tensorflow/python/kernel_tests/distributions/special_math_test.py
+++ b/tensorflow/python/kernel_tests/distributions/special_math_test.py
@@ -448,7 +448,7 @@ class LogCDFLaplaceTest(test.TestCase):
       actual = sm.log_cdf_laplace(grid)
       grad = gradients_impl.gradients(actual, grid)[0]
 
-      actual_, grad_ = sess.run([actual, grad])
+      actual_, grad_ = self.evaluate([actual, grad])
 
       # isfinite checks for NaN and Inf.
       self.assertAllTrue(np.isfinite(actual_))
@@ -467,7 +467,7 @@ class LogCDFLaplaceTest(test.TestCase):
       actual = sm.log_cdf_laplace(grid)
       grad = gradients_impl.gradients(actual, grid)[0]
 
-      actual_, grad_ = sess.run([actual, grad])
+      actual_, grad_ = self.evaluate([actual, grad])
 
       # isfinite checks for NaN and Inf.
       self.assertAllTrue(np.isfinite(actual_))
diff --git a/tensorflow/python/kernel_tests/distributions/util_test.py b/tensorflow/python/kernel_tests/distributions/util_test.py
index f4e651b25bb..d3fa513f059 100644
--- a/tensorflow/python/kernel_tests/distributions/util_test.py
+++ b/tensorflow/python/kernel_tests/distributions/util_test.py
@@ -805,7 +805,7 @@ class ReduceWeightedLogSumExp(test.TestCase):
       w = constant_op.constant(w_)
       actual, actual_sgn = du.reduce_weighted_logsumexp(
           logx, w, axis=-1, return_sign=True)
-      [actual_, actual_sgn_] = sess.run([actual, actual_sgn])
+      [actual_, actual_sgn_] = self.evaluate([actual, actual_sgn])
     self.assertAllEqual(expected, actual_)
     self.assertAllEqual([-1., -1, 1], actual_sgn_)
 
@@ -823,7 +823,7 @@ class ReduceWeightedLogSumExp(test.TestCase):
       w = constant_op.constant(w_)
       actual, actual_sgn = du.reduce_weighted_logsumexp(
           logx, w, axis=-1, return_sign=True, keep_dims=True)
-      [actual_, actual_sgn_] = sess.run([actual, actual_sgn])
+      [actual_, actual_sgn_] = self.evaluate([actual, actual_sgn])
     self.assertAllEqual(expected, actual_)
     self.assertAllEqual([[-1.], [-1], [1]], actual_sgn_)
 
diff --git a/tensorflow/python/kernel_tests/division_future_test.py b/tensorflow/python/kernel_tests/division_future_test.py
index e477bdc73b9..85c85809d3f 100644
--- a/tensorflow/python/kernel_tests/division_future_test.py
+++ b/tensorflow/python/kernel_tests/division_future_test.py
@@ -65,7 +65,7 @@ class DivisionTestCase(test.TestCase):
                 tf_floordiv = tf_x // tf_y
                 check(floordiv, tf_floordiv)
       # Do only one sess.run for speed
-      for f, (x, y) in zip(checks, sess.run(tensors)):
+      for f, (x, y) in zip(checks, self.evaluate(tensors)):
         f(x, y)
 
 
diff --git a/tensorflow/python/kernel_tests/division_past_test.py b/tensorflow/python/kernel_tests/division_past_test.py
index 63951b5b382..38bb18631ab 100644
--- a/tensorflow/python/kernel_tests/division_past_test.py
+++ b/tensorflow/python/kernel_tests/division_past_test.py
@@ -64,7 +64,7 @@ class DivisionTestCase(test.TestCase):
                 tf_floordiv = tf_x // tf_y
                 check(floordiv, tf_floordiv)
       # Do only one sess.run for speed
-      for f, (x, y) in zip(checks, sess.run(tensors)):
+      for f, (x, y) in zip(checks, self.evaluate(tensors)):
         f(x, y)
 
 
diff --git a/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
index c6558762809..6aa757e293e 100644
--- a/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
+++ b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
@@ -87,7 +87,7 @@ class DrawBoundingBoxOpTest(test.TestCase):
       image = array_ops.expand_dims(image, 0)
       image = image_ops.draw_bounding_boxes(image, bboxes)
       with self.cached_session(use_gpu=False) as sess:
-        op_drawn_image = np.squeeze(sess.run(image), 0)
+        op_drawn_image = np.squeeze(self.evaluate(image), 0)
         self.assertAllEqual(test_drawn_image, op_drawn_image)
 
   def testDrawBoundingBoxRGBColorCycling(self):
diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
index 07da855a017..3622fde3f3a 100644
--- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
+++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py
@@ -40,7 +40,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([0, 13], partition_vals[0])
@@ -62,7 +62,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant([0, 0, 2, 3, 2, 1])
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([[0, 1, 2], [3, 4, 5]], partition_vals[0])
@@ -87,7 +87,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(2, len(partition_vals))
     self.assertAllEqual(part1, partition_vals[0])
@@ -109,7 +109,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=num_partitions)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(num_partitions, len(partition_vals))
     for i in range(num_partitions):
@@ -125,7 +125,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(2, len(partition_vals))
     self.assertAllEqual([3 + 4j, 7 + 8j], partition_vals[0])
@@ -138,7 +138,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = 3
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(4, len(partition_vals))
     self.assertAllEqual(np.array([], dtype=np.float64).reshape(-1, 4),
@@ -164,7 +164,7 @@ class DynamicPartitionTest(test.TestCase):
             outputs = data_flow_ops.dynamic_partition(
                 data_t, partitions_t, num_partitions=n)
             self.assertEqual(n, len(outputs))
-            outputs_val = sess.run(outputs)
+            outputs_val = self.evaluate(outputs)
             for i, output in enumerate(outputs_val):
               self.assertAllEqual(output, data[partitions == i])
 
@@ -183,7 +183,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(4, len(partition_vals))
     self.assertAllEqual([], partition_vals[0])
@@ -199,7 +199,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=3)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(3, len(partition_vals))
     self.assertAllEqual([[]], partition_vals[0])
@@ -215,7 +215,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(2, len(partition_vals))
     self.assertAllEqual([], partition_vals[0])
@@ -236,7 +236,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=2)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(2, len(partition_vals))
     self.assertAllEqual([6], partition_vals[0])
@@ -257,7 +257,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=5)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(5, len(partition_vals))
     self.assertAllEqual([5], partition_vals[0])
@@ -281,7 +281,7 @@ class DynamicPartitionTest(test.TestCase):
       indices = constant_op.constant(indices_list, dtype=dtypes.int32)
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=40)
-      partition_vals = sess.run(partitions)
+      partition_vals = self.evaluate(partitions)
 
     self.assertEqual(40, len(partition_vals))
     for i in range(40):
@@ -295,7 +295,7 @@ class DynamicPartitionTest(test.TestCase):
       partitions = data_flow_ops.dynamic_partition(
           data, indices, num_partitions=4)
       with self.assertRaisesOpError(r"partitions\[2\] = 99 is not in \[0, 4\)"):
-        sess.run(partitions)
+        self.evaluate(partitions)
 
   def testScalarIndexOutOfRange(self):
     with self.cached_session() as sess:
@@ -303,7 +303,7 @@ class DynamicPartitionTest(test.TestCase):
       data = np.zeros(5)
       partitions = data_flow_ops.dynamic_partition(data, bad, num_partitions=7)
       with self.assertRaisesOpError(r"partitions = 17 is not in \[0, 7\)"):
-        sess.run(partitions)
+        self.evaluate(partitions)
 
   def testHigherRankIndexOutOfRange(self):
     with self.cached_session() as sess:
@@ -335,7 +335,7 @@ class DynamicPartitionTest(test.TestCase):
     self.assertEqual(len(inds), x.shape[0])
     partitioned = data_flow_ops.dynamic_partition(x, inds, 16)
     with self.cached_session() as sess:
-      res = sess.run(partitioned)
+      res = self.evaluate(partitioned)
     self.assertEqual(res[-1].shape[0], 192)
 
 
diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py
index dba3409c9e1..39c0575cd55 100644
--- a/tensorflow/python/kernel_tests/embedding_ops_test.py
+++ b/tensorflow/python/kernel_tests/embedding_ops_test.py
@@ -294,7 +294,7 @@ class EmbeddingLookupTest(test.TestCase):
       variables.global_variables_initializer().run()
       params_values = [params[p_i.name] for p_i in p]
       # Test that the PartitionedVariable components equal the list in p
-      p_var_val = sess.run(list(p_variable))
+      p_var_val = self.evaluate(list(p_variable))
       # Actual test
       tf_result = embedding.eval(feed_dict=feed_dict)
     np_result, _, _ = _EmbeddingResult(params, id_vals, num_shards, vocab_size)
@@ -316,7 +316,7 @@ class EmbeddingLookupTest(test.TestCase):
       variables.global_variables_initializer().run()
       params_values = [params[p_i.name] for p_i in p]
       # Test that the PartitionedVariable components equal the list in p
-      p_var_val = sess.run(list(p_variable))
+      p_var_val = self.evaluate(list(p_variable))
       # Actual test
       print(ops.get_default_graph().as_graph_def())
       tf_result = self.evaluate(embedding)
diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py
index e3742f2e724..9655351a01e 100644
--- a/tensorflow/python/kernel_tests/fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/fifo_queue_test.py
@@ -159,7 +159,7 @@ class FIFOQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [
           self.checkedThread(
@@ -191,7 +191,7 @@ class FIFOQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.append(sess.run(dequeued_t))
+        results.append(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in enqueue_ops]
       for thread in threads:
@@ -240,13 +240,13 @@ class FIFOQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         for enqueue_op in enqueue_ops:
-          sess.run(enqueue_op)
+          self.evaluate(enqueue_op)
 
       results = []
 
       def dequeue():
         for _ in xrange(len(elems)):
-          results.append(sess.run(dequeued_t))
+          results.append(self.evaluate(dequeued_t))
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -269,7 +269,7 @@ class FIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
       for i in xrange(len(elems)):
-        x_val, y_val = sess.run(dequeued_t)
+        x_val, y_val = self.evaluate(dequeued_t)
         x, y = elems[i]
         self.assertEqual([x], x_val)
         self.assertEqual([y], y_val)
@@ -356,7 +356,7 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       for i in range(8):
-        float_val, int_val = sess.run(dequeued_t)
+        float_val, int_val = self.evaluate(dequeued_t)
         self.assertEqual(float_elems[i % 4], float_val)
         self.assertAllEqual(int_elems[i % 4], int_val)
 
@@ -399,17 +399,17 @@ class FIFOQueueTest(test.TestCase):
 
       enqueue_op.run()
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[0:4], float_val)
       self.assertAllEqual(int_elems[0:4], int_val)
       self.assertEqual(float_val.shape, dequeued_t[0].get_shape())
       self.assertEqual(int_val.shape, dequeued_t[1].get_shape())
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[4:8], float_val)
       self.assertAllEqual(int_elems[4:8], int_val)
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       self.assertAllEqual(float_elems[8], float_val)
       self.assertAllEqual(int_elems[8], int_val)
       self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
@@ -429,13 +429,13 @@ class FIFOQueueTest(test.TestCase):
 
       enqueue_op.run()
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[0:4], float_val)
       self.assertAllEqual(int_elems[0:4], int_val)
       self.assertEqual([None], dequeued_t[0].get_shape().as_list())
       self.assertEqual([None, 2], dequeued_t[1].get_shape().as_list())
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[4:8], float_val)
       self.assertAllEqual(int_elems[4:8], int_val)
 
@@ -529,7 +529,7 @@ class FIFOQueueTest(test.TestCase):
 
       # Enqueue 100 items in parallel on 10 threads.
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [self.checkedThread(target=enqueue) for _ in range(10)]
       for thread in threads:
@@ -552,7 +552,7 @@ class FIFOQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -576,7 +576,7 @@ class FIFOQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -596,11 +596,11 @@ class FIFOQueueTest(test.TestCase):
 
       def enqueue():
         for _ in xrange(100):
-          sess.run(enqueue_op)
+          self.evaluate(enqueue_op)
 
       def dequeue():
         for _ in xrange(100):
-          self.assertTrue(sess.run(dequeued_t) in (10.0, 20.0))
+          self.assertTrue(self.evaluate(dequeued_t) in (10.0, 20.0))
 
       enqueue_threads = [self.checkedThread(target=enqueue) for _ in range(10)]
       dequeue_threads = [self.checkedThread(target=dequeue) for _ in range(10)]
@@ -632,7 +632,7 @@ class FIFOQueueTest(test.TestCase):
 
       def dequeue():
         for i in xrange(250):
-          self.assertEqual(i, sess.run(dequeued_t))
+          self.assertEqual(i, self.evaluate(dequeued_t))
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -663,7 +663,7 @@ class FIFOQueueTest(test.TestCase):
       dequeuemany_t = q.dequeue_many(count_placeholder)
 
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       enqueue_thread = self.checkedThread(target=enqueue)
       enqueue_thread.start()
@@ -701,10 +701,10 @@ class FIFOQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -728,10 +728,10 @@ class FIFOQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -797,11 +797,11 @@ class FIFOQueueTest(test.TestCase):
 
       def dequeue():
         for elem in elems:
-          self.assertEqual([elem], sess.run(dequeued_t))
+          self.assertEqual([elem], self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -821,7 +821,7 @@ class FIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -842,11 +842,11 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems, sess.run(dequeued_t))
+        self.assertAllEqual(elems, self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -867,11 +867,11 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems[:3], sess.run(dequeued_t))
+        self.assertAllEqual(elems[:3], self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -892,8 +892,8 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems[:3], sess.run(dequeued_t))
-        self.assertAllEqual(elems[3:], sess.run(dequeued_t))
+        self.assertAllEqual(elems[:3], self.evaluate(dequeued_t))
+        self.assertAllEqual(elems[3:], self.evaluate(dequeued_t))
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -913,16 +913,16 @@ class FIFOQueueTest(test.TestCase):
       cleanup_dequeue_t = q.dequeue()
 
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        self.assertAllEqual(elems[0:3], sess.run(dequeued_t))
+        self.assertAllEqual(elems[0:3], self.evaluate(dequeued_t))
         with self.assertRaises(errors_impl.OutOfRangeError):
-          sess.run(dequeued_t)
-        self.assertEqual(elems[3], sess.run(cleanup_dequeue_t))
+          self.evaluate(dequeued_t)
+        self.assertEqual(elems[3], self.evaluate(cleanup_dequeue_t))
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       enqueue_thread = self.checkedThread(target=enqueue)
       enqueue_thread.start()
@@ -955,7 +955,7 @@ class FIFOQueueTest(test.TestCase):
 
       def dequeue():
         with self.assertRaises(errors_impl.OutOfRangeError):
-          sess.run([dequeued_a_t, dequeued_b_t])
+          self.evaluate([dequeued_a_t, dequeued_b_t])
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -968,7 +968,7 @@ class FIFOQueueTest(test.TestCase):
       # Test that the elements in the partially-dequeued batch are
       # restored in the correct order.
       for elem_a, elem_b in zip(elems_a, elems_b):
-        val_a, val_b = sess.run([cleanup_dequeue_a_t, cleanup_dequeue_b_t])
+        val_a, val_b = self.evaluate([cleanup_dequeue_a_t, cleanup_dequeue_b_t])
         self.assertEqual(elem_a, val_a)
         self.assertEqual(elem_b, val_b)
       self.assertEqual(0, q.size().eval())
@@ -983,7 +983,7 @@ class FIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1003,7 +1003,7 @@ class FIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1051,7 +1051,7 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -1074,7 +1074,7 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -1103,7 +1103,7 @@ class FIFOQueueTest(test.TestCase):
 
       def blocking_enqueue():
         # Expect the operation to succeed once the dequeue op runs.
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       enqueue_thread = self.checkedThread(target=blocking_enqueue)
       enqueue_thread.start()
@@ -1113,7 +1113,7 @@ class FIFOQueueTest(test.TestCase):
       time.sleep(0.1)
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       close_thread = self.checkedThread(target=close)
       close_thread.start()
@@ -1138,7 +1138,7 @@ class FIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       enqueue_thread = self.checkedThread(target=blocking_enqueue)
       enqueue_thread.start()
@@ -1148,7 +1148,7 @@ class FIFOQueueTest(test.TestCase):
       time.sleep(0.1)
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       close_thread = self.checkedThread(target=close)
       close_thread.start()
@@ -1266,19 +1266,19 @@ class FIFOQueueTest(test.TestCase):
 
   def _blockingDequeue(self, sess, dequeue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_op)
+      self.evaluate(dequeue_op)
 
   def _blockingDequeueMany(self, sess, dequeue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_many_op)
+      self.evaluate(dequeue_many_op)
 
   def _blockingEnqueue(self, sess, enqueue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_op)
+      self.evaluate(enqueue_op)
 
   def _blockingEnqueueMany(self, sess, enqueue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_many_op)
+      self.evaluate(enqueue_many_op)
 
   def testResetOfBlockingOperation(self):
     with self.cached_session() as sess:
@@ -1321,7 +1321,7 @@ class FIFOQueueTest(test.TestCase):
       def blocking_enqueue():
         enq_done.append(False)
         # This will fill the queue and then block until enough dequeues happen.
-        sess.run(enq)
+        self.evaluate(enq)
         enq_done.append(True)
 
       thread = self.checkedThread(target=blocking_enqueue)
@@ -1331,14 +1331,14 @@ class FIFOQueueTest(test.TestCase):
       results = []
       results.append(deq.eval())  # Will only complete after the enqueue starts.
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       for _ in range(3):
         results.append(deq.eval())
 
       time.sleep(0.1)
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       # This dequeue will unblock the thread.
       results.append(deq.eval())
@@ -1364,7 +1364,7 @@ class FIFOQueueTest(test.TestCase):
 
       def blocking_dequeue():
         # Will only complete after 4 enqueues complete.
-        results.extend(sess.run(deq))
+        results.extend(self.evaluate(deq))
 
       thread = self.checkedThread(target=blocking_dequeue)
       thread.start()
@@ -1373,7 +1373,7 @@ class FIFOQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         self.assertEqual(len(results), 0)
-        sess.run(enq)
+        self.evaluate(enq)
 
       # Enough enqueued to unblock the dequeue
       thread.join()
@@ -1405,7 +1405,7 @@ class FIFOQueueTest(test.TestCase):
       q.enqueue_many(input_tuple).run()
 
       output_tuple_t = q.dequeue_many(32)
-      output_tuple = sess.run(output_tuple_t)
+      output_tuple = self.evaluate(output_tuple_t)
 
       for (input_elem, output_elem) in zip(input_tuple, output_tuple):
         self.assertAllEqual(input_elem, output_elem)
@@ -1507,10 +1507,10 @@ class FIFOQueueDictTest(test.TestCase):
       enqueue_op4 = q.enqueue_many({"f": [40.0, 50.0]})
       dequeue = q.dequeue()
       dequeue_2 = q.dequeue_many(2)
-      sess.run(enqueue_op)
-      sess.run(enqueue_op2)
-      sess.run(enqueue_op3)
-      sess.run(enqueue_op4)
+      self.evaluate(enqueue_op)
+      self.evaluate(enqueue_op2)
+      self.evaluate(enqueue_op3)
+      self.evaluate(enqueue_op4)
       f = sess.run(dequeue["f"])
       self.assertEqual(10.0, f)
       f = sess.run(dequeue_2["f"])
@@ -1565,10 +1565,10 @@ class FIFOQueueDictTest(test.TestCase):
       })
       dequeue = q.dequeue()
       dequeue_2 = q.dequeue_many(2)
-      sess.run(enqueue_op)
-      sess.run(enqueue_op2)
-      sess.run(enqueue_op3)
-      sess.run(enqueue_op4)
+      self.evaluate(enqueue_op)
+      self.evaluate(enqueue_op2)
+      self.evaluate(enqueue_op3)
+      self.evaluate(enqueue_op4)
       i, f, s = sess.run([dequeue["i"], dequeue["f"], dequeue["s"]])
       self.assertEqual(123, i)
       self.assertEqual(10.0, f)
@@ -1597,7 +1597,7 @@ class FIFOQueueWithTimeoutTest(test.TestCase):
       # until operation_timeout_in_ms.
       with self.assertRaisesRegexp(errors_impl.DeadlineExceededError,
                                    "Timed out waiting for notification"):
-        sess.run(dequeued_t)
+        self.evaluate(dequeued_t)
 
   def testReusableAfterTimeout(self):
     with self.cached_session() as sess:
@@ -1613,8 +1613,8 @@ class FIFOQueueWithTimeoutTest(test.TestCase):
                                    "Timed out waiting for notification"):
         sess.run(dequeued_t, options=config_pb2.RunOptions(timeout_in_ms=10))
 
-      sess.run(enqueue_op)
-      self.assertEqual(37, sess.run(dequeued_t))
+      self.evaluate(enqueue_op)
+      self.assertEqual(37, self.evaluate(dequeued_t))
 
 
 class QueueContainerTest(test.TestCase):
diff --git a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
index cb7659a89a9..272adecfb8a 100644
--- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py
@@ -133,7 +133,7 @@ class FractionalAvgTest(test.TestCase):
           pseudo_random,
           overlapping,
           seed=self._SEED)
-      actual, row_seq, col_seq = sess.run([p, r, c])
+      actual, row_seq, col_seq = self.evaluate([p, r, c])
       expected = self._GetExpectedFractionalAvgPoolResult(input_tensor, row_seq,
                                                           col_seq, overlapping)
       self.assertShapeEqual(expected, p)
@@ -164,7 +164,7 @@ class FractionalAvgTest(test.TestCase):
             pseudo_random,
             overlapping,
             seed=self._SEED)
-        tensor_output, row_seq, col_seq = sess.run([p, r, c])
+        tensor_output, row_seq, col_seq = self.evaluate([p, r, c])
         expected_result = self._GetExpectedFractionalAvgPoolResult(
             rand_mat.astype(np.float32), row_seq, col_seq, overlapping)
         print("row sequence:")
diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
index 0427e34fc1f..9b1e73b318c 100644
--- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
+++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py
@@ -133,7 +133,7 @@ class FractionalMaxPoolTest(test.TestCase):
           pseudo_random,
           overlapping,
           seed=self._SEED)
-      actual, row_seq, col_seq = sess.run([p, r, c])
+      actual, row_seq, col_seq = self.evaluate([p, r, c])
       expected = self._GetExpectedFractionalMaxPoolResult(input_tensor, row_seq,
                                                           col_seq, overlapping)
       self.assertShapeEqual(expected, p)
@@ -164,7 +164,7 @@ class FractionalMaxPoolTest(test.TestCase):
             pseudo_random,
             overlapping,
             seed=self._SEED)
-        tensor_output, row_seq, col_seq = sess.run([p, r, c])
+        tensor_output, row_seq, col_seq = self.evaluate([p, r, c])
         expected_result = self._GetExpectedFractionalMaxPoolResult(rand_mat,
                                                                    row_seq,
                                                                    col_seq,
diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index 503569f3b18..23b3c7e1cc2 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -458,7 +458,7 @@ class FunctionalOpsTest(test.TestCase):
     grad = gradients_impl.gradients(ys=[loss], xs=[a, b])
     with self.test_session(use_gpu=True) as sess:
       variables.global_variables_initializer().run()
-      sess.run(grad)
+      self.evaluate(grad)
 
   @test_util.run_in_graph_and_eager_modes
   def testFoldShape(self):
@@ -567,8 +567,8 @@ class FunctionalOpsTest(test.TestCase):
           target="/job:worker/replica:0/task:0/cpu:1")
 
     with session.Session(worker[0].target) as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, [6])
 
   def testRemoteFunctionDirectSession(self):
@@ -591,8 +591,8 @@ class FunctionalOpsTest(test.TestCase):
           target="/job:localhost/replica:0/task:0/cpu:1")
 
     with self.test_session(config=worker_config) as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, [6])
 
   def testRemoteFunctionSameDeviceDirectSession(self):
@@ -610,8 +610,8 @@ class FunctionalOpsTest(test.TestCase):
           args=[a, b], Tout=[dtypes.int32], f=_remote_fn, target="/cpu:0")
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, [6])
 
   def testRemoteFunctionCPUGPU(self):
@@ -634,8 +634,8 @@ class FunctionalOpsTest(test.TestCase):
           target="/job:localhost/replica:0/task:0/device:GPU:0")[0] + 3.0
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, 9.0)
 
   def testRemoteFunctionGPUCPU(self):
@@ -658,8 +658,8 @@ class FunctionalOpsTest(test.TestCase):
           target="/job:localhost/replica:0/task:0/cpu:0")[0] + 3.0
 
     with self.cached_session() as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, 9.0)
 
   def testRemoteFunctionGPUCPUStrings(self):
@@ -677,7 +677,7 @@ class FunctionalOpsTest(test.TestCase):
           args=[a], Tout=[dtypes.string], f=_remote_fn, target="/cpu:0")
 
     with self.cached_session() as sess:
-      ret = sess.run(remote_op)
+      ret = self.evaluate(remote_op)
       self.assertAllEqual(ret, [b"a"])
 
   def testRemoteFunctionCrossProcess(self):
@@ -699,8 +699,8 @@ class FunctionalOpsTest(test.TestCase):
           target="/job:worker/replica:0/task:1/cpu:0")[0] + 3.0
 
     with session.Session(workers[0].target) as sess:
-      sess.run(variables.global_variables_initializer())
-      mul = sess.run(remote_op)
+      self.evaluate(variables.global_variables_initializer())
+      mul = self.evaluate(remote_op)
       self.assertEqual(mul, 9)
 
   def testIf(self):
@@ -769,7 +769,7 @@ class FunctionalOpsTest(test.TestCase):
           else:
             fetch = "my_while:1"
         with self.session(graph=g, use_gpu=use_gpu) as sess:
-          return sess.run(fetch)
+          return self.evaluate(fetch)
 
     self.assertAllEqual(Run(20., False), 210.)
     self.assertAllEqual(Run(20., True), 210.)
@@ -857,11 +857,11 @@ class FunctionalOpsTest(test.TestCase):
           result_binary = functional_ops.While(
               [1.0, 0., 0.],
               function.Defun(*[dtypes.float32] * 3)(TestCond), TestBinary)
-          sess.run(variables.global_variables_initializer())
+          self.evaluate(variables.global_variables_initializer())
           assert len(result_unary) == 2
-          self.assertEqual([10.0, 54.0], sess.run(result_unary))
+          self.assertEqual([10.0, 54.0], self.evaluate(result_unary))
           assert len(result_binary) == 3
-          self.assertEqual([10.0, 54.0, 9.0], sess.run(result_binary))
+          self.assertEqual([10.0, 54.0, 9.0], self.evaluate(result_binary))
 
           def TestCondCapture(n, *args):
             del args
@@ -892,7 +892,7 @@ class FunctionalOpsTest(test.TestCase):
                 100, 0, -1, [0.], Body, rewrite_with_while=rewrite_with_while)
             [0],
         ]
-        xvals = sess.run(xs)
+        xvals = self.evaluate(xs)
       self.assertAllEqual(210, xvals[0])
       self.assertAllEqual(5050, xvals[1])
 
@@ -949,16 +949,16 @@ class FunctionalOpsTest(test.TestCase):
         result_binary = functional_ops.For(
             1, 10, 1, [0., 0.], TestBinary,
             rewrite_with_while=rewrite_with_while)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         assert not result_nullary
         # The nullary variant doesn't return anything so we can't easily run it.
         # As a total hack, fetch the operation by name and run it.
         sess.run(ops.get_default_graph().get_operation_by_name(
             "While" if rewrite_with_while else "For"))
         assert len(result_unary) == 1
-        self.assertEqual([54.0], sess.run(result_unary))
+        self.assertEqual([54.0], self.evaluate(result_unary))
         assert len(result_binary) == 2
-        self.assertEqual([54.0, 9.0], sess.run(result_binary))
+        self.assertEqual([54.0, 9.0], self.evaluate(result_binary))
 
   def _tfMLP(self, xval, wsval, bsval, rewrite_with_while):
     # On GPU, don't rewrite using a while loop.
@@ -1041,8 +1041,8 @@ class FunctionalOpsTest(test.TestCase):
       avals = [Poly(a), Grad(a)]
       b = constant_op.constant(1.)
       bvals = [Poly(b), Grad(b)]
-      self.assertAllEqual(sess.run(avals), [8., 4.])
-      self.assertAllEqual(sess.run(bvals), [17., 16.])
+      self.assertAllEqual(self.evaluate(avals), [8., 4.])
+      self.assertAllEqual(self.evaluate(bvals), [17., 16.])
 
 
 # TODO(akshayka): Replace `function.Defun` with tf.contrib.eager.defun` in the
@@ -1193,8 +1193,8 @@ class PartitionedCallTest(test.TestCase):
             allow_soft_placement=False,
             log_device_placement=True,
             device_count={"CPU": 2})) as sess:
-      sess.run(variables.global_variables_initializer())
-      expected = sess.run(sum_gather())
+      self.evaluate(variables.global_variables_initializer())
+      expected = self.evaluate(sum_gather())
       result = sess.run(
           functional_ops.partitioned_call(
               args=defined.captured_inputs, f=defined))
diff --git a/tensorflow/python/kernel_tests/gradient_correctness_test.py b/tensorflow/python/kernel_tests/gradient_correctness_test.py
index 291a69ebac6..12b8a4c8e3b 100644
--- a/tensorflow/python/kernel_tests/gradient_correctness_test.py
+++ b/tensorflow/python/kernel_tests/gradient_correctness_test.py
@@ -35,7 +35,7 @@ class GradientCorrectnessTest(test.TestCase):
       yexp = math_ops.exp(x)
       yexplog = math_ops.log(yexp)
       grads = gradients_impl.gradients([yexp, yexplog], [x])
-      grad_vals = sess.run(grads)
+      grad_vals = self.evaluate(grads)
       exp1_plus_one = (1.0 + np.exp(1.0)).astype(np.float32)
       # [dexp(x)/dx + d(log(exp(x)))/dx] @ x=1 == exp(1) + 1
       self.assertAllClose(grad_vals[0], exp1_plus_one)
@@ -44,13 +44,13 @@ class GradientCorrectnessTest(test.TestCase):
     x = constant_op.constant(3.)
     dx_dx, = gradients_impl.gradients(x, x)
     with self.cached_session() as sess:
-      self.assertAllClose(1., sess.run(dx_dx))
+      self.assertAllClose(1., self.evaluate(dx_dx))
 
   def testIntegerIdentityGradient(self):
     x = constant_op.constant(3)
     dx_dx, = gradients_impl.gradients(x, x)
     with self.cached_session() as sess:
-      self.assertAllClose(1, sess.run(dx_dx))
+      self.assertAllClose(1, self.evaluate(dx_dx))
 
   def testGradientWithIntegerPath(self):
     x = constant_op.constant([3.9, 4.1])
@@ -58,7 +58,7 @@ class GradientCorrectnessTest(test.TestCase):
     y = x * k
     dy_dx, = gradients_impl.gradients(y, x)
     with self.cached_session() as sess:
-      self.assertAllClose([3., 4.], sess.run(dy_dx))
+      self.assertAllClose([3., 4.], self.evaluate(dy_dx))
 
   def testNoIntegerGradient1(self):
     x = constant_op.constant([3.9, 4.1])
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index a3f2c0ddd75..87c7bbef3cf 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -704,12 +704,12 @@ class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase):
         ratio = outputs_2norm / inputs_2norm
         my_ops = variables.global_variables_initializer()
         with self.session(use_gpu=True) as sess:
-          sess.run(my_ops)
+          self.evaluate(my_ops)
           # Check the shape of the outputs
           t = self.evaluate(outputs)
           self.assertAllEqual(t.shape, outputs_shape)
           # Check isometry of the delta-orthogonal kernel.
-          self.assertAllClose(sess.run(ratio), gain, rtol=tol, atol=tol)
+          self.assertAllClose(self.evaluate(ratio), gain, rtol=tol, atol=tol)
 
   def testNonuniformity(self):
     value = 0
@@ -842,12 +842,12 @@ class ConvolutionOrthogonal1dInitializerTest(test.TestCase):
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
       with self.session(use_gpu=True) as sess:
-        sess.run(my_ops)
+        self.evaluate(my_ops)
         # Check the shape of the outputs
         t = self.evaluate(outputs)
         self.assertAllEqual(t.shape, outputs_shape)
         # Check isometry of the orthogonal kernel.
-        self.assertAllClose(sess.run(ratio), gain, rtol=tol, atol=tol)
+        self.assertAllClose(self.evaluate(ratio), gain, rtol=tol, atol=tol)
 
 
 class ConvolutionOrthogonal2dInitializerTest(test.TestCase):
@@ -937,12 +937,12 @@ class ConvolutionOrthogonal2dInitializerTest(test.TestCase):
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
       with self.session(use_gpu=True) as sess:
-        sess.run(my_ops)
+        self.evaluate(my_ops)
         # Check the shape of the outputs
         t = self.evaluate(outputs)
         self.assertAllEqual(t.shape, outputs_shape)
         # Check isometry of the orthogonal kernel.
-        self.assertAllClose(sess.run(ratio), gain, rtol=tol, atol=tol)
+        self.assertAllClose(self.evaluate(ratio), gain, rtol=tol, atol=tol)
 
 
 class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
@@ -1062,12 +1062,12 @@ class ConvolutionOrthogonal3dInitializerTest(test.TestCase):
       ratio = outputs_2norm / inputs_2norm
       my_ops = variables.global_variables_initializer()
       with self.cached_session(use_gpu=True) as sess:
-        sess.run(my_ops)
+        self.evaluate(my_ops)
         # Check the shape of the outputs
         t = self.evaluate(outputs)
         self.assertAllEqual(t.shape, outputs_shape)
         # Check isometry of the orthogonal kernel.
-        self.assertAllClose(sess.run(ratio), gain, rtol=tol, atol=tol)
+        self.assertAllClose(self.evaluate(ratio), gain, rtol=tol, atol=tol)
 
 
 class IdentityInitializerTest(test.TestCase):
diff --git a/tensorflow/python/kernel_tests/inplace_ops_test.py b/tensorflow/python/kernel_tests/inplace_ops_test.py
index 51d16861dd8..e0c36d3d2e9 100644
--- a/tensorflow/python/kernel_tests/inplace_ops_test.py
+++ b/tensorflow/python/kernel_tests/inplace_ops_test.py
@@ -149,7 +149,7 @@ class InplaceOpsTest(test_util.TensorFlowTestCase):
       y = inplace_ops.alias_inplace_add(x, [0], [[1, 2, 3]])
       with ops.control_dependencies([y]):
         z = array_ops.identity(x)
-        _, vy, vz = sess.run([x, y, z])
+        _, vy, vz = self.evaluate([x, y, z])
       self.assertAllClose(vy, vz)
 
   def testError(self):
diff --git a/tensorflow/python/kernel_tests/io_ops_test.py b/tensorflow/python/kernel_tests/io_ops_test.py
index afa24195cb3..a6b477062eb 100644
--- a/tensorflow/python/kernel_tests/io_ops_test.py
+++ b/tensorflow/python/kernel_tests/io_ops_test.py
@@ -53,7 +53,7 @@ class IoOpsTest(test.TestCase):
         pass
       with self.cached_session() as sess:
         w = io_ops.write_file(temp.name, contents)
-        sess.run(w)
+        self.evaluate(w)
         with open(temp.name, 'rb') as f:
           file_contents = f.read()
         self.assertEqual(file_contents, contents)
@@ -67,7 +67,7 @@ class IoOpsTest(test.TestCase):
       filepath = os.path.join(subdir, 'subdir2', 'filename')
       with self.cached_session() as sess:
         w = io_ops.write_file(filepath, contents)
-        sess.run(w)
+        self.evaluate(w)
         with open(filepath, 'rb') as f:
           file_contents = f.read()
         self.assertEqual(file_contents, contents)
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
index d5580d0e886..09867435a73 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_circulant_test.py
@@ -557,7 +557,7 @@ class LinearOperatorCirculant2DTestNonHermitianSpectrum(
       self.assertEqual(matrix_tensor.dtype,
                        linear_operator_circulant._DTYPE_COMPLEX)
       matrix_h = linalg.adjoint(matrix_tensor)
-      matrix, matrix_h = sess.run([matrix_tensor, matrix_h])
+      matrix, matrix_h = self.evaluate([matrix_tensor, matrix_h])
       self.assertAllClose(matrix, matrix_h, atol=0)
 
   def test_assert_non_singular_fails_for_singular_operator(self):
@@ -631,7 +631,7 @@ class LinearOperatorCirculant3DTest(test.TestCase):
                        linear_operator_circulant._DTYPE_COMPLEX)
       matrix_h = linalg.adjoint(matrix_tensor)
 
-      matrix, matrix_h = sess.run([matrix_tensor, matrix_h])
+      matrix, matrix_h = self.evaluate([matrix_tensor, matrix_h])
       self.assertAllEqual((2, 2 * 3 * 5, 2 * 3 * 5), matrix.shape)
       self.assertAllClose(matrix, matrix_h)
 
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
index 91f4097438f..80889a162f8 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py
@@ -147,12 +147,12 @@ class LinearOperatorDiagTest(
       operator_matmul = operator.matmul(x)
       mat_matmul = math_ops.matmul(mat, x)
       self.assertAllEqual(operator_matmul.get_shape(), mat_matmul.get_shape())
-      self.assertAllClose(*sess.run([operator_matmul, mat_matmul]))
+      self.assertAllClose(*self.evaluate([operator_matmul, mat_matmul]))
 
       operator_solve = operator.solve(x)
       mat_solve = linalg_ops.matrix_solve(mat, x)
       self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
-      self.assertAllClose(*sess.run([operator_solve, mat_solve]))
+      self.assertAllClose(*self.evaluate([operator_solve, mat_solve]))
 
   def test_diag_matmul(self):
     operator1 = linalg_lib.LinearOperatorDiag([2., 3.])
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
index 522213e26b7..e9fd91c6cf0 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py
@@ -170,7 +170,7 @@ class LinearOperatorIdentityTest(
       expected = x
 
       self.assertAllEqual(operator_matmul.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_matmul, expected]))
+      self.assertAllClose(*self.evaluate([operator_matmul, expected]))
 
   def test_default_batch_shape_broadcasts_with_everything_dynamic(self):
     # These cannot be done in the automated (base test class) tests since they
@@ -207,7 +207,7 @@ class LinearOperatorIdentityTest(
 
       operator_matmul = operator.matmul(x)
       self.assertAllEqual(operator_matmul.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_matmul, expected]))
+      self.assertAllClose(*self.evaluate([operator_matmul, expected]))
 
   def test_broadcast_matmul_dynamic_shapes(self):
     # These cannot be done in the automated (base test class) tests since they
@@ -403,13 +403,13 @@ class LinearOperatorScaledIdentityTest(
       expected = x * 2.2 + zeros
       operator_matmul = operator.matmul(x)
       self.assertAllEqual(operator_matmul.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_matmul, expected]))
+      self.assertAllClose(*self.evaluate([operator_matmul, expected]))
 
       # Test solve
       expected = x / 2.2 + zeros
       operator_solve = operator.solve(x)
       self.assertAllEqual(operator_solve.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_solve, expected]))
+      self.assertAllClose(*self.evaluate([operator_solve, expected]))
 
   def test_broadcast_matmul_and_solve_scalar_scale_multiplier(self):
     # These cannot be done in the automated (base test class) tests since they
@@ -429,13 +429,13 @@ class LinearOperatorScaledIdentityTest(
       expected = x * 2.2
       operator_matmul = operator.matmul(x)
       self.assertAllEqual(operator_matmul.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_matmul, expected]))
+      self.assertAllClose(*self.evaluate([operator_matmul, expected]))
 
       # Test solve
       expected = x / 2.2
       operator_solve = operator.solve(x)
       self.assertAllEqual(operator_solve.get_shape(), expected.get_shape())
-      self.assertAllClose(*sess.run([operator_solve, expected]))
+      self.assertAllClose(*self.evaluate([operator_solve, expected]))
 
   def test_is_x_flags(self):
     operator = linalg_lib.LinearOperatorScaledIdentity(
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
index 5ce26169728..f12714677e2 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py
@@ -119,7 +119,7 @@ class BroadcastMatrixBatchDimsTest(test.TestCase):
     with self.cached_session() as sess:
       self.assertAllEqual(x_bc_expected.shape, x_bc.get_shape())
       self.assertAllEqual(y_bc_expected.shape, y_bc.get_shape())
-      x_bc_, y_bc_ = sess.run([x_bc, y_bc])
+      x_bc_, y_bc_ = self.evaluate([x_bc, y_bc])
       self.assertAllClose(x_bc_expected, x_bc_)
       self.assertAllClose(y_bc_expected, y_bc_)
 
@@ -138,7 +138,7 @@ class BroadcastMatrixBatchDimsTest(test.TestCase):
     with self.cached_session() as sess:
       self.assertAllEqual(x_bc_expected.shape, x_bc.get_shape())
       self.assertAllEqual(y_bc_expected.shape, y_bc.get_shape())
-      x_bc_, y_bc_ = sess.run([x_bc, y_bc])
+      x_bc_, y_bc_ = self.evaluate([x_bc, y_bc])
       self.assertAllClose(x_bc_expected, x_bc_)
       self.assertAllClose(y_bc_expected, y_bc_)
 
diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py
index 09cb5cf0ba9..1d9f4032d1a 100644
--- a/tensorflow/python/kernel_tests/list_ops_test.py
+++ b/tensorflow/python/kernel_tests/list_ops_test.py
@@ -806,7 +806,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     l_read2 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32)
     grad = gradients_impl.gradients([l_read1, l_read2], [x])
     with self.cached_session() as sess:
-      self.assertSequenceEqual(sess.run(grad), [2.])
+      self.assertSequenceEqual(self.evaluate(grad), [2.])
 
   def testSkipEagerBuildElementShape(self):
     fn = list_ops._build_element_shape
diff --git a/tensorflow/python/kernel_tests/listdiff_op_test.py b/tensorflow/python/kernel_tests/listdiff_op_test.py
index baeb40dd635..28657107980 100644
--- a/tensorflow/python/kernel_tests/listdiff_op_test.py
+++ b/tensorflow/python/kernel_tests/listdiff_op_test.py
@@ -47,7 +47,7 @@ class ListDiffTest(test.TestCase):
             y_tensor = ops.convert_to_tensor(y, dtype=dtype)
             out_tensor, idx_tensor = diff_func(x_tensor, y_tensor,
                                                index_dtype=index_dtype)
-            tf_out, tf_idx = sess.run([out_tensor, idx_tensor])
+            tf_out, tf_idx = self.evaluate([out_tensor, idx_tensor])
           self.assertAllEqual(tf_out, out)
           self.assertAllEqual(tf_idx, idx)
           self.assertEqual(1, out_tensor.get_shape().ndims)
diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py
index 3efad4ea116..79961d8dd1d 100644
--- a/tensorflow/python/kernel_tests/lookup_ops_test.py
+++ b/tensorflow/python/kernel_tests/lookup_ops_test.py
@@ -137,7 +137,7 @@ class HashTableOpTest(test.TestCase):
       output2 = table2.lookup(input_string)
       output3 = table3.lookup(input_string)
 
-      out1, out2, out3 = sess.run([output1, output2, output3])
+      out1, out2, out3 = self.evaluate([output1, output2, output3])
       self.assertAllEqual([0, 1, -1], out1)
       self.assertAllEqual([0, 1, -1], out2)
       self.assertAllEqual([0, 1, -1], out3)
@@ -174,7 +174,7 @@ class HashTableOpTest(test.TestCase):
           constant_op.constant(sp_shape, dtypes.int64))
       output = table.lookup(input_tensor)
 
-      out_indices, out_values, out_shape = sess.run(output)
+      out_indices, out_values, out_shape = self.evaluate(output)
 
       self.assertAllEqual([0, 1, -1], out_values)
       self.assertAllEqual(sp_indices, out_indices)
@@ -995,7 +995,7 @@ class InitializeTableFromFileOpTest(test.TestCase):
       output2 = table2.lookup(input_string)
       output3 = table3.lookup(input_string)
 
-      out1, out2, out3 = sess.run([output1, output2, output3])
+      out1, out2, out3 = self.evaluate([output1, output2, output3])
       self.assertAllEqual([0, 1, -1], out1)
       self.assertAllEqual([0, 1, -1], out2)
       self.assertAllEqual([0, 1, -1], out3)
@@ -1313,7 +1313,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
       out1 = table1.lookup(input_string)
       out2 = table2.lookup(input_string)
 
-      out1, out2 = sess.run([out1, out2])
+      out1, out2 = self.evaluate([out1, out2])
       self.assertAllEqual([5, 0, 1, 2, 5], out1)
       self.assertAllEqual([5, 0, 1, 2, 3], out2)
       self.assertEquals(vocab_size + oov_buckets, table1.size().eval())
@@ -1396,7 +1396,7 @@ class IdTableWithHashBucketsTest(test.TestCase):
       out1 = table1.lookup(input_string_1)
       out2 = table2.lookup(input_string_2)
 
-      out1, out2 = sess.run([out1, out2])
+      out1, out2 = self.evaluate([out1, out2])
       self.assertAllEqual([0, 1, 2, -1], out1)
       self.assertAllEqual([-2, 1, -2], out2)
       self.assertEquals(vocab_size + oov_buckets, table1.size().eval())
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index d3a907852a2..bda63bcaa92 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -1046,9 +1046,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
       init_op = variables.global_variables_initializer()
 
       with self.cached_session() as sess:
-        sess.run(init_op)
+        self.evaluate(init_op)
         for grad, _ in gradients_to_variables:
-          np_grad = sess.run(grad)
+          np_grad = self.evaluate(grad)
           self.assertFalse(np.isnan(np_grad).any())
 
   def testNonZeroLossWithPythonScalarWeight(self):
diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
index d41b449a1fa..83f4216e4d0 100644
--- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py
@@ -151,7 +151,7 @@ class ExponentialOpTest(test.TestCase):
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       expm1 = linalg_impl.matrix_exponential(matrix1)
       expm2 = linalg_impl.matrix_exponential(matrix2)
-      expm = sess.run([expm1, expm2])
+      expm = self.evaluate([expm1, expm2])
       self.assertAllEqual(expm[0], expm[1])
 
 
diff --git a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
index 434458721c1..5cef4b79a32 100644
--- a/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_inverse_op_test.py
@@ -146,7 +146,7 @@ class InverseOpTest(test.TestCase):
         inv1 = linalg_ops.matrix_inverse(matrix1, adjoint=adjoint_)
         inv2 = linalg_ops.matrix_inverse(matrix2, adjoint=adjoint_)
         all_ops += [inv1, inv2]
-      inv = sess.run(all_ops)
+      inv = self.evaluate(all_ops)
       self.assertAllEqual(inv[0], inv[1])
       self.assertAllEqual(inv[2], inv[3])
 
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
index 81c0b5a7727..b0bce6a1b9b 100644
--- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -129,7 +129,7 @@ class LogarithmOpTest(test.TestCase):
           random_ops.random_normal([5, 5], seed=42), dtypes.complex64)
       logm1 = gen_linalg_ops.matrix_logarithm(matrix1)
       logm2 = gen_linalg_ops.matrix_logarithm(matrix2)
-      logm = sess.run([logm1, logm2])
+      logm = self.evaluate([logm1, logm2])
       self.assertAllEqual(logm[0], logm[1])
 
 
diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
index 1334d0c4cee..80badee8962 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
@@ -126,7 +126,7 @@ class MatrixSolveOpTest(test.TestCase):
         s1 = linalg_ops.matrix_solve(lhs1, rhs1, adjoint=adjoint_)
         s2 = linalg_ops.matrix_solve(lhs2, rhs2, adjoint=adjoint_)
         all_ops += [s1, s2]
-      val = sess.run(all_ops)
+      val = self.evaluate(all_ops)
       self.assertAllEqual(val[0], val[1])
       self.assertAllEqual(val[2], val[3])
 
diff --git a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
index 9212580313c..1f2144bdee9 100644
--- a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
@@ -108,7 +108,7 @@ class SquareRootOpTest(test.TestCase):
       sqrt1 = gen_linalg_ops.matrix_square_root(matrix1)
       sqrt2 = gen_linalg_ops.matrix_square_root(matrix2)
       all_ops = [sqrt1, sqrt2]
-      sqrt = sess.run(all_ops)
+      sqrt = self.evaluate(all_ops)
       self.assertAllEqual(sqrt[0], sqrt[1])
 
 
diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py
index 5dcdb9e4205..eb5f99582ca 100644
--- a/tensorflow/python/kernel_tests/metrics_test.py
+++ b/tensorflow/python/kernel_tests/metrics_test.py
@@ -203,10 +203,10 @@ class MeanTest(test.TestCase):
 
       mean, update_op = metrics.mean(values)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAlmostEqual(1.65, sess.run(mean), 5)
+        self.evaluate(update_op)
+      self.assertAlmostEqual(1.65, self.evaluate(mean), 5)
 
   def testUpdateOpsReturnsCurrentValue(self):
     with self.cached_session() as sess:
@@ -220,14 +220,14 @@ class MeanTest(test.TestCase):
 
       mean, update_op = metrics.mean(values)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
-      self.assertAlmostEqual(0.5, sess.run(update_op), 5)
-      self.assertAlmostEqual(1.475, sess.run(update_op), 5)
-      self.assertAlmostEqual(12.4 / 6.0, sess.run(update_op), 5)
-      self.assertAlmostEqual(1.65, sess.run(update_op), 5)
+      self.assertAlmostEqual(0.5, self.evaluate(update_op), 5)
+      self.assertAlmostEqual(1.475, self.evaluate(update_op), 5)
+      self.assertAlmostEqual(12.4 / 6.0, self.evaluate(update_op), 5)
+      self.assertAlmostEqual(1.65, self.evaluate(update_op), 5)
 
-      self.assertAlmostEqual(1.65, sess.run(mean), 5)
+      self.assertAlmostEqual(1.65, self.evaluate(mean), 5)
 
   def testUnweighted(self):
     values = _test_values((3, 2, 4, 1))
@@ -370,10 +370,10 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean))
+        self.evaluate(update_op)
+      self.assertAllClose([[-0.9 / 4., 3.525]], self.evaluate(mean))
 
   def testMultiDimensional(self):
     with self.cached_session() as sess:
@@ -391,10 +391,11 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(2):
-        sess.run(update_op)
-      self.assertAllClose([[[1, 2], [1, 2]], [[2, 3], [5, 6]]], sess.run(mean))
+        self.evaluate(update_op)
+      self.assertAllClose([[[1, 2], [1, 2]], [[2, 3], [5, 6]]],
+                          self.evaluate(mean))
 
   def testUpdateOpsReturnsCurrentValue(self):
     with self.cached_session() as sess:
@@ -408,14 +409,14 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
-      self.assertAllClose([[0, 1]], sess.run(update_op), 5)
-      self.assertAllClose([[-2.1, 5.05]], sess.run(update_op), 5)
-      self.assertAllClose([[2.3 / 3., 10.1 / 3.]], sess.run(update_op), 5)
-      self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(update_op), 5)
+      self.assertAllClose([[0, 1]], self.evaluate(update_op), 5)
+      self.assertAllClose([[-2.1, 5.05]], self.evaluate(update_op), 5)
+      self.assertAllClose([[2.3 / 3., 10.1 / 3.]], self.evaluate(update_op), 5)
+      self.assertAllClose([[-0.9 / 4., 3.525]], self.evaluate(update_op), 5)
 
-      self.assertAllClose([[-0.9 / 4., 3.525]], sess.run(mean), 5)
+      self.assertAllClose([[-0.9 / 4., 3.525]], self.evaluate(mean), 5)
 
   def testBinaryWeighted1d(self):
     with self.cached_session() as sess:
@@ -439,10 +440,10 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAllClose([[3.25, 0.5]], sess.run(mean), 5)
+        self.evaluate(update_op)
+      self.assertAllClose([[3.25, 0.5]], self.evaluate(mean), 5)
 
   def testWeighted1d(self):
     with self.cached_session() as sess:
@@ -466,10 +467,10 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAllClose([[0.8, 3.52]], sess.run(mean), 5)
+        self.evaluate(update_op)
+      self.assertAllClose([[0.8, 3.52]], self.evaluate(mean), 5)
 
   def testWeighted2d_1(self):
     with self.cached_session() as sess:
@@ -493,10 +494,10 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAllClose([[-2.1, 0.5]], sess.run(mean), 5)
+        self.evaluate(update_op)
+      self.assertAllClose([[-2.1, 0.5]], self.evaluate(mean), 5)
 
   def testWeighted2d_2(self):
     with self.cached_session() as sess:
@@ -520,10 +521,10 @@ class MeanTensorTest(test.TestCase):
 
       mean, update_op = metrics.mean_tensor(values, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(4):
-        sess.run(update_op)
-      self.assertAllClose([[0, 0.5]], sess.run(mean), 5)
+        self.evaluate(update_op)
+      self.assertAllClose([[0, 0.5]], self.evaluate(mean), 5)
 
 
 class AccuracyTest(test.TestCase):
@@ -576,11 +577,11 @@ class AccuracyTest(test.TestCase):
     accuracy, update_op = metrics.accuracy(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_accuracy = accuracy.eval()
@@ -609,10 +610,10 @@ class AccuracyTest(test.TestCase):
 
       accuracy, update_op = metrics.accuracy(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in xrange(3):
-        sess.run(update_op)
-      self.assertEqual(0.5, sess.run(update_op))
+        self.evaluate(update_op)
+      self.assertEqual(0.5, self.evaluate(update_op))
       self.assertEqual(0.5, accuracy.eval())
 
   def testEffectivelyEquivalentSizes(self):
@@ -621,7 +622,7 @@ class AccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertEqual(1.0, update_op.eval())
       self.assertEqual(1.0, accuracy.eval())
 
@@ -631,7 +632,7 @@ class AccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions, weights=2.0)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertEqual(1.0, update_op.eval())
       self.assertEqual(1.0, accuracy.eval())
 
@@ -645,7 +646,7 @@ class AccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       accuracy, update_op = metrics.accuracy(labels, predictions, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       # if streaming_accuracy does not flatten the weight, accuracy would be
       # 0.33333334 due to an intended broadcast of weight. Due to flattening,
       # it will be higher than .95
@@ -666,7 +667,7 @@ class AccuracyTest(test.TestCase):
       accuracy, update_op = metrics.accuracy(labels, predictions,
                                              weights_placeholder)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       # if streaming_accuracy does not flatten the weight, accuracy would be
       # 0.33333334 due to an intended broadcast of weight. Due to flattening,
       # it will be higher than .95
@@ -704,10 +705,10 @@ class AccuracyTest(test.TestCase):
 
       accuracy, update_op = metrics.accuracy(labels, predictions, weights)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in xrange(3):
-        sess.run(update_op)
-      self.assertEqual(1.0, sess.run(update_op))
+        self.evaluate(update_op)
+      self.assertEqual(1.0, self.evaluate(update_op))
       self.assertEqual(1.0, accuracy.eval())
 
 
@@ -747,11 +748,11 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_precision = precision.eval()
@@ -766,8 +767,8 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1, self.evaluate(update_op))
       self.assertAlmostEqual(1, precision.eval())
 
   def testSomeCorrect_multipleInputDtypes(self):
@@ -779,7 +780,7 @@ class PrecisionTest(test.TestCase):
       precision, update_op = metrics.precision(labels, predictions)
 
       with self.cached_session() as sess:
-        sess.run(variables.local_variables_initializer())
+        self.evaluate(variables.local_variables_initializer())
         self.assertAlmostEqual(0.5, update_op.eval())
         self.assertAlmostEqual(0.5, precision.eval())
 
@@ -882,8 +883,8 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
       self.assertAlmostEqual(0, precision.eval())
 
   def testZeroTrueAndFalsePositivesGivesZeroPrecision(self):
@@ -892,8 +893,8 @@ class PrecisionTest(test.TestCase):
     precision, update_op = metrics.precision(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
       self.assertEqual(0.0, precision.eval())
 
 
@@ -934,11 +935,11 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_recall = recall.eval()
@@ -953,8 +954,8 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
       self.assertEqual(1, recall.eval())
 
   def testSomeCorrect_multipleInputDtypes(self):
@@ -966,7 +967,7 @@ class RecallTest(test.TestCase):
       recall, update_op = metrics.recall(labels, predictions)
 
       with self.cached_session() as sess:
-        sess.run(variables.local_variables_initializer())
+        self.evaluate(variables.local_variables_initializer())
         self.assertAlmostEqual(0.5, update_op.eval())
         self.assertAlmostEqual(0.5, recall.eval())
 
@@ -977,7 +978,7 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       weighted_tp = 2.0 + 5.0
       weighted_t = (2.0 + 2.0) + (5.0 + 5.0)
       expected_precision = weighted_tp / weighted_t
@@ -991,7 +992,7 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       weighted_tp = 3.0 + 1.0
       weighted_t = (2.0 + 3.0) + (4.0 + 1.0)
       expected_precision = weighted_tp / weighted_t
@@ -1006,8 +1007,8 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
       self.assertEqual(0, recall.eval())
 
   def testZeroTruePositivesAndFalseNegativesGivesZeroRecall(self):
@@ -1016,8 +1017,8 @@ class RecallTest(test.TestCase):
     recall, update_op = metrics.recall(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
       self.assertEqual(0, recall.eval())
 
 
@@ -1056,11 +1057,11 @@ class AUCTest(test.TestCase):
     auc, update_op = metrics.auc(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_auc = auc.eval()
@@ -1078,8 +1079,8 @@ class AUCTest(test.TestCase):
       labels = constant_op.constant(inputs)
       auc, update_op = metrics.auc(labels, predictions, curve=curve)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(1, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(1, self.evaluate(update_op))
 
       self.assertEqual(1, auc.eval())
 
@@ -1093,8 +1094,8 @@ class AUCTest(test.TestCase):
             constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=label_dtype)
         auc, update_op = metrics.auc(labels, predictions)
 
-        sess.run(variables.local_variables_initializer())
-        self.assertAlmostEqual(0.5, sess.run(update_op))
+        self.evaluate(variables.local_variables_initializer())
+        self.assertAlmostEqual(0.5, self.evaluate(update_op))
 
         self.assertAlmostEqual(0.5, auc.eval())
 
@@ -1106,8 +1107,8 @@ class AUCTest(test.TestCase):
       weights = constant_op.constant([2], shape=(1, 1))
       auc, update_op = metrics.auc(labels, predictions, weights=weights)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.5, sess.run(update_op), 5)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.5, self.evaluate(update_op), 5)
 
       self.assertAlmostEqual(0.5, auc.eval(), 5)
 
@@ -1119,8 +1120,8 @@ class AUCTest(test.TestCase):
       weights = constant_op.constant([1, 2, 3, 4], shape=(1, 4))
       auc, update_op = metrics.auc(labels, predictions, weights=weights)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.7, sess.run(update_op), 5)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.7, self.evaluate(update_op), 5)
 
       self.assertAlmostEqual(0.7, auc.eval(), 5)
 
@@ -1134,10 +1135,10 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='careful_interpolation')
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       # expected ~= 0.79726744594
       expected = 1 - math.log(1.5) / 2
-      self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3)
+      self.assertAlmostEqual(expected, self.evaluate(update_op), delta=1e-3)
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testCorrectAnotherAUCPRSpecialCase(self):
@@ -1150,10 +1151,10 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='careful_interpolation')
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       # expected ~= 0.61350593198
       expected = (2.5 - 2 * math.log(4./3) - 0.25 * math.log(7./5)) / 3
-      self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3)
+      self.assertAlmostEqual(expected, self.evaluate(update_op), delta=1e-3)
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testThirdCorrectAUCPRSpecialCase(self):
@@ -1166,10 +1167,10 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='careful_interpolation')
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       # expected ~= 0.90410597584
       expected = 1 - math.log(4./3) / 3
-      self.assertAlmostEqual(expected, sess.run(update_op), delta=1e-3)
+      self.assertAlmostEqual(expected, self.evaluate(update_op), delta=1e-3)
       self.assertAlmostEqual(expected, auc.eval(), delta=1e-3)
 
   def testIncorrectAUCPRSpecialCase(self):
@@ -1180,8 +1181,8 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='trapezoidal')
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.79166, self.evaluate(update_op), delta=1e-3)
 
       self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3)
 
@@ -1195,8 +1196,8 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='trapezoidal')
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.610317, self.evaluate(update_op), delta=1e-3)
 
       self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3)
 
@@ -1210,8 +1211,8 @@ class AUCTest(test.TestCase):
       auc, update_op = metrics.auc(labels, predictions, curve='PR',
                                    summation_method='trapezoidal')
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.90277, self.evaluate(update_op), delta=1e-3)
 
       self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3)
 
@@ -1223,8 +1224,8 @@ class AUCTest(test.TestCase):
       labels = constant_op.constant(1 - inputs, dtype=dtypes_lib.float32)
       auc, update_op = metrics.auc(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0, self.evaluate(update_op))
 
       self.assertAlmostEqual(0, auc.eval())
 
@@ -1234,8 +1235,8 @@ class AUCTest(test.TestCase):
       labels = array_ops.zeros([4])
       auc, update_op = metrics.auc(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1, sess.run(update_op), 6)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1, self.evaluate(update_op), 6)
 
       self.assertAlmostEqual(1, auc.eval(), 6)
 
@@ -1245,8 +1246,8 @@ class AUCTest(test.TestCase):
       labels = array_ops.ones([4])
       auc, update_op = metrics.auc(labels, predictions, curve='PR')
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1, sess.run(update_op), 6)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1, self.evaluate(update_op), 6)
 
       self.assertAlmostEqual(1, auc.eval(), 6)
 
@@ -1317,9 +1318,9 @@ class AUCTest(test.TestCase):
                                      num_thresholds=500,
                                      weights=tf_weights)
 
-        sess.run(variables.local_variables_initializer())
+        self.evaluate(variables.local_variables_initializer())
         for i in range(num_batches):
-          sess.run(update_op)
+          self.evaluate(update_op)
 
         # Since this is only approximate, we can't expect a 6 digits match.
         # Although with higher number of samples/thresholds we should see the
@@ -1371,11 +1372,11 @@ class SpecificityAtSensitivityTest(test.TestCase):
         labels, predictions, sensitivity=0.7)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_specificity = specificity.eval()
@@ -1391,8 +1392,8 @@ class SpecificityAtSensitivityTest(test.TestCase):
         labels, predictions, sensitivity=0.7)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(1, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(1, self.evaluate(update_op))
       self.assertEqual(1, specificity.eval())
 
   def testSomeCorrectHighSensitivity(self):
@@ -1406,8 +1407,8 @@ class SpecificityAtSensitivityTest(test.TestCase):
         labels, predictions, sensitivity=0.8)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1.0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1.0, self.evaluate(update_op))
       self.assertAlmostEqual(1.0, specificity.eval())
 
   def testSomeCorrectLowSensitivity(self):
@@ -1421,9 +1422,9 @@ class SpecificityAtSensitivityTest(test.TestCase):
         labels, predictions, sensitivity=0.4)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
-      self.assertAlmostEqual(0.6, sess.run(update_op))
+      self.assertAlmostEqual(0.6, self.evaluate(update_op))
       self.assertAlmostEqual(0.6, specificity.eval())
 
   def testWeighted1d_multipleLabelDtypes(self):
@@ -1440,9 +1441,9 @@ class SpecificityAtSensitivityTest(test.TestCase):
           labels, predictions, weights=weights, sensitivity=0.4)
 
       with self.cached_session() as sess:
-        sess.run(variables.local_variables_initializer())
+        self.evaluate(variables.local_variables_initializer())
 
-        self.assertAlmostEqual(0.6, sess.run(update_op))
+        self.assertAlmostEqual(0.6, self.evaluate(update_op))
         self.assertAlmostEqual(0.6, specificity.eval())
 
   def testWeighted2d(self):
@@ -1458,9 +1459,9 @@ class SpecificityAtSensitivityTest(test.TestCase):
         labels, predictions, weights=weights, sensitivity=0.4)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
-      self.assertAlmostEqual(8.0 / 15.0, sess.run(update_op))
+      self.assertAlmostEqual(8.0 / 15.0, self.evaluate(update_op))
       self.assertAlmostEqual(8.0 / 15.0, specificity.eval())
 
 
@@ -1508,11 +1509,11 @@ class SensitivityAtSpecificityTest(test.TestCase):
         labels, predictions, specificity=0.7)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_sensitivity = sensitivity.eval()
@@ -1528,8 +1529,8 @@ class SensitivityAtSpecificityTest(test.TestCase):
         labels, predictions, specificity=0.7)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(1, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(1, self.evaluate(update_op))
       self.assertEqual(1, specificity.eval())
 
   def testSomeCorrectHighSpecificity(self):
@@ -1543,8 +1544,8 @@ class SensitivityAtSpecificityTest(test.TestCase):
         labels, predictions, specificity=0.8)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.8, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.8, self.evaluate(update_op))
       self.assertAlmostEqual(0.8, specificity.eval())
 
   def testSomeCorrectLowSpecificity(self):
@@ -1558,8 +1559,8 @@ class SensitivityAtSpecificityTest(test.TestCase):
         labels, predictions, specificity=0.4)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(0.6, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(0.6, self.evaluate(update_op))
       self.assertAlmostEqual(0.6, specificity.eval())
 
   def testWeighted_multipleLabelDtypes(self):
@@ -1577,8 +1578,8 @@ class SensitivityAtSpecificityTest(test.TestCase):
           labels, predictions, weights=weights, specificity=0.4)
 
       with self.cached_session() as sess:
-        sess.run(variables.local_variables_initializer())
-        self.assertAlmostEqual(0.675, sess.run(update_op))
+        self.evaluate(variables.local_variables_initializer())
+        self.assertAlmostEqual(0.675, self.evaluate(update_op))
         self.assertAlmostEqual(0.675, specificity.eval())
 
 
@@ -1639,14 +1640,14 @@ class PrecisionRecallThresholdsTest(test.TestCase):
     rec, rec_op = metrics.recall_at_thresholds(labels, predictions, thresholds)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates, then verify idempotency.
-      sess.run([prec_op, rec_op])
+      self.evaluate([prec_op, rec_op])
       initial_prec = prec.eval()
       initial_rec = rec.eval()
       for _ in range(10):
-        sess.run([prec_op, rec_op])
+        self.evaluate([prec_op, rec_op])
         self.assertAllClose(initial_prec, prec.eval())
         self.assertAllClose(initial_rec, rec.eval())
 
@@ -1663,8 +1664,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
                                                  thresholds)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertEqual(1, prec.eval())
       self.assertEqual(1, rec.eval())
@@ -1683,8 +1684,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
         rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
                                                    thresholds)
 
-        sess.run(variables.local_variables_initializer())
-        sess.run([prec_op, rec_op])
+        self.evaluate(variables.local_variables_initializer())
+        self.evaluate([prec_op, rec_op])
 
         self.assertAlmostEqual(0.5, prec.eval())
         self.assertAlmostEqual(0.5, rec.eval())
@@ -1701,8 +1702,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
                                                  thresholds)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertAlmostEqual(0, prec.eval())
       self.assertAlmostEqual(0, rec.eval())
@@ -1729,8 +1730,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec_low = array_ops.reshape(rec_low, shape=())
       rec_high = array_ops.reshape(rec_high, shape=())
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertAlmostEqual(1.0, prec_low.eval(), places=5)
       self.assertAlmostEqual(0.0, prec_high.eval(), places=5)
@@ -1759,8 +1760,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec_low = array_ops.reshape(rec_low, shape=())
       rec_high = array_ops.reshape(rec_high, shape=())
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertAlmostEqual(1.0, prec_low.eval(), places=5)
       self.assertAlmostEqual(0.0, prec_high.eval(), places=5)
@@ -1783,8 +1784,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       [rec_low, rec_high] = array_ops.split(
           value=rec, num_or_size_splits=2, axis=0)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertAlmostEqual(0.75, prec_low.eval())
       self.assertAlmostEqual(0.0, prec_high.eval())
@@ -1801,8 +1802,8 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec, rec_op = metrics.recall_at_thresholds(labels, predictions,
                                                  thresholds)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([prec_op, rec_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([prec_op, rec_op])
 
       self.assertAlmostEqual(0, prec.eval(), 6)
       self.assertAlmostEqual(0, rec.eval(), 6)
@@ -1869,9 +1870,9 @@ class PrecisionRecallThresholdsTest(test.TestCase):
       rec, rec_op = metrics.recall_at_thresholds(tf_labels, tf_predictions,
                                                  thresholds)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(int(num_samples / batch_size)):
-        sess.run([prec_op, rec_op])
+        self.evaluate([prec_op, rec_op])
       # Since this is only approximate, we can't expect a 6 digits match.
       # Although with higher number of samples/thresholds we should see the
       # accuracy improving
@@ -2802,11 +2803,11 @@ class MeanAbsoluteErrorTest(test.TestCase):
     error, update_op = metrics.mean_absolute_error(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_error = error.eval()
@@ -2823,8 +2824,8 @@ class MeanAbsoluteErrorTest(test.TestCase):
     error, update_op = metrics.mean_absolute_error(labels, predictions, weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(3, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(3, self.evaluate(update_op))
       self.assertEqual(3, error.eval())
 
 
@@ -2867,11 +2868,11 @@ class MeanRelativeErrorTest(test.TestCase):
                                                    normalizer)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_error = error.eval()
@@ -2892,8 +2893,8 @@ class MeanRelativeErrorTest(test.TestCase):
         labels, predictions, normalizer=labels)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(expected_error, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(expected_error, self.evaluate(update_op))
       self.assertEqual(expected_error, error.eval())
 
   def testSingleUpdateNormalizedByZeros(self):
@@ -2908,8 +2909,8 @@ class MeanRelativeErrorTest(test.TestCase):
         labels, predictions, normalizer=array_ops.zeros_like(labels))
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(0.0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(0.0, self.evaluate(update_op))
       self.assertEqual(0.0, error.eval())
 
 
@@ -2946,11 +2947,11 @@ class MeanSquaredErrorTest(test.TestCase):
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_error = error.eval()
@@ -2964,8 +2965,8 @@ class MeanSquaredErrorTest(test.TestCase):
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(0, self.evaluate(update_op))
       self.assertEqual(0, error.eval())
 
   def testSingleUpdateWithError(self):
@@ -2977,8 +2978,8 @@ class MeanSquaredErrorTest(test.TestCase):
     error, update_op = metrics.mean_squared_error(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(6, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(6, self.evaluate(update_op))
       self.assertEqual(6, error.eval())
 
   def testSingleUpdateWithErrorAndWeights(self):
@@ -2991,8 +2992,8 @@ class MeanSquaredErrorTest(test.TestCase):
     error, update_op = metrics.mean_squared_error(labels, predictions, weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(13, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(13, self.evaluate(update_op))
       self.assertEqual(13, error.eval())
 
   def testMultipleBatchesOfSizeOne(self):
@@ -3013,9 +3014,9 @@ class MeanSquaredErrorTest(test.TestCase):
 
       error, update_op = metrics.mean_squared_error(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run(update_op)
-      self.assertAlmostEqual(208.0 / 6, sess.run(update_op), 5)
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate(update_op)
+      self.assertAlmostEqual(208.0 / 6, self.evaluate(update_op), 5)
 
       self.assertAlmostEqual(208.0 / 6, error.eval(), 5)
 
@@ -3054,11 +3055,11 @@ class MeanSquaredErrorTest(test.TestCase):
       mse1, update_op1 = metrics.mean_squared_error(
           labels1, predictions1, name='msd1')
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([update_op0, update_op1])
-      sess.run([update_op0, update_op1])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([update_op0, update_op1])
+      self.evaluate([update_op0, update_op1])
 
-      mse0, mse1 = sess.run([mse0, mse1])
+      mse0, mse1 = self.evaluate([mse0, mse1])
       self.assertAlmostEqual(208.0 / 6, mse0, 5)
       self.assertAlmostEqual(79.0 / 6, mse1, 5)
 
@@ -3081,9 +3082,9 @@ class MeanSquaredErrorTest(test.TestCase):
       mae, ma_update_op = metrics.mean_absolute_error(labels, predictions)
       mse, ms_update_op = metrics.mean_squared_error(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([ma_update_op, ms_update_op])
-      sess.run([ma_update_op, ms_update_op])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([ma_update_op, ms_update_op])
+      self.evaluate([ma_update_op, ms_update_op])
 
       self.assertAlmostEqual(32.0 / 6, mae.eval(), 5)
       self.assertAlmostEqual(208.0 / 6, mse.eval(), 5)
@@ -3123,11 +3124,11 @@ class RootMeanSquaredErrorTest(test.TestCase):
     error, update_op = metrics.root_mean_squared_error(labels, predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_error = error.eval()
@@ -3142,8 +3143,8 @@ class RootMeanSquaredErrorTest(test.TestCase):
 
       rmse, update_op = metrics.root_mean_squared_error(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(0, self.evaluate(update_op))
 
       self.assertEqual(0, rmse.eval())
 
@@ -3156,7 +3157,7 @@ class RootMeanSquaredErrorTest(test.TestCase):
 
       rmse, update_op = metrics.root_mean_squared_error(labels, predictions)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAlmostEqual(math.sqrt(6), update_op.eval(), 5)
       self.assertAlmostEqual(math.sqrt(6), rmse.eval(), 5)
 
@@ -3171,8 +3172,8 @@ class RootMeanSquaredErrorTest(test.TestCase):
       rmse, update_op = metrics.root_mean_squared_error(labels, predictions,
                                                         weights)
 
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(math.sqrt(13), sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(math.sqrt(13), self.evaluate(update_op))
 
       self.assertAlmostEqual(math.sqrt(13), rmse.eval(), 5)
 
@@ -3221,11 +3222,11 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=1)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_error = error.eval()
@@ -3243,8 +3244,8 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(0, self.evaluate(update_op))
       self.assertEqual(0, error.eval())
 
   def testSingleUpdateWithError1(self):
@@ -3259,8 +3260,8 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1, sess.run(update_op), 5)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1, self.evaluate(update_op), 5)
       self.assertAlmostEqual(1, error.eval(), 5)
 
   def testSingleUpdateWithError2(self):
@@ -3280,8 +3281,8 @@ class MeanCosineDistanceTest(test.TestCase):
     error, update_op = metrics.mean_cosine_distance(labels, predictions, dim=2)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertAlmostEqual(1.0, sess.run(update_op), 5)
+      self.evaluate(variables.local_variables_initializer())
+      self.assertAlmostEqual(1.0, self.evaluate(update_op), 5)
       self.assertAlmostEqual(1.0, error.eval(), 5)
 
   def testSingleUpdateWithErrorAndWeights1(self):
@@ -3299,8 +3300,8 @@ class MeanCosineDistanceTest(test.TestCase):
         labels, predictions, dim=2, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
-      self.assertEqual(0, sess.run(update_op))
+      self.evaluate(variables.local_variables_initializer())
+      self.assertEqual(0, self.evaluate(update_op))
       self.assertEqual(0, error.eval())
 
   def testSingleUpdateWithErrorAndWeights2(self):
@@ -3318,7 +3319,7 @@ class MeanCosineDistanceTest(test.TestCase):
         labels, predictions, dim=2, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertEqual(1.5, update_op.eval())
       self.assertEqual(1.5, error.eval())
 
@@ -3360,10 +3361,10 @@ class PcntBelowThreshTest(test.TestCase):
       pcnt1, update_op1 = metrics.percentage_below(values, 7, name='medium')
       pcnt2, update_op2 = metrics.percentage_below(values, 1, name='low')
 
-      sess.run(variables.local_variables_initializer())
-      sess.run([update_op0, update_op1, update_op2])
+      self.evaluate(variables.local_variables_initializer())
+      self.evaluate([update_op0, update_op1, update_op2])
 
-      pcnt0, pcnt1, pcnt2 = sess.run([pcnt0, pcnt1, pcnt2])
+      pcnt0, pcnt1, pcnt2 = self.evaluate([pcnt0, pcnt1, pcnt2])
       self.assertAlmostEqual(1.0, pcnt0, 5)
       self.assertAlmostEqual(0.75, pcnt1, 5)
       self.assertAlmostEqual(0.0, pcnt2, 5)
@@ -3382,11 +3383,11 @@ class PcntBelowThreshTest(test.TestCase):
       pcnt2, update_op2 = metrics.percentage_below(
           values, 1, weights=weights, name='low')
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertListEqual([1.0, 0.5, 0.0],
-                           sess.run([update_op0, update_op1, update_op2]))
+                           self.evaluate([update_op0, update_op1, update_op2]))
 
-      pcnt0, pcnt1, pcnt2 = sess.run([pcnt0, pcnt1, pcnt2])
+      pcnt0, pcnt1, pcnt2 = self.evaluate([pcnt0, pcnt1, pcnt2])
       self.assertAlmostEqual(1.0, pcnt0, 5)
       self.assertAlmostEqual(0.5, pcnt1, 5)
       self.assertAlmostEqual(0.0, pcnt2, 5)
@@ -3446,11 +3447,11 @@ class MeanIOUTest(test.TestCase):
         labels, predictions, num_classes=num_classes)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_mean_iou = mean_iou.eval()
@@ -3482,9 +3483,9 @@ class MeanIOUTest(test.TestCase):
 
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(5):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([1.0 / 2.0, 1.0 / 4.0, 0.])
       self.assertEqual(desired_output, miou.eval())
 
@@ -3529,7 +3530,7 @@ class MeanIOUTest(test.TestCase):
 
       variables.local_variables_initializer().run()
       for _ in range(6):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([2.0 / 3.0, 1.0 / 2.0])
       self.assertAlmostEqual(desired_output, mean_iou.eval())
 
@@ -3563,9 +3564,9 @@ class MeanIOUTest(test.TestCase):
 
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(5):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([1.0 / 3.0, 2.0 / 4.0])
       self.assertAlmostEqual(desired_output, miou.eval())
 
@@ -3587,7 +3588,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 2
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       confusion_matrix = update_op.eval()
       self.assertAllEqual([[3, 0], [2, 5]], confusion_matrix)
       desired_miou = np.mean([3. / 5., 5. / 7.])
@@ -3599,7 +3600,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 1
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertEqual(40, update_op.eval()[0])
       self.assertEqual(1.0, miou.eval())
 
@@ -3609,7 +3610,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 2
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([[0, 0], [40, 0]], update_op.eval())
       self.assertEqual(0., miou.eval())
 
@@ -3640,7 +3641,7 @@ class MeanIOUTest(test.TestCase):
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(
           labels, predictions, num_classes, weights=weights)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([[2, 0], [2, 4]], update_op.eval())
       desired_miou = np.mean([2. / 4., 4. / 6.])
       self.assertAlmostEqual(desired_miou, miou.eval())
@@ -3659,7 +3660,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 3
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([[7, 4, 3], [3, 5, 2], [0, 0, 0]], update_op.eval())
       self.assertAlmostEqual(
           1 / 3 * (7 / (7 + 3 + 7) + 5 / (5 + 4 + 5) + 0 / (0 + 5 + 0)),
@@ -3671,7 +3672,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 2
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([[1, 0], [0, 0]], update_op.eval())
       self.assertAlmostEqual(1, miou.eval())
 
@@ -3689,7 +3690,7 @@ class MeanIOUTest(test.TestCase):
     num_classes = 3
     with self.cached_session() as sess:
       miou, update_op = metrics.mean_iou(labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([[9, 5, 0], [3, 7, 0], [0, 0, 0]], update_op.eval())
       self.assertAlmostEqual(
           1 / 2 * (9 / (9 + 3 + 5) + 7 / (7 + 5 + 3)), miou.eval())
@@ -3752,11 +3753,11 @@ class MeanPerClassAccuracyTest(test.TestCase):
         labels, predictions, num_classes=num_classes)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
 
       # Run several updates.
       for _ in range(10):
-        sess.run(update_op)
+        self.evaluate(update_op)
 
       # Then verify idempotency.
       initial_mean_accuracy = mean_accuracy.eval()
@@ -3788,9 +3789,9 @@ class MeanPerClassAccuracyTest(test.TestCase):
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(5):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([1.0, 1.0 / 3.0, 0.0])
       self.assertAlmostEqual(desired_output, mean_accuracy.eval())
 
@@ -3835,7 +3836,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
 
       variables.local_variables_initializer().run()
       for _ in range(6):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([2.0 / 2.0, 0.5 / 1.5])
       self.assertAlmostEqual(desired_output, mean_accuracy.eval())
 
@@ -3870,9 +3871,9 @@ class MeanPerClassAccuracyTest(test.TestCase):
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
 
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       for _ in range(5):
-        sess.run(update_op)
+        self.evaluate(update_op)
       desired_output = np.mean([1.0 / 2.0, 2.0 / 3.0, 0.])
       self.assertAlmostEqual(desired_output, mean_accuracy.eval())
 
@@ -3883,7 +3884,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertEqual(1.0, update_op.eval()[0])
       self.assertEqual(1.0, mean_accuracy.eval())
 
@@ -3894,7 +3895,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual([0.0, 0.0], update_op.eval())
       self.assertEqual(0., mean_accuracy.eval())
 
@@ -3913,7 +3914,7 @@ class MeanPerClassAccuracyTest(test.TestCase):
     with self.cached_session() as sess:
       mean_accuracy, update_op = metrics.mean_per_class_accuracy(
           labels, predictions, num_classes, weights=weights)
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       desired_accuracy = np.array([2. / 2., 4. / 6.], dtype=np.float32)
       self.assertAllEqual(desired_accuracy, update_op.eval())
       desired_mean_accuracy = np.mean(desired_accuracy)
@@ -3945,7 +3946,7 @@ class FalseNegativesTest(test.TestCase):
         labels=labels, predictions=predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(3., tn_update_op.eval())
       self.assertAllClose(3., tn.eval())
@@ -3964,7 +3965,7 @@ class FalseNegativesTest(test.TestCase):
         labels=labels, predictions=predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(5., tn_update_op.eval())
       self.assertAllClose(5., tn.eval())
@@ -3994,7 +3995,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase):
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), fn.eval())
       self.assertAllEqual((0, 2, 3), fn_update_op.eval())
       self.assertAllEqual((0, 2, 3), fn.eval())
@@ -4013,7 +4014,7 @@ class FalseNegativesAtThresholdsTest(test.TestCase):
         thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), fn.eval())
       self.assertAllEqual((0.0, 8.0, 11.0), fn_update_op.eval())
       self.assertAllEqual((0.0, 8.0, 11.0), fn.eval())
@@ -4044,7 +4045,7 @@ class FalsePositivesTest(test.TestCase):
         labels=labels, predictions=predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(7., tn_update_op.eval())
       self.assertAllClose(7., tn.eval())
@@ -4063,7 +4064,7 @@ class FalsePositivesTest(test.TestCase):
         labels=labels, predictions=predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(14., tn_update_op.eval())
       self.assertAllClose(14., tn.eval())
@@ -4093,7 +4094,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase):
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), fp.eval())
       self.assertAllEqual((7, 4, 2), fp_update_op.eval())
       self.assertAllEqual((7, 4, 2), fp.eval())
@@ -4114,7 +4115,7 @@ class FalsePositivesAtThresholdsTest(test.TestCase):
         thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), fp.eval())
       self.assertAllEqual((125.0, 42.0, 12.0), fp_update_op.eval())
       self.assertAllEqual((125.0, 42.0, 12.0), fp.eval())
@@ -4145,7 +4146,7 @@ class TrueNegativesTest(test.TestCase):
         labels=labels, predictions=predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(3., tn_update_op.eval())
       self.assertAllClose(3., tn.eval())
@@ -4164,7 +4165,7 @@ class TrueNegativesTest(test.TestCase):
         labels=labels, predictions=predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(4., tn_update_op.eval())
       self.assertAllClose(4., tn.eval())
@@ -4194,7 +4195,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase):
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), tn.eval())
       self.assertAllEqual((2, 5, 7), tn_update_op.eval())
       self.assertAllEqual((2, 5, 7), tn.eval())
@@ -4213,7 +4214,7 @@ class TrueNegativesAtThresholdsTest(test.TestCase):
         thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), tn.eval())
       self.assertAllEqual((5.0, 15.0, 23.0), tn_update_op.eval())
       self.assertAllEqual((5.0, 15.0, 23.0), tn.eval())
@@ -4244,7 +4245,7 @@ class TruePositivesTest(test.TestCase):
         labels=labels, predictions=predictions)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(7., tn_update_op.eval())
       self.assertAllClose(7., tn.eval())
@@ -4263,7 +4264,7 @@ class TruePositivesTest(test.TestCase):
         labels=labels, predictions=predictions, weights=weights)
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllClose(0., tn.eval())
       self.assertAllClose(12., tn_update_op.eval())
       self.assertAllClose(12., tn.eval())
@@ -4293,7 +4294,7 @@ class TruePositivesAtThresholdsTest(test.TestCase):
         predictions=predictions, labels=labels, thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0, 0, 0), tp.eval())
       self.assertAllEqual((3, 1, 0), tp_update_op.eval())
       self.assertAllEqual((3, 1, 0), tp.eval())
@@ -4310,7 +4311,7 @@ class TruePositivesAtThresholdsTest(test.TestCase):
         thresholds=[0.15, 0.5, 0.85])
 
     with self.cached_session() as sess:
-      sess.run(variables.local_variables_initializer())
+      self.evaluate(variables.local_variables_initializer())
       self.assertAllEqual((0.0, 0.0, 0.0), tp.eval())
       self.assertAllEqual((111.0, 37.0, 0.0), tp_update_op.eval())
       self.assertAllEqual((111.0, 37.0, 0.0), tp.eval())
diff --git a/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
index 15e38265421..87f1991aa78 100644
--- a/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/neon_depthwise_conv_op_test.py
@@ -142,8 +142,8 @@ class DepthwiseConv2DTest(test.TestCase):
       conv_interface = nn_impl.depthwise_conv2d(
           t1, t2, strides=[1, stride, stride, 1], padding=padding)
 
-      native_result = sess.run(conv_native)
-      interface_result = sess.run(conv_interface)
+      native_result = self.evaluate(conv_native)
+      interface_result = self.evaluate(conv_interface)
 
     print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
           ", stride:", stride, ", padding: ", padding, ", max diff: ",
@@ -211,7 +211,7 @@ class DepthwiseConv2DTest(test.TestCase):
         t2 = constant_op.constant(x2, shape=filter_in_sizes)
         conv = nn_ops.depthwise_conv2d_native(
             t1, t2, strides=[1, stride, stride, 1], padding=padding)
-        value = sess.run(conv)
+        value = self.evaluate(conv)
     print("value = ", value)
     self.assertAllClose(expected, np.ravel(value), 1e-5)
     self.assertShapeEqual(value, conv)
diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py
index e202b6e8a43..5ff0c58bf1b 100644
--- a/tensorflow/python/kernel_tests/norm_op_test.py
+++ b/tensorflow/python/kernel_tests/norm_op_test.py
@@ -70,7 +70,7 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_):
         tf_matrix = constant_op.constant(matrix)
         tf_norm = linalg_ops.norm(
             tf_matrix, ord=ord_, axis=axis_, keepdims=keep_dims_)
-        tf_norm_val = sess.run(tf_norm)
+        tf_norm_val = self.evaluate(tf_norm)
       else:
         tf_matrix = array_ops.placeholder(dtype_)
         tf_norm = linalg_ops.norm(
diff --git a/tensorflow/python/kernel_tests/nth_element_op_test.py b/tensorflow/python/kernel_tests/nth_element_op_test.py
index 338b6cec010..6cd49746710 100644
--- a/tensorflow/python/kernel_tests/nth_element_op_test.py
+++ b/tensorflow/python/kernel_tests/nth_element_op_test.py
@@ -35,7 +35,7 @@ class NthElementTest(test.TestCase):
     with self.cached_session(use_gpu=False) as sess:
       inputs_op = ops.convert_to_tensor(inputs, dtype=dtype)
       values_op = nn_ops.nth_element(inputs_op, n, reverse=reverse)
-      values = sess.run(values_op)
+      values = self.evaluate(values_op)
 
       self.assertShapeEqual(np_expected_values, values_op)
       self.assertAllClose(np_expected_values, values)
diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
index 520b6633759..b4818360d57 100644
--- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
@@ -126,7 +126,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [
           self.checkedThread(
@@ -158,7 +158,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.append(sess.run(dequeued_t))
+        results.append(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in enqueue_ops]
       for thread in threads:
@@ -193,13 +193,13 @@ class PaddingFIFOQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         for enqueue_op in enqueue_ops:
-          sess.run(enqueue_op)
+          self.evaluate(enqueue_op)
 
       results = []
 
       def dequeue():
         for _ in xrange(len(elems)):
-          results.append(sess.run(dequeued_t))
+          results.append(self.evaluate(dequeued_t))
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -224,7 +224,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         enqueue_op.run()
 
       for i in xrange(len(elems)):
-        x_val, y_val = sess.run(dequeued_t)
+        x_val, y_val = self.evaluate(dequeued_t)
         x, y = elems[i]
         self.assertEqual([x], x_val)
         self.assertEqual([y], y_val)
@@ -327,7 +327,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       for i in range(8):
-        float_val, int_val = sess.run(dequeued_t)
+        float_val, int_val = self.evaluate(dequeued_t)
         self.assertEqual(float_elems[i % 4], float_val)
         self.assertAllEqual(int_elems[i % 4], int_val)
 
@@ -344,7 +344,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       for i in range(8):
-        float_val, int_val = sess.run(dequeued_t)
+        float_val, int_val = self.evaluate(dequeued_t)
         self.assertEqual(float_elems[i % 4], float_val)
         self.assertAllEqual(int_elems[i % 4], int_val)
 
@@ -387,17 +387,17 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       enqueue_op.run()
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[0:4], float_val)
       self.assertAllEqual(int_elems[0:4], int_val)
       self.assertEqual(float_val.shape, dequeued_t[0].get_shape())
       self.assertEqual(int_val.shape, dequeued_t[1].get_shape())
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[4:8], float_val)
       self.assertAllEqual(int_elems[4:8], int_val)
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       self.assertAllEqual(float_elems[8], float_val)
       self.assertAllEqual(int_elems[8], int_val)
       self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
@@ -418,7 +418,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       enqueue_op.run()
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[0:4], float_val)
       self.assertAllEqual(int_elems[0:4], int_val)
       self.assertTrue(
@@ -428,11 +428,11 @@ class PaddingFIFOQueueTest(test.TestCase):
           tensor_shape.TensorShape(int_val.shape).is_compatible_with(dequeued_t[
               1].get_shape()))
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertAllEqual(float_elems[4:8], float_val)
       self.assertAllEqual(int_elems[4:8], int_val)
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       self.assertAllEqual(float_elems[8], float_val)
       self.assertAllEqual(int_elems[8], int_val)
       self.assertTrue(
@@ -459,7 +459,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       for enqueue_op in enqueue_ops:
         enqueue_op.run()
-      string_val, int_val = sess.run(dequeued_t)
+      string_val, int_val = self.evaluate(dequeued_t)
 
       self.assertAllEqual([[b"a", b"", b""], [b"ab", b"", b""],
                            [b"abc", b"", b""], [b"abc", b"d", b""],
@@ -473,7 +473,7 @@ class PaddingFIFOQueueTest(test.TestCase):
           tensor_shape.TensorShape(int_val.shape).is_compatible_with(dequeued_t[
               1].get_shape()))
 
-      string_val, int_val = sess.run(dequeued_single_t)
+      string_val, int_val = self.evaluate(dequeued_single_t)
       self.assertAllEqual([b"abc", b"d", b"e", b"f"], string_val)
       self.assertAllEqual([[1, 2, 3, 4]], int_val)
       self.assertTrue(
@@ -500,7 +500,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       for enqueue_op in enqueue_ops:
         enqueue_op.run()
-      string_val, int_val = sess.run(dequeued_t)
+      string_val, int_val = self.evaluate(dequeued_t)
 
       self.assertAllEqual([[b"a", b"", b""], [b"ab", b"", b""],
                            [b"abc", b"", b""], [b"abc", b"d", b""],
@@ -514,7 +514,7 @@ class PaddingFIFOQueueTest(test.TestCase):
           tensor_shape.TensorShape(int_val.shape).is_compatible_with(dequeued_t[
               1].get_shape()))
 
-      string_val, int_val = sess.run(dequeued_single_t)
+      string_val, int_val = self.evaluate(dequeued_single_t)
       self.assertAllEqual([b"abc", b"d", b"e", b"f"], string_val)
       self.assertAllEqual([[1, 2, 3, 4]], int_val)
       self.assertTrue(
@@ -633,7 +633,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       # Enqueue 100 items in parallel on 10 threads.
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [self.checkedThread(target=enqueue) for _ in range(10)]
       for thread in threads:
@@ -656,7 +656,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -680,7 +680,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -700,11 +700,11 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def enqueue():
         for _ in xrange(100):
-          sess.run(enqueue_op)
+          self.evaluate(enqueue_op)
 
       def dequeue():
         for _ in xrange(100):
-          self.assertTrue(sess.run(dequeued_t) in (10.0, 20.0))
+          self.assertTrue(self.evaluate(dequeued_t) in (10.0, 20.0))
 
       enqueue_threads = [self.checkedThread(target=enqueue) for _ in range(10)]
       dequeue_threads = [self.checkedThread(target=dequeue) for _ in range(10)]
@@ -736,7 +736,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def dequeue():
         for i in xrange(250):
-          self.assertEqual(i, sess.run(dequeued_t))
+          self.assertEqual(i, self.evaluate(dequeued_t))
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -767,7 +767,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       dequeuemany_t = q.dequeue_many(count_placeholder)
 
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       enqueue_thread = self.checkedThread(target=enqueue)
       enqueue_thread.start()
@@ -805,10 +805,10 @@ class PaddingFIFOQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -832,10 +832,10 @@ class PaddingFIFOQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -901,11 +901,11 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def dequeue():
         for elem in elems:
-          self.assertEqual([elem], sess.run(dequeued_t))
+          self.assertEqual([elem], self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -926,8 +926,8 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems[:3], sess.run(dequeued_t))
-        self.assertAllEqual(elems[3:], sess.run(dequeued_t))
+        self.assertAllEqual(elems[:3], self.evaluate(dequeued_t))
+        self.assertAllEqual(elems[3:], self.evaluate(dequeued_t))
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -947,7 +947,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -968,11 +968,11 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems, sess.run(dequeued_t))
+        self.assertAllEqual(elems, self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -993,11 +993,11 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def dequeue():
-        self.assertAllEqual(elems[:3], sess.run(dequeued_t))
+        self.assertAllEqual(elems[:3], self.evaluate(dequeued_t))
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1017,16 +1017,16 @@ class PaddingFIFOQueueTest(test.TestCase):
       cleanup_dequeue_t = q.dequeue()
 
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        self.assertAllEqual(elems[0:3], sess.run(dequeued_t))
+        self.assertAllEqual(elems[0:3], self.evaluate(dequeued_t))
         with self.assertRaises(errors_impl.OutOfRangeError):
-          sess.run(dequeued_t)
-        self.assertEqual(elems[3], sess.run(cleanup_dequeue_t))
+          self.evaluate(dequeued_t)
+        self.assertEqual(elems[3], self.evaluate(cleanup_dequeue_t))
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       enqueue_thread = self.checkedThread(target=enqueue)
       enqueue_thread.start()
@@ -1059,7 +1059,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def dequeue():
         with self.assertRaises(errors_impl.OutOfRangeError):
-          sess.run([dequeued_a_t, dequeued_b_t])
+          self.evaluate([dequeued_a_t, dequeued_b_t])
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1072,7 +1072,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       # Test that the elements in the partially-dequeued batch are
       # restored in the correct order.
       for elem_a, elem_b in zip(elems_a, elems_b):
-        val_a, val_b = sess.run([cleanup_dequeue_a_t, cleanup_dequeue_b_t])
+        val_a, val_b = self.evaluate([cleanup_dequeue_a_t, cleanup_dequeue_b_t])
         self.assertEqual(elem_a, val_a)
         self.assertEqual(elem_b, val_b)
       self.assertEqual(0, q.size().eval())
@@ -1087,7 +1087,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1107,7 +1107,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -1155,7 +1155,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -1178,7 +1178,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -1207,7 +1207,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def blocking_enqueue():
         # Expect the operation to succeed once the dequeue op runs.
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       enqueue_thread = self.checkedThread(target=blocking_enqueue)
       enqueue_thread.start()
@@ -1217,7 +1217,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       time.sleep(0.1)
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       close_thread = self.checkedThread(target=close)
       close_thread.start()
@@ -1242,7 +1242,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       enqueue_thread = self.checkedThread(target=blocking_enqueue)
       enqueue_thread.start()
@@ -1252,7 +1252,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       time.sleep(0.1)
 
       def close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       close_thread = self.checkedThread(target=close)
       close_thread.start()
@@ -1379,19 +1379,19 @@ class PaddingFIFOQueueTest(test.TestCase):
 
   def _blockingDequeue(self, sess, dequeue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_op)
+      self.evaluate(dequeue_op)
 
   def _blockingDequeueMany(self, sess, dequeue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_many_op)
+      self.evaluate(dequeue_many_op)
 
   def _blockingEnqueue(self, sess, enqueue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_op)
+      self.evaluate(enqueue_op)
 
   def _blockingEnqueueMany(self, sess, enqueue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_many_op)
+      self.evaluate(enqueue_many_op)
 
   def testResetOfBlockingOperation(self):
     with self.cached_session() as sess:
@@ -1434,7 +1434,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       def blocking_enqueue():
         enq_done.append(False)
         # This will fill the queue and then block until enough dequeues happen.
-        sess.run(enq)
+        self.evaluate(enq)
         enq_done.append(True)
 
       thread = self.checkedThread(target=blocking_enqueue)
@@ -1444,14 +1444,14 @@ class PaddingFIFOQueueTest(test.TestCase):
       results = []
       results.append(deq.eval())  # Will only complete after the enqueue starts.
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       for _ in range(3):
         results.append(deq.eval())
 
       time.sleep(0.1)
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       # This dequeue will unblock the thread.
       results.append(deq.eval())
@@ -1477,7 +1477,7 @@ class PaddingFIFOQueueTest(test.TestCase):
 
       def blocking_dequeue():
         # Will only complete after 4 enqueues complete.
-        results.extend(sess.run(deq))
+        results.extend(self.evaluate(deq))
 
       thread = self.checkedThread(target=blocking_dequeue)
       thread.start()
@@ -1486,7 +1486,7 @@ class PaddingFIFOQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         self.assertEqual(len(results), 0)
-        sess.run(enq)
+        self.evaluate(enq)
 
       # Enough enqueued to unblock the dequeue
       thread.join()
@@ -1517,7 +1517,7 @@ class PaddingFIFOQueueTest(test.TestCase):
       q.enqueue_many(input_tuple).run()
 
       output_tuple_t = q.dequeue_many(32)
-      output_tuple = sess.run(output_tuple_t)
+      output_tuple = self.evaluate(output_tuple_t)
 
       for (input_elem, output_elem) in zip(input_tuple, output_tuple):
         self.assertAllEqual(input_elem, output_elem)
diff --git a/tensorflow/python/kernel_tests/parse_single_example_op_test.py b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
index a84895a287e..3f500872827 100644
--- a/tensorflow/python/kernel_tests/parse_single_example_op_test.py
+++ b/tensorflow/python/kernel_tests/parse_single_example_op_test.py
@@ -107,7 +107,7 @@ class ParseExampleTest(test.TestCase):
         for result_dict in [out, out_with_example_name]:
           result = flatten_values_tensors_or_sparse(result_dict.values())
           # Check values.
-          tf_result = sess.run(result)
+          tf_result = self.evaluate(result)
           _compare_output_to_expected(self, result_dict, expected_values,
                                       tf_result)
 
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
index 8f359bd32cd..1f677103dc6 100644
--- a/tensorflow/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -101,7 +101,7 @@ class ParseExampleTest(test.TestCase):
         out = parsing_ops.parse_example(**kwargs)
         result = flatten_values_tensors_or_sparse(out.values())
         # Check values.
-        tf_result = sess.run(result)
+        tf_result = self.evaluate(result)
         _compare_output_to_expected(self, out, expected_values, tf_result)
 
       # Check shapes; if serialized is a Tensor we need its size to
@@ -1614,7 +1614,7 @@ class DecodeJSONExampleTest(test.TestCase):
           shape=examples.shape,
           dtype=dtypes.string)
       binary_tensor = parsing_ops.decode_json_example(json_tensor)
-      binary_val = sess.run(binary_tensor)
+      binary_val = self.evaluate(binary_tensor)
 
       if examples.shape:
         self.assertShapeEqual(binary_val, json_tensor)
@@ -1700,7 +1700,7 @@ class DecodeJSONExampleTest(test.TestCase):
       json_tensor = constant_op.constant(["{]"])
       binary_tensor = parsing_ops.decode_json_example(json_tensor)
       with self.assertRaisesOpError("Error while parsing JSON"):
-        sess.run(binary_tensor)
+        self.evaluate(binary_tensor)
 
 
 class ParseTensorOpTest(test.TestCase):
diff --git a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
index e393c7a0229..a8e962bc3a6 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py
@@ -81,7 +81,7 @@ class PoolingTest(test.TestCase):
           data_format=data_format)
       if data_format == "NCDHW":
         t = test_util.NCHWToNHWC(t)
-      vals = sess.run(t)
+      vals = self.evaluate(t)
     # Verifies values.
     actual = vals.flatten()
     self.assertAllClose(expected, actual)
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 61628c4756f..81222719f2a 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -826,7 +826,7 @@ class PoolingTest(test.TestCase):
           strides=[1, 1, 1, 1],
           Targmax=dtypes.int64,
           padding="VALID")
-      out, argmax = sess.run([out_op, argmax_op])
+      out, argmax = self.evaluate([out_op, argmax_op])
       self.assertShapeEqual(out, out_op)
       self.assertShapeEqual(argmax, argmax_op)
       self.assertAllClose(out.ravel(), [1.0, 1.0, 1.0, 1.0])
diff --git a/tensorflow/python/kernel_tests/priority_queue_test.py b/tensorflow/python/kernel_tests/priority_queue_test.py
index 73a9c816382..9be682ea52f 100644
--- a/tensorflow/python/kernel_tests/priority_queue_test.py
+++ b/tensorflow/python/kernel_tests/priority_queue_test.py
@@ -50,7 +50,7 @@ class PriorityQueueTest(test.TestCase):
         enq.run()
 
       deq = q.dequeue_many(100)
-      deq_elem, deq_value_0, deq_value_1 = sess.run(deq)
+      deq_elem, deq_value_0, deq_value_1 = self.evaluate(deq)
 
       allowed = {}
       missed = set()
@@ -81,7 +81,7 @@ class PriorityQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       dequeue_op = q.dequeue_many(100)
 
@@ -93,7 +93,7 @@ class PriorityQueueTest(test.TestCase):
       for t in enqueue_threads:
         t.start()
 
-      deq_elem, deq_value_0, deq_value_1 = sess.run(dequeue_op)
+      deq_elem, deq_value_0, deq_value_1 = self.evaluate(dequeue_op)
 
       for t in enqueue_threads:
         t.join()
@@ -132,12 +132,12 @@ class PriorityQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       dequeued = []
 
       def dequeue(dequeue_op):
-        (dequeue_indices, dequeue_values) = sess.run(dequeue_op)
+        (dequeue_indices, dequeue_values) = self.evaluate(dequeue_op)
         self.assertAllEqual(dequeue_indices, dequeue_values)
         dequeued.extend(dequeue_indices)
 
@@ -184,10 +184,10 @@ class PriorityQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue(dequeue_op, dequeued):
-        (dequeue_indices, dequeue_values) = sess.run(dequeue_op)
+        (dequeue_indices, dequeue_values) = self.evaluate(dequeue_op)
         self.assertAllEqual(dequeue_indices, dequeue_values)
         dequeue_wait.acquire()
         dequeued.extend(dequeue_indices)
@@ -215,7 +215,7 @@ class PriorityQueueTest(test.TestCase):
 
       # We can't guarantee full sorting because we can't guarantee
       # that the dequeued.extend() call runs immediately after the
-      # sess.run() call.  Here we're just happy everything came out.
+      # self.evaluate() call.  Here we're just happy everything came out.
       self.assertAllEqual(set(dequeued), set(all_enqueued_values))
 
   def testRoundTripInsertManyMultiThreadedReadOnceSorts(self):
@@ -236,7 +236,7 @@ class PriorityQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       dequeue_op = q.dequeue_many(100)
 
@@ -248,7 +248,7 @@ class PriorityQueueTest(test.TestCase):
       for t in enqueue_threads:
         t.start()
 
-      deq_elem, deq_value_0, deq_value_1 = sess.run(dequeue_op)
+      deq_elem, deq_value_0, deq_value_1 = self.evaluate(dequeue_op)
 
       for t in enqueue_threads:
         t.join()
@@ -276,7 +276,7 @@ class PriorityQueueTest(test.TestCase):
       side_value_1 = np.random.rand(1000).astype(bytes)
       q.enqueue_many((elem, side_value_0, side_value_1)).run()
       deq = q.dequeue_many(1000)
-      deq_elem, deq_value_0, deq_value_1 = sess.run(deq)
+      deq_elem, deq_value_0, deq_value_1 = self.evaluate(deq)
 
       allowed = {}
       for e, v0, v1 in zip(elem, side_value_0, side_value_1):
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index b101da036e6..c9cbe44a7f3 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -307,9 +307,9 @@ class PyFuncTest(test.TestCase):
     with session_lib.Session() as sess:
       producer = iter(range(3))
       x, = script_ops.py_func(lambda: next(producer), [], [dtypes.int64])
-      self.assertEqual(sess.run(x), 0)
-      self.assertEqual(sess.run(x), 1)
-      self.assertEqual(sess.run(x), 2)
+      self.assertEqual(self.evaluate(x), 0)
+      self.assertEqual(self.evaluate(x), 1)
+      self.assertEqual(self.evaluate(x), 2)
 
   def testStateless(self):
     # Not using self.cached_session(), which disables optimization.
@@ -317,9 +317,9 @@ class PyFuncTest(test.TestCase):
       producer = iter(range(3))
       x, = script_ops.py_func(
           lambda: next(producer), [], [dtypes.int64], stateful=False)
-      self.assertEqual(sess.run(x), 0)
-      self.assertEqual(sess.run(x), 0)
-      self.assertEqual(sess.run(x), 0)
+      self.assertEqual(self.evaluate(x), 0)
+      self.assertEqual(self.evaluate(x), 0)
+      self.assertEqual(self.evaluate(x), 0)
 
   def testGradientFunction(self):
     # Input to tf.py_func is necessary, otherwise get_gradient_function()
@@ -390,7 +390,7 @@ class PyFuncTest(test.TestCase):
     f = script_ops.py_func(
         do_nothing, [constant_op.constant(3, dtypes.int64)], [], stateful=False)
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(f), [])
+      self.assertEqual(self.evaluate(f), [])
 
   def _testExceptionHandling(self, py_exp, tf_exp, eager=False):
 
diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py
index 617b7242043..305b5aa3645 100644
--- a/tensorflow/python/kernel_tests/qr_op_test.py
+++ b/tensorflow/python/kernel_tests/qr_op_test.py
@@ -60,7 +60,7 @@ class QrOpTest(test.TestCase):
             q1, r1 = linalg_ops.qr(matrix1, full_matrices=full_matrices_)
             q2, r2 = linalg_ops.qr(matrix2, full_matrices=full_matrices_)
             all_ops += [q1, r1, q2, r2]
-      val = sess.run(all_ops)
+      val = self.evaluate(all_ops)
       for i in range(8):
         q = 4 * i
         self.assertAllEqual(val[q], val[q + 2])  # q1 == q2
@@ -129,7 +129,7 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_):
       q_tf, r_tf = linalg_ops.qr(x_tf, full_matrices=full_matrices_)
 
       if use_static_shape_:
-        q_tf_val, r_tf_val = sess.run([q_tf, r_tf])
+        q_tf_val, r_tf_val = self.evaluate([q_tf, r_tf])
       else:
         q_tf_val, r_tf_val = sess.run([q_tf, r_tf], feed_dict={x_tf: x_np})
 
diff --git a/tensorflow/python/kernel_tests/random/multinomial_op_big_test.py b/tensorflow/python/kernel_tests/random/multinomial_op_big_test.py
index 0023506b77a..cab841741e7 100644
--- a/tensorflow/python/kernel_tests/random/multinomial_op_big_test.py
+++ b/tensorflow/python/kernel_tests/random/multinomial_op_big_test.py
@@ -39,7 +39,7 @@ class MultinomialTest(test.TestCase):
           num_samples=1000000,
           seed=15)
       for _ in range(100):
-        x = sess.run(samples)
+        x = self.evaluate(samples)
         indices, counts = np.unique(x, return_counts=True)
         for index, count in zip(indices, counts):
           if index in counts_by_indices.keys():
@@ -57,7 +57,7 @@ class MultinomialTest(test.TestCase):
           num_samples=1000000,
           seed=15)
       for _ in range(100):
-        x = sess.run(samples)
+        x = self.evaluate(samples)
         indices, counts = np.unique(x, return_counts=True)
         for index, count in zip(indices, counts):
           if index in counts_by_indices.keys():
@@ -79,7 +79,7 @@ class MultinomialTest(test.TestCase):
       # we'll run out of memory if we try to draw 1e9 samples directly
       # really should fit in 12GB of memory...
       for _ in range(100):
-        x = sess.run(samples)
+        x = self.evaluate(samples)
         indices, counts = np.unique(x, return_counts=True)
         for index, count in zip(indices, counts):
           if index in counts_by_indices.keys():
diff --git a/tensorflow/python/kernel_tests/random/random_gamma_test.py b/tensorflow/python/kernel_tests/random/random_gamma_test.py
index 606e8862c47..d18e3feb045 100644
--- a/tensorflow/python/kernel_tests/random/random_gamma_test.py
+++ b/tensorflow/python/kernel_tests/random/random_gamma_test.py
@@ -48,7 +48,7 @@ class RandomGammaTest(test.TestCase):
             [num], alpha, beta=beta, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
-          ret[i, :] = sess.run(rng)
+          ret[i, :] = self.evaluate(rng)
       return ret
 
     return func
diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py
index 6de894846bc..76618316b24 100644
--- a/tensorflow/python/kernel_tests/random/random_ops_test.py
+++ b/tensorflow/python/kernel_tests/random/random_ops_test.py
@@ -49,9 +49,9 @@ class RandomOpTestCommon(test.TestCase):
         random_seed.set_random_seed(graph_seed)
       x = rng_func([num], min_or_mean, max_or_stddev, dtype=dtype, seed=op_seed)
 
-      y = sess.run(x)
-      z = sess.run(x)
-      w = sess.run(x)
+      y = self.evaluate(x)
+      z = self.evaluate(x)
+      w = self.evaluate(x)
 
       # We use exact equality here. If the random-number generator is producing
       # the same output, all three outputs will be bitwise identical.
@@ -69,7 +69,7 @@ class RandomNormalTest(RandomOpTestCommon):
             [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
-          ret[i, :] = sess.run(rng)
+          ret[i, :] = self.evaluate(rng)
       return ret
 
     return func
@@ -160,7 +160,7 @@ class TruncatedNormalTest(test.TestCase):
             [num], mean=mu, stddev=sigma, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
-          ret[i, :] = sess.run(rng)
+          ret[i, :] = self.evaluate(rng)
       return ret
 
     return func
@@ -256,7 +256,7 @@ class RandomUniformTest(RandomOpTestCommon):
             [num], minval=minv, maxval=maxv, dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
-          ret[i, :] = sess.run(rng)
+          ret[i, :] = self.evaluate(rng)
       return ret
 
     return func
diff --git a/tensorflow/python/kernel_tests/random/random_poisson_test.py b/tensorflow/python/kernel_tests/random/random_poisson_test.py
index 95e48101f6f..47c0858db74 100644
--- a/tensorflow/python/kernel_tests/random/random_poisson_test.py
+++ b/tensorflow/python/kernel_tests/random/random_poisson_test.py
@@ -43,7 +43,7 @@ class RandomPoissonTest(test.TestCase):
         rng = random_ops.random_poisson(lam, [num], dtype=dtype, seed=seed)
         ret = np.empty([10, num])
         for i in xrange(10):
-          ret[i, :] = sess.run(rng)
+          ret[i, :] = self.evaluate(rng)
       return ret
 
     return func
diff --git a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
index f3fcf1eff7a..ed4f5434d9f 100644
--- a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
+++ b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
@@ -84,9 +84,9 @@ class RandomShuffleQueueTest(test.TestCase):
       dequeue_t = q.dequeue()
       results = []
       for _ in range(2):
-        a, b = sess.run(dequeue_t)
+        a, b = self.evaluate(dequeue_t)
         results.append((a, b))
-      a, b = sess.run(q.dequeue_many(3))
+      a, b = self.evaluate(q.dequeue_many(3))
       for i in range(3):
         results.append((a[i], b[i]))
       self.assertItemsEqual([(1, [5]), (2, [6]), (3, [7]), (4, [8]), (9, [10])],
@@ -101,7 +101,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       # Run one producer thread for each element in elems.
       def enqueue(enqueue_op):
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [
           self.checkedThread(
@@ -133,7 +133,7 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.append(sess.run(dequeued_t))
+        results.append(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in enqueue_ops]
       for thread in threads:
@@ -167,13 +167,13 @@ class RandomShuffleQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         for enqueue_op in enqueue_ops:
-          sess.run(enqueue_op)
+          self.evaluate(enqueue_op)
 
       results = []
 
       def dequeue():
         for _ in xrange(len(elems)):
-          results.append(sess.run(dequeued_t))
+          results.append(self.evaluate(dequeued_t))
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -197,7 +197,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       results = []
       for _ in xrange(len(elems)):
-        x, y = sess.run(dequeued_t)
+        x, y = self.evaluate(dequeued_t)
         results.append((x, y))
       self.assertItemsEqual(elems, results)
 
@@ -321,7 +321,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       results = []
       for _ in range(8):
-        float_val, int_val = sess.run(dequeued_t)
+        float_val, int_val = self.evaluate(dequeued_t)
         results.append((float_val, [int_val[0], int_val[1]]))
       expected = list(zip(float_elems, int_elems)) * 2
       self.assertItemsEqual(expected, results)
@@ -368,20 +368,20 @@ class RandomShuffleQueueTest(test.TestCase):
       enqueue_op.run()
 
       results = []
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       self.assertEqual(float_val.shape, dequeued_t[0].get_shape())
       self.assertEqual(int_val.shape, dequeued_t[1].get_shape())
       results.extend(zip(float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       results.extend(zip(float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
       self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
       results.append((float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       results.append((float_val, int_val.tolist()))
 
       self.assertItemsEqual(zip(float_elems, int_elems), results)
@@ -402,21 +402,21 @@ class RandomShuffleQueueTest(test.TestCase):
       enqueue_op.run()
 
       results = []
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       # dequeue_up_to has undefined shape.
       self.assertEqual([None], dequeued_t[0].get_shape().as_list())
       self.assertEqual([None, 2], dequeued_t[1].get_shape().as_list())
       results.extend(zip(float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_t)
+      float_val, int_val = self.evaluate(dequeued_t)
       results.extend(zip(float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
       self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
       results.append((float_val, int_val.tolist()))
 
-      float_val, int_val = sess.run(dequeued_single_t)
+      float_val, int_val = self.evaluate(dequeued_single_t)
       results.append((float_val, int_val.tolist()))
 
       self.assertItemsEqual(zip(float_elems, int_elems), results)
@@ -442,7 +442,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       # Enqueue 100 items in parallel on 10 threads.
       def enqueue():
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       threads = [self.checkedThread(target=enqueue) for _ in range(10)]
       for thread in threads:
@@ -466,7 +466,7 @@ class RandomShuffleQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -489,7 +489,7 @@ class RandomShuffleQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t))
+        dequeued_elems.extend(self.evaluate(dequeued_t))
 
       threads = [self.checkedThread(target=dequeue) for _ in range(10)]
       for thread in threads:
@@ -515,7 +515,7 @@ class RandomShuffleQueueTest(test.TestCase):
       dequeued_elems = []
 
       def dequeue(dequeue_op):
-        dequeued_elems.extend(sess.run(dequeue_op))
+        dequeued_elems.extend(self.evaluate(dequeue_op))
 
       threads = []
       for dequeue_op in dequeue_ops:
@@ -539,10 +539,10 @@ class RandomShuffleQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -566,10 +566,10 @@ class RandomShuffleQueueTest(test.TestCase):
         # The enqueue_op should run after the dequeue op has blocked.
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
-        sess.run(enqueue_op)
+        self.evaluate(enqueue_op)
 
       def dequeue():
-        dequeued_elems.extend(sess.run(dequeued_t).tolist())
+        dequeued_elems.extend(self.evaluate(dequeued_t).tolist())
 
       enqueue_thread = self.checkedThread(target=enqueue)
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -665,18 +665,18 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
 
       # Manually dequeue until we hit min_size.
-      results.append(sess.run(dequeued_t))
-      results.append(sess.run(dequeued_t))
+      results.append(self.evaluate(dequeued_t))
+      results.append(self.evaluate(dequeued_t))
 
       def blocking_dequeue():
-        results.append(sess.run(dequeued_t))
-        results.append(sess.run(dequeued_t))
+        results.append(self.evaluate(dequeued_t))
+        results.append(self.evaluate(dequeued_t))
 
         self.assertItemsEqual(elems, results)
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=blocking_dequeue)
       dequeue_thread.start()
@@ -701,7 +701,7 @@ class RandomShuffleQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
         finished.append(True)
 
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -727,12 +727,12 @@ class RandomShuffleQueueTest(test.TestCase):
       progress = []  # Must be mutable
 
       def dequeue():
-        self.assertItemsEqual(elems, sess.run(dequeued_t))
+        self.assertItemsEqual(elems, self.evaluate(dequeued_t))
         progress.append(1)
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
         progress.append(2)
 
       self.assertEqual(len(progress), 0)
@@ -763,9 +763,9 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.extend(sess.run(dequeued_t))
+        results.extend(self.evaluate(dequeued_t))
         self.assertEquals(3, len(results))
-        results.extend(sess.run(dequeued_t))
+        results.extend(self.evaluate(dequeued_t))
         self.assertEquals(4, len(results))
 
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -794,11 +794,11 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.extend(sess.run(dequeued_t))
+        results.extend(self.evaluate(dequeued_t))
         self.assertEquals(3, len(results))
         # min_after_dequeue is 2, we ask for 3 elements, and we end up only
         # getting the remaining 1.
-        results.extend(sess.run(dequeued_t))
+        results.extend(self.evaluate(dequeued_t))
         self.assertEquals(4, len(results))
 
       dequeue_thread = self.checkedThread(target=dequeue)
@@ -824,16 +824,16 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
 
       def dequeue():
-        results.extend(sess.run(dequeued_t))
+        results.extend(self.evaluate(dequeued_t))
         self.assertEqual(len(results), 3)
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
         # While the last dequeue failed, we want to insure that it returns
         # any elements that it potentially reserved to dequeue. Thus the
         # next cleanup should return a single element.
-        results.extend(sess.run(cleanup_dequeue_t))
+        results.extend(self.evaluate(cleanup_dequeue_t))
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -854,7 +854,7 @@ class RandomShuffleQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -874,7 +874,7 @@ class RandomShuffleQueueTest(test.TestCase):
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.OutOfRangeError,
                                      "is closed and has insufficient"):
-          sess.run(dequeued_t)
+          self.evaluate(dequeued_t)
 
       dequeue_thread = self.checkedThread(target=dequeue)
       dequeue_thread.start()
@@ -922,7 +922,7 @@ class RandomShuffleQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -950,7 +950,7 @@ class RandomShuffleQueueTest(test.TestCase):
       enqueue_op.run()
 
       def blocking_enqueue():
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread = self.checkedThread(target=blocking_enqueue)
       thread.start()
@@ -987,11 +987,11 @@ class RandomShuffleQueueTest(test.TestCase):
       def blocking_enqueue():
         # Expect the operation to succeed since it will complete
         # before the queue is closed.
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
         # Expect the operation to fail due to the queue being closed.
         with self.assertRaisesRegexp(errors_impl.CancelledError, "closed"):
-          sess.run(blocking_enqueue_op)
+          self.evaluate(blocking_enqueue_op)
 
       thread1 = self.checkedThread(target=blocking_enqueue)
       thread1.start()
@@ -1001,7 +1001,7 @@ class RandomShuffleQueueTest(test.TestCase):
       time.sleep(0.1)
 
       def blocking_close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       thread2 = self.checkedThread(target=blocking_close)
       thread2.start()
@@ -1032,7 +1032,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       def blocking_enqueue():
         # This will block until the dequeue after the close.
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
       thread1 = self.checkedThread(target=blocking_enqueue)
       thread1.start()
@@ -1050,7 +1050,7 @@ class RandomShuffleQueueTest(test.TestCase):
         time.sleep(0.1)
 
       def blocking_close():
-        sess.run(close_op)
+        self.evaluate(close_op)
 
       thread2 = self.checkedThread(target=blocking_close)
       thread2.start()
@@ -1064,7 +1064,7 @@ class RandomShuffleQueueTest(test.TestCase):
       # At this point the close operation will complete, so the next enqueue
       # will fail.
       with self.assertRaisesRegexp(errors_impl.CancelledError, "closed"):
-        sess.run(blocking_enqueue_op)
+        self.evaluate(blocking_enqueue_op)
 
   def testSharedQueueSameSession(self):
     with self.cached_session():
@@ -1216,23 +1216,23 @@ class RandomShuffleQueueTest(test.TestCase):
 
   def _blockingDequeue(self, sess, dequeue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_op)
+      self.evaluate(dequeue_op)
 
   def _blockingDequeueMany(self, sess, dequeue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_many_op)
+      self.evaluate(dequeue_many_op)
 
   def _blockingDequeueUpTo(self, sess, dequeue_up_to_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(dequeue_up_to_op)
+      self.evaluate(dequeue_up_to_op)
 
   def _blockingEnqueue(self, sess, enqueue_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_op)
+      self.evaluate(enqueue_op)
 
   def _blockingEnqueueMany(self, sess, enqueue_many_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(enqueue_many_op)
+      self.evaluate(enqueue_many_op)
 
   def testResetOfBlockingOperation(self):
     with self.cached_session() as sess:
@@ -1383,7 +1383,7 @@ class RandomShuffleQueueTest(test.TestCase):
       def blocking_enqueue():
         enq_done.append(False)
         # This will fill the queue and then block until enough dequeues happen.
-        sess.run(enq)
+        self.evaluate(enq)
         enq_done.append(True)
 
       thread = self.checkedThread(target=blocking_enqueue)
@@ -1393,14 +1393,14 @@ class RandomShuffleQueueTest(test.TestCase):
       results = []
       results.append(deq.eval())  # Will only complete after the enqueue starts.
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       for _ in range(3):
         results.append(deq.eval())
 
       time.sleep(0.1)
       self.assertEqual(len(enq_done), 1)
-      self.assertEqual(sess.run(size_op), 5)
+      self.assertEqual(self.evaluate(size_op), 5)
 
       # This dequeue will unblock the thread.
       results.append(deq.eval())
@@ -1426,7 +1426,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       def blocking_dequeue():
         # Will only complete after 4 enqueues complete.
-        results.extend(sess.run(deq))
+        results.extend(self.evaluate(deq))
 
       thread = self.checkedThread(target=blocking_dequeue)
       thread.start()
@@ -1435,7 +1435,7 @@ class RandomShuffleQueueTest(test.TestCase):
         # TODO(mrry): Figure out how to do this without sleeping.
         time.sleep(0.1)
         self.assertEqual(len(results), 0)
-        sess.run(enq)
+        self.evaluate(enq)
 
       # Enough enqueued to unblock the dequeue
       thread.join()
diff --git a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
index 13f97a9367b..d80bea955eb 100644
--- a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
+++ b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py
@@ -62,7 +62,7 @@ class StatelessOpsTest(test.TestCase):
         for stateless_op, stateful_op in cases:
           stateful = stateful_op(seed=seed[1])
           pure = stateless_op(seed=preseed)
-          self.assertAllEqual(stateful.eval(), self.evaluate(pure))
+          self.assertAllEqual(self.evaluate(stateful), self.evaluate(pure))
 
   def _test_determinism(self, cases):
     # Stateless values should be equal iff the seeds are equal (roughly)
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 18a8a3d547f..a4a18c52194 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -140,147 +140,143 @@ class TFCompressionTestCase(test.TestCase):
 
 class IdentityReaderTest(test.TestCase):
 
-  def _ExpectRead(self, sess, key, value, expected):
-    k, v = sess.run([key, value])
+  def _ExpectRead(self, key, value, expected):
+    k, v = self.evaluate([key, value])
     self.assertAllEqual(expected, k)
     self.assertAllEqual(expected, v)
 
   def testOneEpoch(self):
-    with self.cached_session() as sess:
-      reader = io_ops.IdentityReader("test_reader")
-      work_completed = reader.num_work_units_completed()
-      produced = reader.num_records_produced()
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      queued_length = queue.size()
-      key, value = reader.read(queue)
+    reader = io_ops.IdentityReader("test_reader")
+    work_completed = reader.num_work_units_completed()
+    produced = reader.num_records_produced()
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    queued_length = queue.size()
+    key, value = reader.read(queue)
 
-      self.assertAllEqual(0, self.evaluate(work_completed))
-      self.assertAllEqual(0, self.evaluate(produced))
-      self.assertAllEqual(0, self.evaluate(queued_length))
+    self.assertAllEqual(0, self.evaluate(work_completed))
+    self.assertAllEqual(0, self.evaluate(produced))
+    self.assertAllEqual(0, self.evaluate(queued_length))
 
-      queue.enqueue_many([["A", "B", "C"]]).run()
-      queue.close().run()
-      self.assertAllEqual(3, self.evaluate(queued_length))
+    self.evaluate(queue.enqueue_many([["A", "B", "C"]]))
+    self.evaluate(queue.close())
+    self.assertAllEqual(3, self.evaluate(queued_length))
 
-      self._ExpectRead(sess, key, value, b"A")
-      self.assertAllEqual(1, self.evaluate(produced))
+    self._ExpectRead(key, value, b"A")
+    self.assertAllEqual(1, self.evaluate(produced))
 
-      self._ExpectRead(sess, key, value, b"B")
+    self._ExpectRead(key, value, b"B")
 
-      self._ExpectRead(sess, key, value, b"C")
-      self.assertAllEqual(3, self.evaluate(produced))
-      self.assertAllEqual(0, self.evaluate(queued_length))
+    self._ExpectRead(key, value, b"C")
+    self.assertAllEqual(3, self.evaluate(produced))
+    self.assertAllEqual(0, self.evaluate(queued_length))
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      self.evaluate([key, value])
 
-      self.assertAllEqual(3, self.evaluate(work_completed))
-      self.assertAllEqual(3, self.evaluate(produced))
-      self.assertAllEqual(0, self.evaluate(queued_length))
+    self.assertAllEqual(3, self.evaluate(work_completed))
+    self.assertAllEqual(3, self.evaluate(produced))
+    self.assertAllEqual(0, self.evaluate(queued_length))
 
   def testMultipleEpochs(self):
-    with self.cached_session() as sess:
-      reader = io_ops.IdentityReader("test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      enqueue = queue.enqueue_many([["DD", "EE"]])
-      key, value = reader.read(queue)
+    reader = io_ops.IdentityReader("test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    enqueue = queue.enqueue_many([["DD", "EE"]])
+    key, value = reader.read(queue)
 
-      enqueue.run()
-      self._ExpectRead(sess, key, value, b"DD")
-      self._ExpectRead(sess, key, value, b"EE")
-      enqueue.run()
-      self._ExpectRead(sess, key, value, b"DD")
-      self._ExpectRead(sess, key, value, b"EE")
-      enqueue.run()
-      self._ExpectRead(sess, key, value, b"DD")
-      self._ExpectRead(sess, key, value, b"EE")
-      queue.close().run()
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        sess.run([key, value])
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, b"DD")
+    self._ExpectRead(key, value, b"EE")
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, b"DD")
+    self._ExpectRead(key, value, b"EE")
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, b"DD")
+    self._ExpectRead(key, value, b"EE")
+    self.evaluate(queue.close())
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      self.evaluate([key, value])
 
   def testSerializeRestore(self):
-    with self.cached_session() as sess:
-      reader = io_ops.IdentityReader("test_reader")
-      produced = reader.num_records_produced()
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      queue.enqueue_many([["X", "Y", "Z"]]).run()
-      key, value = reader.read(queue)
+    reader = io_ops.IdentityReader("test_reader")
+    produced = reader.num_records_produced()
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    self.evaluate(queue.enqueue_many([["X", "Y", "Z"]]))
+    key, value = reader.read(queue)
 
-      self._ExpectRead(sess, key, value, b"X")
-      self.assertAllEqual(1, self.evaluate(produced))
-      state = reader.serialize_state().eval()
+    self._ExpectRead(key, value, b"X")
+    self.assertAllEqual(1, self.evaluate(produced))
+    state = self.evaluate(reader.serialize_state())
 
-      self._ExpectRead(sess, key, value, b"Y")
-      self._ExpectRead(sess, key, value, b"Z")
-      self.assertAllEqual(3, self.evaluate(produced))
+    self._ExpectRead(key, value, b"Y")
+    self._ExpectRead(key, value, b"Z")
+    self.assertAllEqual(3, self.evaluate(produced))
 
-      queue.enqueue_many([["Y", "Z"]]).run()
-      queue.close().run()
-      reader.restore_state(state).run()
-      self.assertAllEqual(1, self.evaluate(produced))
-      self._ExpectRead(sess, key, value, b"Y")
-      self._ExpectRead(sess, key, value, b"Z")
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        sess.run([key, value])
-      self.assertAllEqual(3, self.evaluate(produced))
+    self.evaluate(queue.enqueue_many([["Y", "Z"]]))
+    self.evaluate(queue.close())
+    self.evaluate(reader.restore_state(state))
+    self.assertAllEqual(1, self.evaluate(produced))
+    self._ExpectRead(key, value, b"Y")
+    self._ExpectRead(key, value, b"Z")
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      self.evaluate([key, value])
+    self.assertAllEqual(3, self.evaluate(produced))
 
-      self.assertEqual(bytes, type(state))
+    self.assertEqual(bytes, type(state))
 
-      with self.assertRaises(ValueError):
-        reader.restore_state([])
+    with self.assertRaises(ValueError):
+      reader.restore_state([])
 
-      with self.assertRaises(ValueError):
-        reader.restore_state([state, state])
+    with self.assertRaises(ValueError):
+      reader.restore_state([state, state])
 
-      with self.assertRaisesOpError(
-          "Could not parse state for IdentityReader 'test_reader'"):
-        reader.restore_state(state[1:]).run()
+    with self.assertRaisesOpError(
+        "Could not parse state for IdentityReader 'test_reader'"):
+      self.evaluate(reader.restore_state(state[1:]))
 
-      with self.assertRaisesOpError(
-          "Could not parse state for IdentityReader 'test_reader'"):
-        reader.restore_state(state[:-1]).run()
+    with self.assertRaisesOpError(
+        "Could not parse state for IdentityReader 'test_reader'"):
+      self.evaluate(reader.restore_state(state[:-1]))
 
-      with self.assertRaisesOpError(
-          "Could not parse state for IdentityReader 'test_reader'"):
-        reader.restore_state(state + b"ExtraJunk").run()
+    with self.assertRaisesOpError(
+        "Could not parse state for IdentityReader 'test_reader'"):
+      self.evaluate(reader.restore_state(state + b"ExtraJunk"))
 
-      with self.assertRaisesOpError(
-          "Could not parse state for IdentityReader 'test_reader'"):
-        reader.restore_state(b"PREFIX" + state).run()
+    with self.assertRaisesOpError(
+        "Could not parse state for IdentityReader 'test_reader'"):
+      self.evaluate(reader.restore_state(b"PREFIX" + state))
 
-      with self.assertRaisesOpError(
-          "Could not parse state for IdentityReader 'test_reader'"):
-        reader.restore_state(b"BOGUS" + state[5:]).run()
+    with self.assertRaisesOpError(
+        "Could not parse state for IdentityReader 'test_reader'"):
+      self.evaluate(reader.restore_state(b"BOGUS" + state[5:]))
 
   def testReset(self):
-    with self.cached_session() as sess:
-      reader = io_ops.IdentityReader("test_reader")
-      work_completed = reader.num_work_units_completed()
-      produced = reader.num_records_produced()
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      queued_length = queue.size()
-      key, value = reader.read(queue)
+    reader = io_ops.IdentityReader("test_reader")
+    work_completed = reader.num_work_units_completed()
+    produced = reader.num_records_produced()
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    queued_length = queue.size()
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([["X", "Y", "Z"]]).run()
-      self._ExpectRead(sess, key, value, b"X")
-      self.assertLess(0, self.evaluate(queued_length))
-      self.assertAllEqual(1, self.evaluate(produced))
+    self.evaluate(queue.enqueue_many([["X", "Y", "Z"]]))
+    self._ExpectRead(key, value, b"X")
+    self.assertLess(0, self.evaluate(queued_length))
+    self.assertAllEqual(1, self.evaluate(produced))
 
-      self._ExpectRead(sess, key, value, b"Y")
-      self.assertLess(0, self.evaluate(work_completed))
-      self.assertAllEqual(2, self.evaluate(produced))
+    self._ExpectRead(key, value, b"Y")
+    self.assertLess(0, self.evaluate(work_completed))
+    self.assertAllEqual(2, self.evaluate(produced))
 
-      reader.reset().run()
-      self.assertAllEqual(0, self.evaluate(work_completed))
-      self.assertAllEqual(0, self.evaluate(produced))
-      self.assertAllEqual(1, self.evaluate(queued_length))
-      self._ExpectRead(sess, key, value, b"Z")
+    self.evaluate(reader.reset())
+    self.assertAllEqual(0, self.evaluate(work_completed))
+    self.assertAllEqual(0, self.evaluate(produced))
+    self.assertAllEqual(1, self.evaluate(queued_length))
+    self._ExpectRead(key, value, b"Z")
 
-      queue.enqueue_many([["K", "L"]]).run()
-      self._ExpectRead(sess, key, value, b"K")
+    self.evaluate(queue.enqueue_many([["K", "L"]]))
+    self._ExpectRead(key, value, b"K")
 
 
 class WholeFileReaderTest(test.TestCase):
@@ -301,44 +297,42 @@ class WholeFileReaderTest(test.TestCase):
       os.remove(fn)
     super(WholeFileReaderTest, self).tearDown()
 
-  def _ExpectRead(self, sess, key, value, index):
-    k, v = sess.run([key, value])
+  def _ExpectRead(self, key, value, index):
+    k, v = self.evaluate([key, value])
     self.assertAllEqual(compat.as_bytes(self._filenames[index]), k)
     self.assertAllEqual(self._content[index], v)
 
   def testOneEpoch(self):
-    with self.cached_session() as sess:
-      reader = io_ops.WholeFileReader("test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      queue.enqueue_many([self._filenames]).run()
-      queue.close().run()
-      key, value = reader.read(queue)
+    reader = io_ops.WholeFileReader("test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    self.evaluate(queue.enqueue_many([self._filenames]))
+    self.evaluate(queue.close())
+    key, value = reader.read(queue)
 
-      self._ExpectRead(sess, key, value, 0)
-      self._ExpectRead(sess, key, value, 1)
-      self._ExpectRead(sess, key, value, 2)
+    self._ExpectRead(key, value, 0)
+    self._ExpectRead(key, value, 1)
+    self._ExpectRead(key, value, 2)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      self.evaluate([key, value])
 
   def testInfiniteEpochs(self):
-    with self.cached_session() as sess:
-      reader = io_ops.WholeFileReader("test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      enqueue = queue.enqueue_many([self._filenames])
-      key, value = reader.read(queue)
+    reader = io_ops.WholeFileReader("test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    enqueue = queue.enqueue_many([self._filenames])
+    key, value = reader.read(queue)
 
-      enqueue.run()
-      self._ExpectRead(sess, key, value, 0)
-      self._ExpectRead(sess, key, value, 1)
-      enqueue.run()
-      self._ExpectRead(sess, key, value, 2)
-      self._ExpectRead(sess, key, value, 0)
-      self._ExpectRead(sess, key, value, 1)
-      enqueue.run()
-      self._ExpectRead(sess, key, value, 2)
-      self._ExpectRead(sess, key, value, 0)
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, 0)
+    self._ExpectRead(key, value, 1)
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, 2)
+    self._ExpectRead(key, value, 0)
+    self._ExpectRead(key, value, 1)
+    self.evaluate(enqueue)
+    self._ExpectRead(key, value, 2)
+    self._ExpectRead(key, value, 0)
 
 
 class TextLineReaderTest(test.TestCase):
@@ -366,22 +360,21 @@ class TextLineReaderTest(test.TestCase):
     return filenames
 
   def _testOneEpoch(self, files):
-    with self.cached_session() as sess:
-      reader = io_ops.TextLineReader(name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.TextLineReader(name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(self._num_lines):
-          k, v = sess.run([key, value])
-          self.assertAllEqual("%s:%d" % (files[i], j + 1), compat.as_text(k))
-          self.assertAllEqual(self._LineText(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(self._num_lines):
+        k, v = self.evaluate([key, value])
+        self.assertAllEqual("%s:%d" % (files[i], j + 1), compat.as_text(k))
+        self.assertAllEqual(self._LineText(i, j), v)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def testOneEpochLF(self):
     self._testOneEpoch(self._CreateFiles(crlf=False))
@@ -391,22 +384,21 @@ class TextLineReaderTest(test.TestCase):
 
   def testSkipHeaderLines(self):
     files = self._CreateFiles()
-    with self.cached_session() as sess:
-      reader = io_ops.TextLineReader(skip_header_lines=1, name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.TextLineReader(skip_header_lines=1, name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(self._num_lines - 1):
-          k, v = sess.run([key, value])
-          self.assertAllEqual("%s:%d" % (files[i], j + 2), compat.as_text(k))
-          self.assertAllEqual(self._LineText(i, j + 1), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(self._num_lines - 1):
+        k, v = self.evaluate([key, value])
+        self.assertAllEqual("%s:%d" % (files[i], j + 2), compat.as_text(k))
+        self.assertAllEqual(self._LineText(i, j + 1), v)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
 
 class FixedLengthRecordReaderTest(TFCompressionTestCase):
@@ -522,55 +514,53 @@ class FixedLengthRecordReaderTest(TFCompressionTestCase):
   # gap_bytes=hop_bytes-record_bytes
   def _TestOneEpoch(self, files, num_records, gap_bytes, encoding=None):
     hop_bytes = 0 if gap_bytes == 0 else self._record_bytes + gap_bytes
-    with self.cached_session() as sess:
-      reader = io_ops.FixedLengthRecordReader(
-          header_bytes=self._header_bytes,
-          record_bytes=self._record_bytes,
-          footer_bytes=self._footer_bytes,
-          hop_bytes=hop_bytes,
-          encoding=encoding,
-          name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.FixedLengthRecordReader(
+        header_bytes=self._header_bytes,
+        record_bytes=self._record_bytes,
+        footer_bytes=self._footer_bytes,
+        hop_bytes=hop_bytes,
+        encoding=encoding,
+        name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(num_records):
-          k, v = sess.run([key, value])
-          self.assertAllEqual("%s:%d" % (files[i], j), compat.as_text(k))
-          self.assertAllEqual(self._Record(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(num_records):
+        k, v = self.evaluate([key, value])
+        self.assertAllEqual("%s:%d" % (files[i], j), compat.as_text(k))
+        self.assertAllEqual(self._Record(i, j), v)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def _TestOneEpochWithHopBytes(self,
                                 files,
                                 num_overlapped_records,
                                 encoding=None):
-    with self.cached_session() as sess:
-      reader = io_ops.FixedLengthRecordReader(
-          header_bytes=self._header_bytes,
-          record_bytes=self._record_bytes,
-          footer_bytes=self._footer_bytes,
-          hop_bytes=self._hop_bytes,
-          encoding=encoding,
-          name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.FixedLengthRecordReader(
+        header_bytes=self._header_bytes,
+        record_bytes=self._record_bytes,
+        footer_bytes=self._footer_bytes,
+        hop_bytes=self._hop_bytes,
+        encoding=encoding,
+        name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(num_overlapped_records):
-          k, v = sess.run([key, value])
-          self.assertAllEqual("%s:%d" % (files[i], j), compat.as_text(k))
-          self.assertAllEqual(self._OverlappedRecord(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(num_overlapped_records):
+        k, v = self.evaluate([key, value])
+        self.assertAllEqual("%s:%d" % (files[i], j), compat.as_text(k))
+        self.assertAllEqual(self._OverlappedRecord(i, j), v)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def testOneEpoch(self):
     for num_records in [0, 7]:
@@ -621,84 +611,80 @@ class TFRecordReaderTest(TFCompressionTestCase):
 
   def testOneEpoch(self):
     files = self._CreateFiles()
-    with self.cached_session() as sess:
-      reader = io_ops.TFRecordReader(name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.TFRecordReader(name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(self._num_records):
-          k, v = sess.run([key, value])
-          self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
-          self.assertAllEqual(self._Record(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(self._num_records):
+        k, v = self.evaluate([key, value])
+        self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
+        self.assertAllEqual(self._Record(i, j), v)
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def testReadUpTo(self):
     files = self._CreateFiles()
-    with self.cached_session() as sess:
-      reader = io_ops.TFRecordReader(name="test_reader")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      batch_size = 3
-      key, value = reader.read_up_to(queue, batch_size)
+    reader = io_ops.TFRecordReader(name="test_reader")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    batch_size = 3
+    key, value = reader.read_up_to(queue, batch_size)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      num_k = 0
-      num_v = 0
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    num_k = 0
+    num_v = 0
 
-      while True:
-        try:
-          k, v = sess.run([key, value])
-          # Test reading *up to* batch_size records
-          self.assertLessEqual(len(k), batch_size)
-          self.assertLessEqual(len(v), batch_size)
-          num_k += len(k)
-          num_v += len(v)
-        except errors_impl.OutOfRangeError:
-          break
+    while True:
+      try:
+        k, v = self.evaluate([key, value])
+        # Test reading *up to* batch_size records
+        self.assertLessEqual(len(k), batch_size)
+        self.assertLessEqual(len(v), batch_size)
+        num_k += len(k)
+        num_v += len(v)
+      except errors_impl.OutOfRangeError:
+        break
 
-      # Test that we have read everything
-      self.assertEqual(self._num_files * self._num_records, num_k)
-      self.assertEqual(self._num_files * self._num_records, num_v)
+    # Test that we have read everything
+    self.assertEqual(self._num_files * self._num_records, num_k)
+    self.assertEqual(self._num_files * self._num_records, num_v)
 
   def testReadZlibFiles(self):
     options = tf_record.TFRecordOptions(TFRecordCompressionType.ZLIB)
     files = self._CreateFiles(options)
 
-    with self.cached_session() as sess:
-      reader = io_ops.TFRecordReader(name="test_reader", options=options)
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.TFRecordReader(name="test_reader", options=options)
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(self._num_records):
-          k, v = sess.run([key, value])
-          self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
-          self.assertAllEqual(self._Record(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(self._num_records):
+        k, v = self.evaluate([key, value])
+        self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
+        self.assertAllEqual(self._Record(i, j), v)
 
   def testReadGzipFiles(self):
     options = tf_record.TFRecordOptions(TFRecordCompressionType.GZIP)
     files = self._CreateFiles(options)
 
-    with self.cached_session() as sess:
-      reader = io_ops.TFRecordReader(name="test_reader", options=options)
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.TFRecordReader(name="test_reader", options=options)
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue_many([files]).run()
-      queue.close().run()
-      for i in range(self._num_files):
-        for j in range(self._num_records):
-          k, v = sess.run([key, value])
-          self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
-          self.assertAllEqual(self._Record(i, j), v)
+    self.evaluate(queue.enqueue_many([files]))
+    self.evaluate(queue.close())
+    for i in range(self._num_files):
+      for j in range(self._num_records):
+        k, v = self.evaluate([key, value])
+        self.assertTrue(compat.as_text(k).startswith("%s:" % files[i]))
+        self.assertAllEqual(self._Record(i, j), v)
 
 
 class AsyncReaderTest(test.TestCase):
@@ -724,7 +710,7 @@ class AsyncReaderTest(test.TestCase):
         thread_data.append(thread_data_t(t, queue, output))
 
       # Start all readers. They are all blocked waiting for queue entries.
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for d in thread_data:
         d.thread.start()
 
@@ -733,7 +719,7 @@ class AsyncReaderTest(test.TestCase):
         fname = os.path.join(self.get_temp_dir(), "deadlock.%s.txt" % i)
         with open(fname, "wb") as f:
           f.write(("file-%s" % i).encode())
-        d.queue.enqueue_many([[fname]]).run()
+        self.evaluate(d.queue.enqueue_many([[fname]]))
         d.thread.join()
         self.assertEqual([[("file-%s" % i).encode()]], d.output)
 
@@ -752,22 +738,21 @@ class LMDBReaderTest(test.TestCase):
     shutil.copy(path, self.db_path)
 
   def testReadFromFile(self):
-    with self.cached_session() as sess:
-      reader = io_ops.LMDBReader(name="test_read_from_file")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.LMDBReader(name="test_read_from_file")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue([self.db_path]).run()
-      queue.close().run()
-      for i in range(10):
-        k, v = sess.run([key, value])
-        self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(i)))
-        self.assertAllEqual(
-            compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + i))))
+    self.evaluate(queue.enqueue([self.db_path]))
+    self.evaluate(queue.close())
+    for i in range(10):
+      k, v = self.evaluate([key, value])
+      self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(i)))
+      self.assertAllEqual(
+          compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + i))))
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def testReadFromSameFile(self):
     with self.cached_session() as sess:
@@ -782,29 +767,28 @@ class LMDBReaderTest(test.TestCase):
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
       for _ in range(3):
         for _ in range(10):
-          k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
+          k1, v1, k2, v2 = self.evaluate([key1, value1, key2, value2])
           self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
           self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
       coord.request_stop()
       coord.join(threads)
 
   def testReadFromFolder(self):
-    with self.cached_session() as sess:
-      reader = io_ops.LMDBReader(name="test_read_from_folder")
-      queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
-      key, value = reader.read(queue)
+    reader = io_ops.LMDBReader(name="test_read_from_folder")
+    queue = data_flow_ops.FIFOQueue(99, [dtypes.string], shapes=())
+    key, value = reader.read(queue)
 
-      queue.enqueue([self.db_path]).run()
-      queue.close().run()
-      for i in range(10):
-        k, v = sess.run([key, value])
-        self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(i)))
-        self.assertAllEqual(
-            compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + i))))
+    self.evaluate(queue.enqueue([self.db_path]))
+    self.evaluate(queue.close())
+    for i in range(10):
+      k, v = self.evaluate([key, value])
+      self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(i)))
+      self.assertAllEqual(
+          compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + i))))
 
-      with self.assertRaisesOpError("is closed and has insufficient elements "
-                                    "\\(requested 1, current size 0\\)"):
-        k, v = sess.run([key, value])
+    with self.assertRaisesOpError("is closed and has insufficient elements "
+                                  "\\(requested 1, current size 0\\)"):
+      k, v = self.evaluate([key, value])
 
   def testReadFromFileRepeatedly(self):
     with self.cached_session() as sess:
@@ -819,7 +803,7 @@ class LMDBReaderTest(test.TestCase):
       for _ in range(3):
         # Go over all 10 records each time.
         for j in range(10):
-          k, v = sess.run([key, value])
+          k, v = self.evaluate([key, value])
           self.assertAllEqual(compat.as_bytes(k), compat.as_bytes(str(j)))
           self.assertAllEqual(
               compat.as_bytes(v), compat.as_bytes(str(chr(ord("a") + j))))
diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py
index ebb9872f226..74020667d93 100644
--- a/tensorflow/python/kernel_tests/record_input_test.py
+++ b/tensorflow/python/kernel_tests/record_input_test.py
@@ -54,7 +54,7 @@ class RecordInputOpTest(test.TestCase):
           batch_size=1,
           name="record_input").get_yield_op()
 
-      self.assertEqual(sess.run(yield_op), b"0000000000")
+      self.assertEqual(self.evaluate(yield_op), b"0000000000")
 
   def testRecordInputSimpleGzip(self):
     with self.cached_session() as sess:
@@ -73,7 +73,7 @@ class RecordInputOpTest(test.TestCase):
           compression_type=tf_record.TFRecordCompressionType.GZIP).get_yield_op(
           )
 
-      self.assertEqual(sess.run(yield_op), b"0000000000")
+      self.assertEqual(self.evaluate(yield_op), b"0000000000")
 
   def testRecordInputSimpleZlib(self):
     with self.cached_session() as sess:
@@ -92,7 +92,7 @@ class RecordInputOpTest(test.TestCase):
           compression_type=tf_record.TFRecordCompressionType.ZLIB).get_yield_op(
           )
 
-      self.assertEqual(sess.run(yield_op), b"0000000000")
+      self.assertEqual(self.evaluate(yield_op), b"0000000000")
 
   def testRecordInputEpochs(self):
     files = 100
@@ -117,7 +117,7 @@ class RecordInputOpTest(test.TestCase):
       for _ in range(3):
         epoch_set = set()
         for _ in range(int(files * records_per_file / batches)):
-          op_list = sess.run(yield_op)
+          op_list = self.evaluate(yield_op)
           self.assertTrue(len(op_list) is batches)
           for r in op_list:
             self.assertTrue(r[0] not in epoch_set)
@@ -138,15 +138,15 @@ class RecordInputOpTest(test.TestCase):
 
         yield_op = records.get_yield_op()
         for _ in range(50):
-          sess.run(yield_op)
+          self.evaluate(yield_op)
 
   def testEmptyGlob(self):
     with self.cached_session() as sess:
       record_input = data_flow_ops.RecordInput(file_pattern="foo")
       yield_op = record_input.get_yield_op()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       with self.assertRaises(NotFoundError):
-        sess.run(yield_op)
+        self.evaluate(yield_op)
 
   def testBufferTooSmall(self):
     files = 10
@@ -171,7 +171,7 @@ class RecordInputOpTest(test.TestCase):
       for _ in range(3):
         epoch_set = set()
         for _ in range(int(files * records_per_file / batches)):
-          op_list = sess.run(yield_op)
+          op_list = self.evaluate(yield_op)
           self.assertTrue(len(op_list) is batches)
           for r in op_list:
             self.assertTrue(r[0] not in epoch_set)
diff --git a/tensorflow/python/kernel_tests/reduce_benchmark_test.py b/tensorflow/python/kernel_tests/reduce_benchmark_test.py
index 3a2fb81157d..ef9c4c350fd 100644
--- a/tensorflow/python/kernel_tests/reduce_benchmark_test.py
+++ b/tensorflow/python/kernel_tests/reduce_benchmark_test.py
@@ -81,7 +81,7 @@ class ReduceBenchmarks(test.Benchmark):
       grad, = gradients_impl.gradients(reduction, tensor)
 
       def fn():
-        sess.run(grad.op)
+        self.evaluate(grad.op)
 
       self._run(fn, 10000)
 
@@ -98,7 +98,7 @@ class ReduceBenchmarks(test.Benchmark):
         grad, = gradients_impl.gradients(reduction, tensor)
 
       def fn():
-        sess.run(grad.op)
+        self.evaluate(grad.op)
 
       self._run(fn, 10000)
 
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index d1a295f42b4..4eb329796e1 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -185,7 +185,7 @@ class SumReductionTest(BaseReductionTest):
     for dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_sum([0, 0], constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, 0)
 
   def testInfinity(self):
@@ -216,7 +216,7 @@ class SumReductionTest(BaseReductionTest):
       tf_arr = variables.Variable(arr)
       variables.global_variables_initializer().run()
       tf_mean = math_ops.reduce_mean(tf_arr, 0, False)
-      tf_out_mean = sess.run(tf_mean)
+      tf_out_mean = self.evaluate(tf_mean)
     self.assertAllClose(tf_out_mean, 1.)
 
   def testFloat32(self):
@@ -238,7 +238,7 @@ class SumReductionTest(BaseReductionTest):
       with self.session(graph=ops.Graph(), use_gpu=True) as sess:
         tf_row_sum = self._tf_reduce(arr, 1, False)
         tf_col_sum = self._tf_reduce(arr, 0, False)
-        tf_out_row, tf_out_col = sess.run([tf_row_sum, tf_col_sum])
+        tf_out_row, tf_out_col = self.evaluate([tf_row_sum, tf_col_sum])
       self.assertAllClose(col_sum, tf_out_col)
       self.assertAllClose(row_sum, tf_out_row)
 
@@ -252,7 +252,7 @@ class SumReductionTest(BaseReductionTest):
           with self.session(graph=ops.Graph(), use_gpu=True) as sess:
             tf_sum_xz = self._tf_reduce(arr, [0, 2], False)
             tf_sum_y = self._tf_reduce(arr, 1, False)
-            tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y])
+            tf_out_sum_xz, tf_out_sum_y = self.evaluate([tf_sum_xz, tf_sum_y])
           self.assertAllClose(sum_y, tf_out_sum_y)
           self.assertAllClose(sum_xz, tf_out_sum_xz)
 
@@ -400,7 +400,7 @@ class MeanReductionTest(BaseReductionTest):
     for dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_mean([0, 0], constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, 0)
 
   def testInfinity(self):
@@ -473,7 +473,7 @@ class ProdReductionTest(BaseReductionTest):
     for dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_prod([0, 0], constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, 0)
 
   def testInfinity(self):
@@ -576,7 +576,7 @@ class MinReductionTest(test.TestCase):
     for dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_min([0, 0], constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, 0)
 
   def testInfinity(self):
@@ -689,7 +689,7 @@ class MaxReductionTest(test.TestCase):
     for dtype in [dtypes.int64, dtypes.int32]:
       with self.cached_session(use_gpu=True) as sess:
         v = math_ops.reduce_max([0, 0], constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, 0)
 
   def testInfinity(self):
@@ -817,7 +817,7 @@ class AllReductionTest(test.TestCase):
       with self.session(use_gpu=True) as sess:
         v = math_ops.reduce_all([True, True],
                                 constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, True)
 
   def testAll3D(self):
@@ -866,7 +866,7 @@ class AnyReductionTest(test.TestCase):
       with self.session(use_gpu=True) as sess:
         v = math_ops.reduce_any([True, True],
                                 constant_op.constant(0, dtype=dtype))
-        tf_v = sess.run(v)
+        tf_v = self.evaluate(v)
       self.assertAllEqual(tf_v, True)
 
   def testAll3D(self):
@@ -962,7 +962,7 @@ class CountNonzeroReductionTest(test.TestCase):
     # Test case for GitHub issue 18712
     with self.cached_session() as sess:
       v = math_ops.count_nonzero(constant_op.constant(["test"]))
-      self.assertAllClose(sess.run(v), 1)
+      self.assertAllClose(self.evaluate(v), 1)
 
   def testStringReduce1D(self):
     # Create a 1D array of strings
diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py
index 68243f27c05..30cef908853 100644
--- a/tensorflow/python/kernel_tests/relu_op_test.py
+++ b/tensorflow/python/kernel_tests/relu_op_test.py
@@ -147,7 +147,7 @@ class ReluTest(test.TestCase):
       # Repeat the experiment for 100 times. All tensor shapes and its tensor
       # values are randomly generated for each run.
       for _ in xrange(100):
-        dx_f32_v, dx_f16_v = sess.run([dx_f32, dx_f16])
+        dx_f32_v, dx_f16_v = self.evaluate([dx_f32, dx_f16])
         self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4)
 
   def testGradientFloat64(self):
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index e85b04469b1..13b39926ec1 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -153,7 +153,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
   def testCachedValueReadBeforeWrite(self):
     with self.cached_session() as sess:
       v = resource_variable_ops.ResourceVariable(0.0, caching_device="cpu:0")
-      sess.run(v.initializer)
+      self.evaluate(v.initializer)
       value, _ = sess.run([v, v.assign_add(1.0)])
       self.assertAllEqual(value, 0.0)
 
@@ -590,11 +590,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     with ops.Graph().as_default(), self.cached_session() as sess:
       # v describes a VariableDef-based variable without an initial value.
       v = resource_variable_ops.ResourceVariable(variable_def=v_def)
-      self.assertEqual(3.0, sess.run(v.initialized_value()))
+      self.assertEqual(3.0, self.evaluate(v.initialized_value()))
 
       # initialized_value should not rerun the initializer_op if the variable
       # has already been initialized elsewhere.
-      sess.run(v.assign(1.0))
+      self.evaluate(v.assign(1.0))
       self.assertEqual(1.0, v.initialized_value().eval())
 
     v_def.ClearField("initial_value_name")
@@ -606,7 +606,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       self.assertProtoEquals(v_def, v.to_proto())
       # But attempts to use initialized_value will result in errors.
       with self.assertRaises(ValueError):
-        sess.run(v.initialized_value())
+        self.evaluate(v.initialized_value())
 
   def testTrainableInProto(self):
     with ops.Graph().as_default():
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 952ef34456e..c3881219828 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -161,8 +161,8 @@ class StatefulScatterNdTest(test.TestCase):
     init = variables.global_variables_initializer()
 
     with self.session(use_gpu=True) as sess:
-      sess.run(init)
-      result = sess.run(scatter)
+      self.evaluate(init)
+      result = self.evaluate(scatter)
       self.assertAllClose(result, expected)
 
   def testSimpleResource(self):
@@ -175,8 +175,8 @@ class StatefulScatterNdTest(test.TestCase):
     init = variables.global_variables_initializer()
 
     with self.session(use_gpu=True) as sess:
-      sess.run(init)
-      sess.run(scatter)
+      self.evaluate(init)
+      self.evaluate(scatter)
       self.assertAllClose(ref.eval(), expected)
 
   def testSimple2(self):
@@ -189,8 +189,8 @@ class StatefulScatterNdTest(test.TestCase):
     init = variables.global_variables_initializer()
 
     with self.session(use_gpu=True) as sess:
-      sess.run(init)
-      result = sess.run(scatter)
+      self.evaluate(init)
+      result = self.evaluate(scatter)
       self.assertAllClose(result, expected)
 
   def testSimple3(self):
@@ -203,8 +203,8 @@ class StatefulScatterNdTest(test.TestCase):
     init = variables.global_variables_initializer()
 
     with self.session(use_gpu=True) as sess:
-      sess.run(init)
-      result = sess.run(scatter)
+      self.evaluate(init)
+      result = self.evaluate(scatter)
       self.assertAllClose(result, expected)
 
   def testVariableRankUpdate(self):
@@ -341,8 +341,8 @@ class StatefulScatterNdTest(test.TestCase):
     init = variables.global_variables_initializer()
 
     with session.Session() as sess:
-      sess.run(init)
-      result = sess.run(scatter)
+      self.evaluate(init)
+      result = self.evaluate(scatter)
       assert np.allclose(result, expected_result)
 
   # TODO(fpmc): Re-enable this test when gpu_pip test actually runs on a GPU.
@@ -421,7 +421,7 @@ class ScatterNdTest(test.TestCase):
                          b"", b"", b"seven"])
     scatter = self.scatter_nd(indices, updates, shape=(8,))
     with self.cached_session() as sess:
-      result = sess.run(scatter)
+      result = self.evaluate(scatter)
       self.assertAllEqual(expected, result)
 
     # Same indice is updated twice by same value.
@@ -432,7 +432,7 @@ class ScatterNdTest(test.TestCase):
     expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
     scatter = self.scatter_nd(indices, updates, shape=(8,))
     with self.cached_session() as sess:
-      result = sess.run(scatter)
+      result = self.evaluate(scatter)
       self.assertAllEqual(expected, result)
 
     # Same indice is updated twice by different value.
@@ -444,7 +444,7 @@ class ScatterNdTest(test.TestCase):
                 np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
     scatter = self.scatter_nd(indices, updates, shape=(8,))
     with self.cached_session() as sess:
-      result = sess.run(scatter)
+      result = self.evaluate(scatter)
       self.assertTrue(np.array_equal(result, expected[0]) or
                       np.array_equal(result, expected[1]))
 
diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
index 85756b769d8..42577f7e423 100644
--- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
+++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -63,7 +63,7 @@ class SelfAdjointEigTest(test.TestCase):
           e1 = linalg_ops.self_adjoint_eigvals(matrix1)
           e2 = linalg_ops.self_adjoint_eigvals(matrix2)
           all_ops += [e1, e2]
-      val = sess.run(all_ops)
+      val = self.evaluate(all_ops)
       self.assertAllEqual(val[0], val[2])
       # The algorithm is slightly different for compute_v being True and False,
       # so require approximate equality only here.
@@ -81,7 +81,7 @@ class SelfAdjointEigTest(test.TestCase):
     self.assertEqual(matrix.shape, (32, 32))
     matrix_tensor = constant_op.constant(matrix)
     with self.session(use_gpu=True) as sess:
-      (e, v) = sess.run(linalg_ops.self_adjoint_eig(matrix_tensor))
+      (e, v) = self.evaluate(linalg_ops.self_adjoint_eig(matrix_tensor))
       self.assertEqual(e.size, 32)
       self.assertAllClose(
           np.matmul(v, v.transpose()), np.eye(32, dtype=np.float32), atol=2e-3)
diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/session_ops_test.py
index 03e1ae852fc..dc663cb091c 100644
--- a/tensorflow/python/kernel_tests/session_ops_test.py
+++ b/tensorflow/python/kernel_tests/session_ops_test.py
@@ -37,7 +37,7 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(5)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Feed a tensor handle.
       f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
@@ -51,7 +51,7 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(5)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Get the tensor from its handle.
       self.assertEqual(50, h.eval())
@@ -64,7 +64,7 @@ class SessionOpsTest(test.TestCase):
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
       v = math_ops.multiply(a, c)
-      h, v = sess.run([h, v])
+      h, v = self.evaluate([h, v])
 
       self.assertEqual(50, h.eval())
       self.assertEqual(500, v)
@@ -77,7 +77,7 @@ class SessionOpsTest(test.TestCase):
       p = math_ops.less(a, b)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      p, h = sess.run([p, h])
+      p, h = self.evaluate([p, h])
 
       # Run by feeding a tensor handle.
       f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
@@ -94,7 +94,7 @@ class SessionOpsTest(test.TestCase):
       # Initialize a handle.
       a = constant_op.constant(0)
       h = session_ops.get_session_handle(a)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Do some computation.
       f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
@@ -111,7 +111,7 @@ class SessionOpsTest(test.TestCase):
       # Initialize a handle.
       a = constant_op.constant(0)
       h = session_ops.get_session_handle(a)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Do some computation.
       f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
@@ -133,7 +133,7 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(5)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Feed a tensor handle.
       f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
@@ -144,7 +144,7 @@ class SessionOpsTest(test.TestCase):
       with ops.device(test.gpu_device_name()):
         a = constant_op.constant(10)
         h = session_ops.get_session_handle(a)
-        h = sess.run(h)
+        h = self.evaluate(h)
         self.assertEqual(100, sess.run(y, feed_dict={f: h.handle}))
 
   def testHandleDelete(self):
@@ -154,7 +154,7 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(5)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      sess.run(h).delete()
+      self.evaluate(h).delete()
 
   def testHandleDeleteRaw(self):
     with self.cached_session() as sess:
@@ -163,7 +163,7 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(5)
       c = math_ops.multiply(a, b)
       h = session_ops.get_session_handle(c)
-      h = sess.run(h)
+      h = self.evaluate(h)
 
       # Delete using a raw tensor handle.
       raw_h = h.get_raw_handle()
@@ -174,10 +174,10 @@ class SessionOpsTest(test.TestCase):
     with self.cached_session() as sess:
       with ops.device(test.gpu_device_name()):
         a = constant_op.constant(1.0)
-        a_handle = sess.run(session_ops.get_session_handle(a))
+        a_handle = self.evaluate(session_ops.get_session_handle(a))
       with ops.device("/cpu:0"):
         b = constant_op.constant(2.0)
-        b_handle = sess.run(session_ops.get_session_handle(b))
+        b_handle = self.evaluate(session_ops.get_session_handle(b))
 
       a_p, a_t = session_ops.get_session_tensor(a_handle.handle, dtypes.float32)
       b_p, b_t = session_ops.get_session_tensor(b_handle.handle, dtypes.float32)
@@ -193,8 +193,8 @@ class SessionOpsTest(test.TestCase):
       # initial values live on CPU
       with ops.device("/cpu:0"):
         one = constant_op.constant(1, dtype=dtypes.float32)
-        one_handle = sess.run(session_ops.get_session_handle(one))
-        x_handle = sess.run(session_ops.get_session_handle(one))
+        one_handle = self.evaluate(session_ops.get_session_handle(one))
+        x_handle = self.evaluate(session_ops.get_session_handle(one))
 
       # addition lives on GPU
       with ops.device(test.gpu_device_name()):
@@ -219,8 +219,8 @@ class SessionOpsTest(test.TestCase):
       b = constant_op.constant(2.0)
       b_handle_op = session_ops.get_session_handle(b)
 
-      a_handle = sess.run(a_handle_op)
-      b_handle = sess.run(b_handle_op)
+      a_handle = self.evaluate(a_handle_op)
+      b_handle = self.evaluate(b_handle_op)
 
       a_p, a_t = session_ops.get_session_tensor(a_handle.handle, dtypes.float32)
       b_p, b_t = session_ops.get_session_tensor(b_handle.handle, dtypes.float32)
@@ -239,7 +239,7 @@ class SessionOpsTest(test.TestCase):
       c = math_ops.multiply(a, b)
       d = math_ops.multiply(c, c)
 
-      h_c = sess.run(session_ops.get_session_handle(c))
+      h_c = self.evaluate(session_ops.get_session_handle(c))
 
       self.assertAllClose(2500.0, sess.run(d, feed_dict={c: h_c}))
 
@@ -248,7 +248,7 @@ class SessionOpsTest(test.TestCase):
       a = constant_op.constant(10.0)
       b = constant_op.constant(5.0)
       c = math_ops.multiply(a, b)
-      h_c = sess.run(session_ops.get_session_handle(c))
+      h_c = self.evaluate(session_ops.get_session_handle(c))
       d = array_ops.identity(c)
 
       c_val = sess.run(c, feed_dict={c: h_c})
@@ -277,8 +277,8 @@ class SessionOpsTest(test.TestCase):
       d = math_ops.div(a, b)
       e = math_ops.subtract(c, d)
 
-      h_c = sess.run(session_ops.get_session_handle(c))
-      h_d = sess.run(session_ops.get_session_handle(d))
+      h_c = self.evaluate(session_ops.get_session_handle(c))
+      h_d = self.evaluate(session_ops.get_session_handle(d))
 
       self.assertAllClose(48.0, sess.run(e, feed_dict={c: h_c, d: h_d}))
       self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c}))
@@ -288,13 +288,13 @@ class SessionOpsTest(test.TestCase):
       a = variables.Variable(12.0)
       inc_a = state_ops.assign_add(a, 2.0)
       b = math_ops.add(a, 5.0)
-      sess.run(a.initializer)
+      self.evaluate(a.initializer)
 
       h_a_read = sess.run(session_ops.get_session_handle(a.read_value()))
-      self.assertAllClose(12.0, sess.run(a))
+      self.assertAllClose(12.0, self.evaluate(a))
 
       self.assertAllClose(17.0, sess.run(b, feed_dict={a: h_a_read}))
-      sess.run(inc_a)
+      self.evaluate(inc_a)
       self.assertAllClose(19.0, sess.run(b, feed_dict={a: h_a_read}))
 
 
diff --git a/tensorflow/python/kernel_tests/sets_test.py b/tensorflow/python/kernel_tests/sets_test.py
index 8335e9c139a..ba3d32b192d 100644
--- a/tensorflow/python/kernel_tests/sets_test.py
+++ b/tensorflow/python/kernel_tests/sets_test.py
@@ -159,7 +159,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
       self.assertEqual(None, op.get_shape().dims)
       self.assertEqual(dtypes.int32, op.dtype)
     with self.cached_session() as sess:
-      results = sess.run(ops)
+      results = self.evaluate(ops)
     self.assertAllEqual(results[0], results[1])
     return results[0]
 
@@ -534,7 +534,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
   def _set_intersection_count(self, a, b):
     op = sets.set_size(sets.set_intersection(a, b))
     with self.cached_session() as sess:
-      return sess.run(op)
+      return self.evaluate(op)
 
   def test_set_difference_multirow_2d(self):
     for dtype in _DTYPES:
@@ -972,7 +972,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
   def _set_difference_count(self, a, b, aminusb=True):
     op = sets.set_size(sets.set_difference(a, b, aminusb))
     with self.cached_session() as sess:
-      return sess.run(op)
+      return self.evaluate(op)
 
   def test_set_union_multirow_2d(self):
     for dtype in _DTYPES:
@@ -1221,7 +1221,7 @@ class SetOpsTest(test_util.TensorFlowTestCase):
   def _set_union_count(self, a, b):
     op = sets.set_size(sets.set_union(a, b))
     with self.cached_session() as sess:
-      return sess.run(op)
+      return self.evaluate(op)
 
   def _assert_set_operation(self, expected_indices, expected_values,
                             expected_shape, sparse_tensor_value, dtype):
diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py
index 3e0eae326bf..a0506fbfc57 100644
--- a/tensorflow/python/kernel_tests/shape_ops_test.py
+++ b/tensorflow/python/kernel_tests/shape_ops_test.py
@@ -73,8 +73,8 @@ class ShapeOpsTest(test.TestCase):
     with self.cached_session(use_gpu=use_gpu) as sess:
       tf_ans = array_ops.shape_n([x, x, x])
       tf_ans_64 = array_ops.shape_n([x, x, x], out_type=dtypes.int64)
-      result = sess.run(tf_ans)
-      result_64 = sess.run(tf_ans_64)
+      result = self.evaluate(tf_ans)
+      result_64 = self.evaluate(tf_ans_64)
     for i in range(3):
       self.assertAllEqual(np_ans, result[i])
       self.assertAllEqual(np_ans, result_64[i])
diff --git a/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py b/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
index c4e5b6f6740..de3351e543c 100644
--- a/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
+++ b/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
@@ -56,7 +56,7 @@ class ReconstructionOpsTest(test.TestCase):
     reconstruction = reconstruction_ops.overlap_and_add(signal, 2)
 
     with self.session(use_gpu=True) as sess:
-      output = sess.run(reconstruction)
+      output = self.evaluate(reconstruction)
 
       expected_output = np.array([1, 1, 2, 2, 3, 2, 2, 1, 1])
 
@@ -99,7 +99,7 @@ class ReconstructionOpsTest(test.TestCase):
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
     with self.session(use_gpu=True) as sess:
-      output = sess.run(reconstruction)
+      output = self.evaluate(reconstruction)
       string_output = [np.base_repr(x, self.bases[0]) for x in output]
 
       self.assertEqual(string_output, self.expected_string)
@@ -109,7 +109,7 @@ class ReconstructionOpsTest(test.TestCase):
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
     with self.session(use_gpu=True) as sess:
-      output = sess.run(reconstruction)
+      output = self.evaluate(reconstruction)
 
       accumulator = True
       for i in range(self.batch_size):
@@ -125,7 +125,7 @@ class ReconstructionOpsTest(test.TestCase):
     reconstruction = reconstruction_ops.overlap_and_add(signal, self.frame_hop)
 
     with self.session(use_gpu=True) as sess:
-      output = sess.run(reconstruction)
+      output = self.evaluate(reconstruction)
 
       string_output = [np.base_repr(int(x), self.bases[0]) for x in
                        np.squeeze(output)]
diff --git a/tensorflow/python/kernel_tests/signal/spectral_ops_test.py b/tensorflow/python/kernel_tests/signal/spectral_ops_test.py
index 7583c4d8fc5..7b9748c7f26 100644
--- a/tensorflow/python/kernel_tests/signal/spectral_ops_test.py
+++ b/tensorflow/python/kernel_tests/signal/spectral_ops_test.py
@@ -235,7 +235,8 @@ class SpectralOpsTest(test.TestCase):
       inverse_window = inverse_window_fn(frame_length, dtype=dtypes.float32)
 
       with self.cached_session(use_gpu=True) as sess:
-        hann_window, inverse_window = sess.run([hann_window, inverse_window])
+        hann_window, inverse_window = self.evaluate(
+            [hann_window, inverse_window])
 
       # Expect unit gain at each phase of the window.
       product_window = hann_window * inverse_window
@@ -263,7 +264,8 @@ class SpectralOpsTest(test.TestCase):
       inverse_window = inverse_window_fn(frame_length, dtype=dtypes.float32)
 
       with self.cached_session(use_gpu=True) as sess:
-        hann_window, inverse_window = sess.run([hann_window, inverse_window])
+        hann_window, inverse_window = self.evaluate(
+            [hann_window, inverse_window])
 
       self.assertAllClose(hann_window, inverse_window * 1.5)
 
@@ -293,7 +295,7 @@ class SpectralOpsTest(test.TestCase):
       # the sum of the magnitude STFT.
       sinusoid = math_ops.sin(
           2 * np.pi * math_ops.linspace(0.0, 1.0, signal_length))
-      sinusoid_gradient = sess.run(self._compute_stft_gradient(sinusoid))
+      sinusoid_gradient = self.evaluate(self._compute_stft_gradient(sinusoid))
       self.assertFalse((sinusoid_gradient == 0.0).all())
 
   def test_gradients_numerical(self):
diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py
index 5bb34a632d2..ee48c6eb0ed 100644
--- a/tensorflow/python/kernel_tests/slice_op_test.py
+++ b/tensorflow/python/kernel_tests/slice_op_test.py
@@ -207,7 +207,7 @@ class SliceTest(test.TestCase):
           dtype=dtypes.float32)
       slice_t = array_ops.slice(a, [0, 0], [2, 2])
       slice2_t = a[:2, :2]
-      slice_val, slice2_val = sess.run([slice_t, slice2_t])
+      slice_val, slice2_val = self.evaluate([slice_t, slice2_t])
     self.assertAllEqual(slice_val, inp[:2, :2])
     self.assertAllEqual(slice2_val, inp[:2, :2])
     self.assertEqual(slice_val.shape, slice_t.get_shape())
@@ -247,7 +247,7 @@ class SliceTest(test.TestCase):
                    + sizes[3], indices[4]:indices[4] + sizes[4], indices[5]:
                    indices[5] + sizes[5]]
 
-      slice_val, slice2_val = sess.run([slice_t, slice2_t])
+      slice_val, slice2_val = self.evaluate([slice_t, slice2_t])
 
     expected_val = inp[indices[0]:indices[0] + sizes[0], indices[1]:indices[
         1] + sizes[1], indices[2]:indices[2] + sizes[2], indices[3]:indices[
@@ -313,7 +313,7 @@ class SliceTest(test.TestCase):
       g1 = gradients_impl.gradients(loss1, x)[0]
       g2 = gradients_impl.gradients(loss2, x)[0]
 
-      g1_val, g2_val = sess.run([g1, g2])
+      g1_val, g2_val = self.evaluate([g1, g2])
     self.assertAllEqual(g1_val, g2_val)
 
   def testGradientsAll(self):
diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
index 8ac98a198c6..c9aaa68971a 100644
--- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py
+++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
@@ -273,7 +273,7 @@ class SpaceToDepthTest(test.TestCase):
       actual = array_ops.space_to_depth(t, block_size, data_format=data_format)
 
     with self.cached_session(use_gpu=use_gpu) as sess:
-      actual_vals, expected_vals = sess.run([actual, expected])
+      actual_vals, expected_vals = self.evaluate([actual, expected])
       self.assertTrue(np.array_equal(actual_vals, expected_vals))
 
   def testAgainstTranspose(self):
diff --git a/tensorflow/python/kernel_tests/sparse_add_op_test.py b/tensorflow/python/kernel_tests/sparse_add_op_test.py
index a746830afb3..c61f8633558 100644
--- a/tensorflow/python/kernel_tests/sparse_add_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_add_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
@@ -85,13 +86,13 @@ class SparseAddTest(test.TestCase):
         constant_op.constant(shape, dtypes.int64))
 
   def testAddSelf(self):
-    with self.session(use_gpu=False) as sess:
+    with test_util.force_cpu():
       for sp_a in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
         for sp_b in (self._SparseTensorValue_3x3(), self._SparseTensor_3x3()):
           sp_sum = sparse_ops.sparse_add(sp_a, sp_b)
           self.assertAllEqual((3, 3), sp_sum.get_shape())
 
-          sum_out = sess.run(sp_sum)
+          sum_out = self.evaluate(sp_sum)
 
           self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
           self.assertAllEqual(sum_out.indices, [[0, 1], [1, 0], [2, 0], [2, 1]])
@@ -99,12 +100,12 @@ class SparseAddTest(test.TestCase):
           self.assertAllEqual(sum_out.dense_shape, [3, 3])
 
   def testAddSelfAndNegation(self):
-    with self.session(use_gpu=False) as sess:
+    with test_util.force_cpu():
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x3(negate=True)
 
       sp_sum = sparse_ops.sparse_add(sp_a, sp_b, 0.1)
-      sum_out = sess.run(sp_sum)
+      sum_out = self.evaluate(sp_sum)
 
       self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
       self.assertAllEqual(sum_out.indices, np.empty([0, 2]))
@@ -112,7 +113,7 @@ class SparseAddTest(test.TestCase):
       self.assertAllEqual(sum_out.dense_shape, [3, 3])
 
   def testSmallValuesShouldVanish(self):
-    with self.session(use_gpu=False) as sess:
+    with test_util.force_cpu():
       sp_a = self._SparseTensor_3x3()
       sp_b = self._SparseTensor_3x3_v2()
 
@@ -123,7 +124,7 @@ class SparseAddTest(test.TestCase):
 
       # two values should vanish: |.1| < .21, and |-.2| < .21
       sp_sum = sparse_ops.sparse_add(sp_a, sp_b, thresh=0.21)
-      sum_out = sess.run(sp_sum)
+      sum_out = self.evaluate(sp_sum)
 
       self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
       self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0]])
@@ -132,7 +133,7 @@ class SparseAddTest(test.TestCase):
 
       # only .1 vanishes
       sp_sum = sparse_ops.sparse_add(sp_a, sp_b, thresh=0.11)
-      sum_out = sess.run(sp_sum)
+      sum_out = self.evaluate(sp_sum)
 
       self.assertEqual(sp_sum.dense_shape.get_shape(), [2])
       self.assertAllEqual(sum_out.indices, [[0, 1], [2, 0], [2, 1]])
@@ -147,7 +148,7 @@ class SparseAddTest(test.TestCase):
           sp_a, nnz_a = self._randomTensor([n, m], np.float32)
           sp_b, nnz_b = self._randomTensor([n, m], np.float32)
           sp_sum = sparse_ops.sparse_add(sp_a, sp_b)
-          nnz_sum = len(sp_sum.values.eval())
+          nnz_sum = len(self.evaluate(sp_sum.values))
 
           err = gradient_checker.compute_gradient_error(
               [sp_a.values, sp_b.values], [(nnz_a,), (nnz_b,)], sp_sum.values,
@@ -162,16 +163,16 @@ class SparseAddTest(test.TestCase):
         rand_vals_np = np.random.randn(n, m).astype(dtype)
         dense_np = np.random.randn(n, m).astype(dtype)
 
-        with self.cached_session(use_gpu=False):
+        with test_util.force_cpu():
           sparse, unused_nnz = _sparsify(rand_vals_np, index_dtype=index_dtype)
-          s = sparse_ops.sparse_add(sparse,
-                                    constant_op.constant(dense_np)).eval()
+          s = self.evaluate(
+              sparse_ops.sparse_add(sparse, constant_op.constant(dense_np)))
           self.assertAllEqual(dense_np + rand_vals_np, s)
           self.assertTrue(s.dtype == dtype)
 
           # check commutativity
-          s = sparse_ops.sparse_add(constant_op.constant(dense_np),
-                                    sparse).eval()
+          s = self.evaluate(
+              sparse_ops.sparse_add(constant_op.constant(dense_np), sparse))
           self.assertAllEqual(dense_np + rand_vals_np, s)
           self.assertTrue(s.dtype == dtype)
 
@@ -191,7 +192,7 @@ class SparseAddTest(test.TestCase):
       self.assertLess(err, 1e-3)
 
   def testInvalidSparseTensor(self):
-    with self.session(use_gpu=False) as sess:
+    with test_util.force_cpu():
       shape = [2, 2]
       val = [0]
       dense = constant_op.constant(np.zeros(shape, dtype=np.int32))
@@ -205,7 +206,7 @@ class SparseAddTest(test.TestCase):
 
         with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                      "invalid index"):
-          sess.run(s)
+          self.evaluate(s)
 
 ######################## Benchmarking code
 
diff --git a/tensorflow/python/kernel_tests/sparse_concat_op_test.py b/tensorflow/python/kernel_tests/sparse_concat_op_test.py
index 402c5eb4ea3..368a533e569 100644
--- a/tensorflow/python/kernel_tests/sparse_concat_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_concat_op_test.py
@@ -147,7 +147,7 @@ class SparseConcatTest(test.TestCase):
           self.assertEqual(sp_concat.values.get_shape(), [4])
           self.assertEqual(sp_concat.dense_shape.get_shape(), [2])
 
-          concat_out = sess.run(sp_concat)
+          concat_out = self.evaluate(sp_concat)
 
           self.assertAllEqual(concat_out.indices,
                               [[0, 2], [1, 0], [2, 0], [2, 2]])
@@ -169,7 +169,7 @@ class SparseConcatTest(test.TestCase):
             self.assertEqual(sp_concat.values.get_shape(), [8])
             self.assertEqual(sp_concat.dense_shape.get_shape(), [2])
 
-            concat_out = sess.run(sp_concat)
+            concat_out = self.evaluate(sp_concat)
 
             self.assertAllEqual(concat_out.indices, [[0, 2], [1, 0], [1, 4],
                                                      [2, 0], [2, 2], [2, 3],
@@ -195,7 +195,7 @@ class SparseConcatTest(test.TestCase):
         self.assertEqual(sp_concat.values.get_shape(), [7])
         self.assertEqual(sp_concat.dense_shape.get_shape(), [2])
 
-        concat_out = sess.run(sp_concat)
+        concat_out = self.evaluate(sp_concat)
 
         self.assertAllEqual(
             concat_out.indices,
@@ -220,7 +220,7 @@ class SparseConcatTest(test.TestCase):
         self.assertEqual(sp_concat.values.get_shape(), [10])
         self.assertEqual(sp_concat.dense_shape.get_shape(), [2])
 
-        concat_out = sess.run(sp_concat)
+        concat_out = self.evaluate(sp_concat)
 
         self.assertAllEqual(concat_out.indices, [[0, 2], [1, 0], [1, 4], [1, 8],
                                                  [2, 0], [2, 2], [2, 3], [2, 6],
@@ -244,7 +244,7 @@ class SparseConcatTest(test.TestCase):
         self.assertEqual(sp_concat.values.get_shape(), [8])
         self.assertEqual(sp_concat.dense_shape.get_shape(), [2])
 
-        concat_out = sess.run(sp_concat)
+        concat_out = self.evaluate(sp_concat)
 
         self.assertAllEqual(
             concat_out.indices,
@@ -287,7 +287,7 @@ class SparseConcatTest(test.TestCase):
 
         # Shape mismatches can only be caught when the op is run
         with self.assertRaisesOpError("Input shapes must match"):
-          sess.run(sp_concat)
+          self.evaluate(sp_concat)
 
   def testMismatchedShapesExpandNonconcatDim(self):
     with self.session(use_gpu=False) as sess:
@@ -302,8 +302,8 @@ class SparseConcatTest(test.TestCase):
           sp_concat_dim1 = sparse_ops.sparse_concat(
               concat_dim1, [sp_a, sp_b, sp_c, sp_d], expand_nonconcat_dim=True)
 
-          sp_concat_dim0_out = sess.run(sp_concat_dim0)
-          sp_concat_dim1_out = sess.run(sp_concat_dim1)
+          sp_concat_dim0_out = self.evaluate(sp_concat_dim0)
+          sp_concat_dim1_out = self.evaluate(sp_concat_dim1)
 
           self.assertAllEqual(sp_concat_dim0_out.indices,
                               [[0, 2], [1, 0], [2, 0], [2, 2], [4, 1], [5, 0],
diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
index a824d5c8263..66589fa315d 100644
--- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
@@ -140,7 +140,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           t = _indexedslice(mat_to_add)
           q.apply_indexed_slices_grad(t).run()
 
-        result = sess.run(q.take_indexed_slices_grad(1))
+        result = self.evaluate(q.take_indexed_slices_grad(1))
 
         self._assertEqual_nparray(sum_elems / len(elems), result, sess)
 
@@ -189,7 +189,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       accum_op.run()
 
       takeg_t = q.take_indexed_slices_grad(1)
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
       self.assertAllEqual([0, 1, 2], val.indices)
       self.assertAllEqual([[0.5, 0.5], [0, 2], [3, 0]], val.values)
       self.assertAllEqual([-1, 2], val.dense_shape)
@@ -209,7 +209,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       accum_op.run()
 
       takeg_t = q.take_indexed_slices_grad(1)
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
       self.assertAllEqual([0, 1, 2], val.indices)
       self.assertAllEqual([[1, 1], [0, 2], [3, 0]], val.values)
       self.assertAllEqual([-1, 2], val.dense_shape)
@@ -235,7 +235,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       accum_op.run()
 
       takeg_t = q.take_indexed_slices_grad(1)
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
       self.assertAllEqual(val.indices, [0, 1, 2])
       self.assertAllEqual(val.values, [[0.5, 0.5], [0, 2], [3, 0]])
       self.assertAllEqual(val.dense_shape, [-1, 2])
@@ -252,7 +252,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       accum_op.run()
 
       takeg_t = q.take_indexed_slices_grad(1)
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
       self.assertAllEqual(val.indices, [0, 1, 2])
       self.assertAllEqual(val.values, [[5, 5], [0, 20], [30, 0]])
       self.assertAllEqual(val.dense_shape, [-1, 2])
@@ -269,7 +269,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       takeg_t = q.take_indexed_slices_grad(1)
 
       def apply_indexed_slices_grad(accum_op):
-        sess.run(accum_op)
+        self.evaluate(accum_op)
 
       threads = [
           self.checkedThread(
@@ -281,7 +281,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       for thread in threads:
         thread.join()
 
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
 
       expected_val = sum(elems) / len(elems)
       self._assertEqual_nparray(
@@ -303,7 +303,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       takeg_t = q.take_indexed_slices_grad(1)
 
       def apply_indexed_slices_grad(accum_op):
-        sess.run(accum_op)
+        self.evaluate(accum_op)
 
       threads = [
           self.checkedThread(target=apply_indexed_slices_grad, args=(o,))
@@ -315,7 +315,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       for thread in threads:
         thread.join()
 
-      val = sess.run(takeg_t)
+      val = self.evaluate(takeg_t)
 
       expected_val = 550.0
       self._assertEqual_nparray(
@@ -338,13 +338,13 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       def apply_indexed_slices_grad():
         for accum_op in accum_ops:
           time.sleep(1.0)
-          sess.run(accum_op)
+          self.evaluate(accum_op)
 
       apply_indexed_slices_grad_thread = self.checkedThread(
           target=apply_indexed_slices_grad)
 
       def take_grad():
-        t = sess.run(takeg_t)
+        t = self.evaluate(takeg_t)
         results.append(t)
 
       threads = [self.checkedThread(target=take_grad) for _ in range(10)]
@@ -378,10 +378,10 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
 
       def apply_indexed_slices_grad():
         for accum_op in accum_ops:
-          sess.run(accum_op)
+          self.evaluate(accum_op)
 
       def take_grad():
-        results.append(sess.run(takeg_t))
+        results.append(self.evaluate(takeg_t))
 
       accum_thread = self.checkedThread(target=apply_indexed_slices_grad)
       takeg_thread = self.checkedThread(target=take_grad)
@@ -394,7 +394,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
 
   def _blocking_takeg(self, sess, takeg_op):
     with self.assertRaisesOpError("was cancelled"):
-      sess.run(takeg_op)
+      self.evaluate(takeg_op)
 
   def testAccumulatorCancel(self):
     with self.cached_session() as sess:
@@ -585,7 +585,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
                     np.float32)).run()
 
       # After take grad, constraints on accumulated gradient are removed
-      sess.run(q.take_grad(1))
+      self.evaluate(q.take_grad(1))
 
       # First successful gradient imposes new constraints.
       # Hereafter, shape will additionally constrained to [None,2,2,3]
@@ -615,7 +615,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           grad_values=np.array(
               [[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]]).astype(np.float32)).run()
 
-      val = sess.run(q.take_indexed_slices_grad(1))
+      val = self.evaluate(q.take_indexed_slices_grad(1))
       self.assertAllEqual(val.dense_shape, [2, 2, 2, 2])
 
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -627,7 +627,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
               [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]).astype(
                   np.float32)).run()
 
-      val = sess.run(q.take_indexed_slices_grad(1))
+      val = self.evaluate(q.take_indexed_slices_grad(1))
       self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3])
 
   def testApplyGradtInt32IndicesAndShape(self):
@@ -653,7 +653,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       accum_op.run()
       self.assertEqual(q.num_accumulated().eval(), 2)
 
-      val = sess.run(q.take_indexed_slices_grad(1))
+      val = self.evaluate(q.take_indexed_slices_grad(1))
       self.assertAllEqual(val.indices, [0, 2])
       self.assertAllEqual(val.values, [[0, 0, 1], [3, 0, 4]])
       self.assertAllEqual(val.dense_shape, [3, 3])
diff --git a/tensorflow/python/kernel_tests/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_cross_op_test.py
index 17e867439a8..8451b96c564 100644
--- a/tensorflow/python/kernel_tests/sparse_cross_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_cross_op_test.py
@@ -43,7 +43,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_dense(self):
     """Tests only dense inputs."""
@@ -63,7 +63,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_integer_mixed_string_sparse(self):
     """Tests mixed type."""
@@ -77,7 +77,7 @@ class SparseCrossOpTest(test.TestCase):
         '55555_X_batch2-FC2-F2'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_integer_mixed_string_dense(self):
     """Tests mixed dense inputs."""
@@ -95,7 +95,7 @@ class SparseCrossOpTest(test.TestCase):
         '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_sparse_cross_dense(self):
     """Tests sparse and dense inputs."""
@@ -112,7 +112,7 @@ class SparseCrossOpTest(test.TestCase):
             'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
         ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_integer_sparse_input(self):
     """Tests mixed type sparse and dense inputs."""
@@ -128,7 +128,7 @@ class SparseCrossOpTest(test.TestCase):
             '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2'
         ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_permutation_3x3x3(self):
     """Tests 3x3x3 permutation."""
@@ -170,7 +170,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_permutation_3x1x2(self):
     """Tests 3x1x2 permutation."""
@@ -189,7 +189,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_large_batch(self):
     """Tests with large batch size to force multithreading."""
@@ -222,7 +222,7 @@ class SparseCrossOpTest(test.TestCase):
 
     expected_out = self._sparse_tensor(col_out)
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_one_column_empty(self):
     """Tests when one column is empty.
@@ -235,7 +235,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
     ])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_empty(sess.run(op))
+      self._assert_sparse_tensor_empty(self.evaluate(op))
 
   def test_some_columns_empty(self):
     """Tests when more than one columns are empty.
@@ -254,7 +254,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]], 2)
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_all_columns_empty(self):
     """Tests when all columns are empty.
@@ -267,7 +267,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([])
     ])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_empty(sess.run(op))
+      self._assert_sparse_tensor_empty(self.evaluate(op))
 
   def test_hashed_zero_bucket_no_hash_key(self):
     op = sparse_ops.sparse_cross_hashed([
@@ -278,7 +278,7 @@ class SparseCrossOpTest(test.TestCase):
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[1971693436396284976]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_hashed_zero_bucket(self):
     op = sparse_ops.sparse_cross_hashed(
@@ -291,7 +291,7 @@ class SparseCrossOpTest(test.TestCase):
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[4847552627144134031]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
   def test_hashed_no_hash_key(self):
@@ -305,7 +305,7 @@ class SparseCrossOpTest(test.TestCase):
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[83]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_hashed_output(self):
     op = sparse_ops.sparse_cross_hashed(
@@ -319,7 +319,7 @@ class SparseCrossOpTest(test.TestCase):
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[31]])
     with self.cached_session() as sess:
-      self._assert_sparse_tensor_equals(expected_out, sess.run(op))
+      self._assert_sparse_tensor_equals(expected_out, self.evaluate(op))
 
   def test_hashed__has_no_collision(self):
     """Tests that fingerprint concatenation has no collisions."""
@@ -345,7 +345,7 @@ class SparseCrossOpTest(test.TestCase):
         ],
         num_buckets=1000)
     with self.cached_session() as sess:
-      out = sess.run(op)
+      out = self.evaluate(op)
       self.assertEqual(6, len(out.values))
       self.assertAllEqual([[0, i] for i in range(6)], out.indices)
       self.assertTrue(all(x < 1000 and x >= 0 for x in out.values))
diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index db3f6c44e22..ad253595d25 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -154,7 +154,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
                        sparse_tensor.SparseTensor.from_value(values_v)):
           sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-          output = sess.run(sp_output)
+          output = self.evaluate(sp_output)
           self._AssertResultsSorted(output, vocab_size)
 
   def testInt64AndFloat32(self):
@@ -163,7 +163,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
       indices, values = self._SparseTensor_3x50(np.int64, np.float32)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsSorted(output, vocab_size)
 
   def testInt64AndFloat64(self):
@@ -172,7 +172,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsSorted(output, vocab_size)
 
   def testInt32AndFloat32NonCanonicalOrder(self):
@@ -182,7 +182,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size, already_sorted=True)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsNotSorted(output, vocab_size)
 
   def testInt64AndFloat32NonCanonicalOrder(self):
@@ -192,7 +192,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size, already_sorted=True)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsNotSorted(output, vocab_size)
 
   def testInt64AndFloat64NonCanonicalOrder(self):
@@ -203,7 +203,7 @@ class SparseMergeTest(test_util.TensorFlowTestCase):
       sp_output = sparse_ops.sparse_merge(
           indices, values, vocab_size_tensor, already_sorted=True)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsNotSorted(output, vocab_size)
 
   def testShouldSetLastDimensionInDynamicShape(self):
@@ -261,7 +261,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
       indices, values = self._SparseTensor_3x50(np.int64, np.float32)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsSorted(output, vocab_size)
 
   def testInt64AndFloat64(self):
@@ -270,7 +270,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsSorted(output, vocab_size)
 
   def testInt64AndFloat64Shape(self):
@@ -279,7 +279,7 @@ class SparseMergeHighDimTest(test_util.TensorFlowTestCase):
       indices, values = self._SparseTensor_3x50(np.int64, np.float64)
       sp_output = sparse_ops.sparse_merge(indices, values, vocab_size)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
       self._AssertResultsSorted(output, vocab_size)
 
 
@@ -302,7 +302,7 @@ class SparseRetainTest(test_util.TensorFlowTestCase):
         to_retain = np.array([1, 0, 0, 1, 1, 0], dtype=np.bool)
         sp_output = sparse_ops.sparse_retain(sp_input, to_retain)
 
-        output = sess.run(sp_output)
+        output = self.evaluate(sp_output)
 
         self.assertAllEqual(output.indices, [[0, 0], [1, 4], [3, 2]])
         self.assertAllEqual(output.values, [0, 14, 32])
@@ -314,7 +314,7 @@ class SparseRetainTest(test_util.TensorFlowTestCase):
       to_retain = np.zeros((6,), dtype=np.bool)
       sp_output = sparse_ops.sparse_retain(sp_input, to_retain)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
 
       self.assertAllEqual(output.indices, np.array([]).reshape((0, 2)))
       self.assertAllEqual(output.values, [])
@@ -365,7 +365,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       new_shape = np.array([3, 6, 7], dtype=np.int64)
       sp_output = sparse_ops.sparse_reset_shape(sp_input, new_shape)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
 
       self.assertAllEqual(output.indices, [[0, 0, 0], [0, 1, 0], [0, 1, 3],
                                            [1, 1, 4], [1, 3, 2], [1, 3, 3]])
@@ -378,7 +378,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       new_shape = np.array([3, 6, 7], dtype=np.int64)
       sp_output = sparse_ops.sparse_reset_shape(sp_input, new_shape)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
 
       self.assertAllEqual(output.indices, [[0, 0, 0], [0, 1, 0], [0, 1, 3],
                                            [1, 1, 4], [1, 3, 2], [1, 3, 3]])
@@ -404,7 +404,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       sp_input = self._SparseTensor_2x5x6()
       sp_output = sparse_ops.sparse_reset_shape(sp_input)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
 
       self.assertAllEqual(output.indices, [[0, 0, 0], [0, 1, 0], [0, 1, 3],
                                            [1, 1, 4], [1, 3, 2], [1, 3, 3]])
@@ -416,7 +416,7 @@ class SparseResetShapeTest(test_util.TensorFlowTestCase):
       sp_input = self._SparseTensor_2x5x6_Empty()
       sp_output = sparse_ops.sparse_reset_shape(sp_input)
 
-      output = sess.run(sp_output)
+      output = self.evaluate(sp_output)
 
       self.assertAllEqual(output.indices.shape, [0, 3])
       self.assertAllEqual(output.values.shape, [0])
@@ -591,8 +591,8 @@ class SparseAddTest(test_util.TensorFlowTestCase):
     sp_output = sparse_ops.sparse_add(sp_input, sp_input)
 
     with self.session(use_gpu=False) as sess:
-      sess.run(variables.global_variables_initializer())
-      output = sess.run(sp_output)
+      self.evaluate(variables.global_variables_initializer())
+      output = self.evaluate(sp_output)
       self.assertAllEqual(output.values, [2])
 
 
diff --git a/tensorflow/python/kernel_tests/sparse_reorder_op_test.py b/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
index 7b83ae51779..bbf2f392026 100644
--- a/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reorder_op_test.py
@@ -60,7 +60,7 @@ class SparseReorderTest(test.TestCase):
       input_val = self._SparseTensorValue_5x6(np.arange(6))
       sp_output = sparse_ops.sparse_reorder(input_val)
 
-      output_val = sess.run(sp_output)
+      output_val = self.evaluate(sp_output)
       self.assertAllEqual(output_val.indices, input_val.indices)
       self.assertAllEqual(output_val.values, input_val.values)
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
@@ -83,7 +83,7 @@ class SparseReorderTest(test.TestCase):
         input_val = self._SparseTensorValue_5x6(np.random.permutation(6))
         sp_output = sparse_ops.sparse_reorder(input_val)
 
-        output_val = sess.run(sp_output)
+        output_val = self.evaluate(sp_output)
         self.assertAllEqual(output_val.indices, expected_output_val.indices)
         self.assertAllEqual(output_val.values, expected_output_val.values)
         self.assertAllEqual(output_val.dense_shape,
diff --git a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
index f7be397c333..918af27091b 100644
--- a/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_reshape_op_test.py
@@ -81,7 +81,7 @@ class SparseReshapeTest(test.TestCase):
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(input_val, [5, 6])
 
-      output_val = sess.run(sp_output)
+      output_val = self.evaluate(sp_output)
       self.assertAllEqual(output_val.indices, input_val.indices)
       self.assertAllEqual(output_val.values, input_val.values)
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
@@ -151,7 +151,7 @@ class SparseReshapeTest(test.TestCase):
       input_val = self._SparseTensorValue_5x6()
       sp_output = sparse_ops.sparse_reshape(input_val, [2, 3, 5])
 
-      output_val = sess.run(sp_output)
+      output_val = self.evaluate(sp_output)
       self.assertAllEqual(output_val.indices,
                           np.array([[0, 0, 0], [0, 1, 1], [0, 1, 4], [0, 2, 0],
                                     [1, 1, 0], [1, 1, 1]]))
diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
index b24a0869699..39a9ab9b491 100644
--- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py
@@ -73,7 +73,7 @@ class SerializeSparseTest(test.TestCase):
       serialized = serialize_fn(sp_input, out_type=out_type)
       sp_deserialized = deserialize_fn(serialized, dtype=dtypes.int32)
 
-      indices, values, shape = sess.run(sp_deserialized)
+      indices, values, shape = self.evaluate(sp_deserialized)
 
       self.assertAllEqual(indices, sp_input[0])
       self.assertAllEqual(values, sp_input[1])
diff --git a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
index e08464a701c..538e7c69b57 100644
--- a/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_tensors_map_ops_test.py
@@ -88,7 +88,7 @@ class SparseTensorsMapTest(test.TestCase):
       sp_out = take_many_sparse_from_tensors_map(
           sparse_map_op=handle0.op, sparse_handles=handles_concat)
 
-      combined_indices, combined_values, combined_shape = sess.run(sp_out)
+      combined_indices, combined_values, combined_shape = self.evaluate(sp_out)
 
       self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
       self.assertAllEqual(combined_indices[:6, 1:], sp_input0[0])
@@ -114,7 +114,8 @@ class SparseTensorsMapTest(test.TestCase):
       sp_roundtrip = take_many_sparse_from_tensors_map(
           sparse_map_op=handle.op, sparse_handles=sparse_handles)
 
-      combined_indices, combined_values, combined_shape = sess.run(sp_roundtrip)
+      combined_indices, combined_values, combined_shape = self.evaluate(
+          sp_roundtrip)
 
       self.assertAllEqual(combined_indices[:6, 0], [0] * 6)  # minibatch 0
       self.assertAllEqual(combined_indices[:6, 1:], input0_val[0])
@@ -165,19 +166,19 @@ class SparseTensorsMapTest(test.TestCase):
       with self.assertRaisesOpError(
           r"Inconsistent rank across SparseTensors: rank prior to "
           r"SparseTensor\[1\] was: 3 but rank of SparseTensor\[1\] is: 4"):
-        sess.run(sp_roundtrip)
+        self.evaluate(sp_roundtrip)
 
   def testTakeManyFailsWrongInputOp(self):
     with self.session(use_gpu=False) as sess:
       input_val = self._SparseTensorValue_5x6(np.arange(6))
       handle = add_sparse_to_tensors_map(input_val)
-      handle_value = sess.run(handle)
+      handle_value = self.evaluate(handle)
       bad_handle = handle_value + 10
       sp_roundtrip = take_many_sparse_from_tensors_map(
           sparse_map_op=handle.op, sparse_handles=[handle_value, bad_handle])
 
       with self.assertRaisesOpError(r"Unable to find SparseTensor: 10"):
-        sess.run(sp_roundtrip)
+        self.evaluate(sp_roundtrip)
 
 
 class BenchmarkSparseTensorsMapVsSerialization(test.Benchmark):
@@ -212,8 +213,8 @@ class BenchmarkSparseTensorsMapVsSerialization(test.Benchmark):
 
         variables.global_variables_initializer().run()
 
-        st_roundtrip_values = sess.run(st_roundtrip)
-        st_deserialized_values = sess.run(st_deserialized)
+        st_roundtrip_values = self.evaluate(st_roundtrip)
+        st_deserialized_values = self.evaluate(st_deserialized)
         np.testing.assert_equal(st_roundtrip_values.values,
                                 st_deserialized_values.values)
         np.testing.assert_equal(st_roundtrip_values.indices,
diff --git a/tensorflow/python/kernel_tests/sparse_xent_op_test.py b/tensorflow/python/kernel_tests/sparse_xent_op_test.py
index 3f91131dab7..cc8c7c238f9 100644
--- a/tensorflow/python/kernel_tests/sparse_xent_op_test.py
+++ b/tensorflow/python/kernel_tests/sparse_xent_op_test.py
@@ -66,7 +66,7 @@ class SparseXentTest(test.TestCase):
     with self.cached_session(use_gpu=True) as sess:
       loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
           np_features, np_labels)
-      tf_loss, tf_backprop = sess.run([loss, backprop])
+      tf_loss, tf_backprop = self.evaluate([loss, backprop])
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
     self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
 
@@ -76,7 +76,7 @@ class SparseXentTest(test.TestCase):
         loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
             np.array([[1.], [-1.], [0.]]).astype(np.float32),
             np.array([0, 0, 0]).astype(label_dtype))
-        tf_loss, tf_backprop = sess.run([loss, backprop])
+        tf_loss, tf_backprop = self.evaluate([loss, backprop])
       self.assertAllClose([0.0, 0.0, 0.0], tf_loss)
       self.assertAllClose([[0.0], [0.0], [0.0]], tf_backprop)
 
@@ -90,7 +90,7 @@ class SparseXentTest(test.TestCase):
         loss, backprop = (
             gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
                 features, labels))
-        tf_loss, tf_backprop = sess.run([loss, backprop])
+        tf_loss, tf_backprop = self.evaluate([loss, backprop])
         self.assertAllClose(
             [[np.nan] * 4, [0.25, 0.25, 0.25, -0.75],
              [-0.968, 0.087, 0.237, 0.6439], [np.nan] * 4],
@@ -104,7 +104,7 @@ class SparseXentTest(test.TestCase):
       loss, backprop = (
           gen_nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels))
       with self.assertRaisesOpError("Received a label value of"):
-        sess.run([loss, backprop])
+        self.evaluate([loss, backprop])
 
   def testNpXent(self):
     # We create 2 batches of logits for testing.
@@ -226,7 +226,7 @@ class SparseXentTest(test.TestCase):
       loss = nn_ops.sparse_softmax_cross_entropy_with_logits(
           labels=labels, logits=features)
       backprop = loss.op.inputs[0].op.outputs[1]
-      tf_loss, tf_backprop = sess.run([loss, backprop])
+      tf_loss, tf_backprop = self.evaluate([loss, backprop])
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
     self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
 
diff --git a/tensorflow/python/kernel_tests/stack_ops_test.py b/tensorflow/python/kernel_tests/stack_ops_test.py
index 6c6fe8aba47..dffb260b5fa 100644
--- a/tensorflow/python/kernel_tests/stack_ops_test.py
+++ b/tensorflow/python/kernel_tests/stack_ops_test.py
@@ -131,7 +131,7 @@ class StackOpTest(test.TestCase):
         pop1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32)
         pop2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32)
 
-      out1, out2 = sess.run([pop1, pop2])
+      out1, out2 = self.evaluate([pop1, pop2])
       self.assertAllClose(out1, 4.0)
       self.assertAllClose(out2, 5.0)
 
@@ -144,7 +144,7 @@ class StackOpTest(test.TestCase):
       h = gen_data_flow_ops.stack_v2(
           -1, elem_type=dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_close_v2(h)
-      sess.run(c1)
+      self.evaluate(c1)
 
   def testCloseStack(self):
     self._testCloseStack(use_gpu=False)
@@ -157,7 +157,7 @@ class StackOpTest(test.TestCase):
       c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]])
       with ops.control_dependencies([c]):
         c1 = gen_data_flow_ops.stack_close_v2(h)
-      sess.run(c1)
+      self.evaluate(c1)
 
   def testPushCloseStack(self):
     self._testPushCloseStack(use_gpu=False)
@@ -263,7 +263,7 @@ class StackOpRefTest(test.TestCase):
     with self.cached_session(use_gpu=use_gpu) as sess:
       h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo")
       c1 = gen_data_flow_ops.stack_close(h)
-      sess.run(c1)
+      self.evaluate(c1)
 
   def testCloseStack(self):
     self._testCloseStack(use_gpu=False)
@@ -275,7 +275,7 @@ class StackOpRefTest(test.TestCase):
       c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]])
       with ops.control_dependencies([c]):
         c1 = gen_data_flow_ops.stack_close(h)
-      sess.run(c1)
+      self.evaluate(c1)
 
   def testPushCloseStack(self):
     self._testPushCloseStack(use_gpu=False)
diff --git a/tensorflow/python/kernel_tests/string_length_op_test.py b/tensorflow/python/kernel_tests/string_length_op_test.py
index 57db7302b15..06bf28ebcee 100644
--- a/tensorflow/python/kernel_tests/string_length_op_test.py
+++ b/tensorflow/python/kernel_tests/string_length_op_test.py
@@ -29,7 +29,7 @@ class StringLengthOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       lengths = string_ops.string_length(strings)
-      values = sess.run(lengths)
+      values = self.evaluate(lengths)
       self.assertAllEqual(values, [[[1, 2], [3, 4], [5, 6]]])
 
   def testUnit(self):
@@ -43,9 +43,9 @@ class StringLengthOpTest(test.TestCase):
       utf8_char_lengths = string_ops.string_length(
           utf8_strings, unit="UTF8_CHAR")
       self.assertAllEqual(
-          sess.run(utf8_byte_lengths), expected_utf8_byte_lengths)
+          self.evaluate(utf8_byte_lengths), expected_utf8_byte_lengths)
       self.assertAllEqual(
-          sess.run(utf8_char_lengths), expected_utf8_char_lengths)
+          self.evaluate(utf8_char_lengths), expected_utf8_char_lengths)
       with self.assertRaisesRegexp(
           ValueError, "Attr 'unit' of 'StringLength' Op passed string 'XYZ' "
           'not in: "BYTE", "UTF8_CHAR"'):
diff --git a/tensorflow/python/kernel_tests/string_split_op_test.py b/tensorflow/python/kernel_tests/string_split_op_test.py
index b968e885eda..92e13db0f73 100644
--- a/tensorflow/python/kernel_tests/string_split_op_test.py
+++ b/tensorflow/python/kernel_tests/string_split_op_test.py
@@ -34,7 +34,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
       self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
       self.assertAllEqual(shape, [2, 4])
@@ -44,7 +44,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, delimiter="")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                     [1, 0], [1, 1], [1, 2], [1, 3], [2, 0],
                                     [2, 1], [2, 2], [2, 3]])
@@ -62,7 +62,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(
           indices,
           [[1, 0], [2, 0], [3, 0], [5, 0], [6, 0], [7, 0], [8, 0]])
@@ -74,7 +74,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, delimiter=" .")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(
           indices,
           [[1, 0], [2, 0], [3, 0], [5, 0], [6, 0], [7, 0], [8, 0]])
@@ -92,13 +92,13 @@ class StringSplitOpTest(test.TestCase):
           ValueError, string_ops.string_split, strings, delimiter=["a"])
 
       tokens = string_ops.string_split(strings, delimiter="|")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
       self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
       self.assertAllEqual(shape, [2, 2])
 
       tokens = string_ops.string_split(strings, delimiter="| ")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0], [1, 1]])
       self.assertAllEqual(values, [b"hello", b"world", b"hello", b"world"])
       self.assertAllEqual(shape, [2, 2])
@@ -145,7 +145,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, "#", skip_empty=False)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
                                     [1, 0], [1, 1],
                                     [2, 0], [2, 1], [2, 2]])
@@ -154,7 +154,7 @@ class StringSplitOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split(strings, "#")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(values, [b"a", b"b", b"c"])
       self.assertAllEqual(indices, [[0, 0], [1, 0], [2, 0]])
       self.assertAllEqual(shape, [3, 1])
@@ -167,7 +167,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
       self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
       self.assertAllEqual(shape, [2, 4])
@@ -182,7 +182,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep="<>")
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(
           indices, [[0, 0], [0, 1], [0, 2],
                     [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [1, 6]])
@@ -200,7 +200,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep=',')
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
                                     [1, 0], [1, 1], [1, 2], [1, 3], [1, 4]])
       self.assertAllEqual(values, [b"1", b"2", b"3",
@@ -217,7 +217,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2],
                                     [1, 0], [1, 1], [1, 2]])
       self.assertAllEqual(values, [b"1", b"2", b"3", b"4", b"5", b"6"])
@@ -233,7 +233,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, sep=',', maxsplit=1)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
                                     [1, 0], [1, 1]])
       self.assertAllEqual(values, [b"1", b"2,3", b"4", b"5,,6,"])
@@ -249,7 +249,7 @@ class StringSplitV2OpTest(test.TestCase):
 
     with self.cached_session() as sess:
       tokens = string_ops.string_split_v2(strings, maxsplit=1)
-      indices, values, shape = sess.run(tokens)
+      indices, values, shape = self.evaluate(tokens)
       self.assertAllEqual(indices, [[0, 0], [0, 1],
                                     [1, 0], [1, 1]])
       self.assertAllEqual(values, [b"1", b"2 3", b"4", b"5    6  "])
diff --git a/tensorflow/python/kernel_tests/string_strip_op_test.py b/tensorflow/python/kernel_tests/string_strip_op_test.py
index 1e404b71462..edff3862ff6 100644
--- a/tensorflow/python/kernel_tests/string_strip_op_test.py
+++ b/tensorflow/python/kernel_tests/string_strip_op_test.py
@@ -30,7 +30,7 @@ class StringStripOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
-      output = sess.run(output)
+      output = self.evaluate(output)
       self.assertAllEqual(output, [b"pigs on the wing", b"animals"])
 
   def test_string_strip_2d(self):
@@ -39,7 +39,7 @@ class StringStripOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
-      output = sess.run(output)
+      output = self.evaluate(output)
       self.assertAllEqual(output, [[b"pigs on the wing", b"animals"],
                                    [b"hello", b"world"]])
 
@@ -48,7 +48,7 @@ class StringStripOpTest(test.TestCase):
 
     with self.cached_session() as sess:
       output = string_ops.string_strip(strings)
-      output = sess.run(output)
+      output = self.evaluate(output)
       self.assertAllEqual(output, [b"hello", b"", b"world", b""])
 
 
diff --git a/tensorflow/python/kernel_tests/summary_v1_audio_op_test.py b/tensorflow/python/kernel_tests/summary_v1_audio_op_test.py
index 63ce77b9d55..1547c55f8b0 100644
--- a/tensorflow/python/kernel_tests/summary_v1_audio_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_v1_audio_op_test.py
@@ -60,7 +60,7 @@ class SummaryV1AudioOpTest(test.TestCase):
         sample_rate = 8000
         summ = summary.audio(
             "snd", const, max_outputs=3, sample_rate=sample_rate)
-        value = sess.run(summ)
+        value = self.evaluate(summ)
         self.assertEqual([], summ.get_shape())
         audio_summ = self._AsSummary(value)
 
diff --git a/tensorflow/python/kernel_tests/summary_v1_image_op_test.py b/tensorflow/python/kernel_tests/summary_v1_image_op_test.py
index 094606944ff..e1b24756f3f 100644
--- a/tensorflow/python/kernel_tests/summary_v1_image_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_v1_image_op_test.py
@@ -70,7 +70,7 @@ class SummaryV1ImageOpTest(test.TestCase):
 
           # Summarize
           summ = summary.image("img", const)
-          value = sess.run(summ)
+          value = self.evaluate(summ)
           self.assertEqual([], summ.get_shape())
           image_summ = self._AsSummary(value)
 
@@ -97,7 +97,7 @@ class SummaryV1ImageOpTest(test.TestCase):
 
         # Summarize
         summ = summary.image("img", tf_images)
-        value = sess.run(summ)
+        value = self.evaluate(summ)
         self.assertEqual([], summ.get_shape())
         image_summ = self._AsSummary(value)
 
diff --git a/tensorflow/python/kernel_tests/summary_v1_ops_test.py b/tensorflow/python/kernel_tests/summary_v1_ops_test.py
index 6c4e106b118..1206cb7013f 100644
--- a/tensorflow/python/kernel_tests/summary_v1_ops_test.py
+++ b/tensorflow/python/kernel_tests/summary_v1_ops_test.py
@@ -42,7 +42,7 @@ class SummaryV1OpsTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant([10.0, 20.0])
       summ = logging_ops.scalar_summary(["c1", "c2"], const, name="mysumm")
-      value = sess.run(summ)
+      value = self.evaluate(summ)
     self.assertEqual([], summ.get_shape())
     self.assertProtoEquals("""
       value { tag: "c1" simple_value: 10.0 }
@@ -53,7 +53,7 @@ class SummaryV1OpsTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant([10.0, 20.0])
       summ = logging_ops.scalar_summary(["c1", "c2"], const)
-      value = sess.run(summ)
+      value = self.evaluate(summ)
     self.assertEqual([], summ.get_shape())
     self.assertProtoEquals("""
       value { tag: "c1" simple_value: 10.0 }
@@ -66,7 +66,7 @@ class SummaryV1OpsTest(test.TestCase):
       summ1 = summary.histogram("h", const)
       summ2 = logging_ops.scalar_summary("c", const)
       merge = summary.merge([summ1, summ2])
-      value = sess.run(merge)
+      value = self.evaluate(merge)
     self.assertEqual([], merge.get_shape())
     self.assertProtoEquals("""
       value {
diff --git a/tensorflow/python/kernel_tests/summary_v1_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_v1_tensor_op_test.py
index 34f771679ae..b8e5b5b882a 100644
--- a/tensorflow/python/kernel_tests/summary_v1_tensor_op_test.py
+++ b/tensorflow/python/kernel_tests/summary_v1_tensor_op_test.py
@@ -50,7 +50,7 @@ class SummaryV1TensorOpTest(test.TestCase):
         with ops.name_scope("zod"):
           s3 = summary_lib.tensor_summary("s3", c)
           s4 = summary_lib.tensor_summary("TensorSummary", c)
-      summ1, summ2, summ3, summ4 = sess.run([s1, s2, s3, s4])
+      summ1, summ2, summ3, summ4 = self.evaluate([s1, s2, s3, s4])
 
     v1 = self._SummarySingleValue(summ1)
     self.assertEqual(v1.tag, "s1")
@@ -68,7 +68,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant(10.0)
       summ = summary_lib.tensor_summary("foo", const)
-      result = sess.run(summ)
+      result = self.evaluate(summ)
 
     value = self._SummarySingleValue(result)
     n = tensor_util.MakeNdarray(value.tensor)
@@ -79,7 +79,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant(s)
       summ = summary_lib.tensor_summary("foo", const)
-      result = sess.run(summ)
+      result = self.evaluate(summ)
 
     value = self._SummarySingleValue(result)
     n = tensor_util.MakeNdarray(value.tensor)
@@ -89,7 +89,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
       const = array_ops.ones([5, 5, 5])
       summ = summary_lib.tensor_summary("foo", const)
-      result = sess.run(summ)
+      result = self.evaluate(summ)
     value = self._SummarySingleValue(result)
     n = tensor_util.MakeNdarray(value.tensor)
     self._AssertNumpyEq(n, np.ones([5, 5, 5]))
@@ -99,7 +99,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant(strings)
       summ = summary_lib.tensor_summary("foo", const)
-      result = sess.run(summ)
+      result = self.evaluate(summ)
     value = self._SummarySingleValue(result)
     n = tensor_util.MakeNdarray(value.tensor)
     self._AssertNumpyEq(n, strings)
@@ -109,7 +109,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
       const = constant_op.constant(bools)
       summ = summary_lib.tensor_summary("foo", const)
-      result = sess.run(summ)
+      result = self.evaluate(summ)
 
     value = self._SummarySingleValue(result)
     n = tensor_util.MakeNdarray(value.tensor)
@@ -119,7 +119,7 @@ class SummaryV1TensorOpTest(test.TestCase):
     with self.cached_session() as sess:
 
       def get_description(summary_op):
-        summ_str = sess.run(summary_op)
+        summ_str = self.evaluate(summary_op)
         summ = summary_pb2.Summary()
         summ.ParseFromString(summ_str)
         return summ.value[0].metadata
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index 32c97a7b191..97a280ef51c 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -68,7 +68,7 @@ class SvdOpTest(test.TestCase):
             s2 = linalg_ops.svd(
                 matrix2, compute_uv=compute_uv_, full_matrices=full_matrices_)
             all_ops += [s1, s2]
-      val = sess.run(all_ops)
+      val = self.evaluate(all_ops)
       for i in range(2):
         s = 6 * i
         self.assertAllEqual(val[s], val[s + 3])  # s1 == s2
@@ -150,7 +150,7 @@ def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_,
         s_tf, u_tf, v_tf = linalg_ops.svd(
             x_tf, compute_uv=compute_uv_, full_matrices=full_matrices_)
         if use_static_shape_:
-          s_tf_val, u_tf_val, v_tf_val = sess.run([s_tf, u_tf, v_tf])
+          s_tf_val, u_tf_val, v_tf_val = self.evaluate([s_tf, u_tf, v_tf])
         else:
           s_tf_val, u_tf_val, v_tf_val = sess.run(
               [s_tf, u_tf, v_tf], feed_dict={x_tf: x_np})
@@ -158,7 +158,7 @@ def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_,
         s_tf = linalg_ops.svd(
             x_tf, compute_uv=compute_uv_, full_matrices=full_matrices_)
         if use_static_shape_:
-          s_tf_val = sess.run(s_tf)
+          s_tf_val = self.evaluate(s_tf)
         else:
           s_tf_val = sess.run(s_tf, feed_dict={x_tf: x_np})
 
diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py
index 9dcdaa61ed2..a187fa115ce 100644
--- a/tensorflow/python/kernel_tests/template_test.py
+++ b/tensorflow/python/kernel_tests/template_test.py
@@ -104,10 +104,10 @@ class TemplateTest(test.TestCase):
     train_op = optimizer.minimize(train_loss)
 
     with session.Session() as sess:
-      sess.run(variables.global_variables_initializer())
-      initial_test_loss = sess.run(test_loss)
-      sess.run(train_op)
-      final_test_loss = sess.run(test_loss)
+      self.evaluate(variables.global_variables_initializer())
+      initial_test_loss = self.evaluate(test_loss)
+      self.evaluate(train_op)
+      final_test_loss = self.evaluate(test_loss)
 
     # Parameters are tied, so the loss should have gone down when we trained it.
     self.assertLess(final_test_loss, initial_test_loss)
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 7e8db8947b9..bb8645e2d54 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -751,7 +751,7 @@ class TensorArrayTest(test.TestCase):
                 [-0.5, 1.5],  # read(0) gradient
                 [20.0, 30.0, 40.0, 50.0]
             ])  # concat gradient
-      grad_vals = sess.run(grad_r)  # 2 + 2 entries
+      grad_vals = self.evaluate(grad_r)  # 2 + 2 entries
 
       self.assertAllClose([2.0 - 0.5 + 20.0, 3.0 + 1.5 + 30.0], grad_vals[0])
       self.assertAllEqual([4.0 + 40.0, 5.0 + 50.0], grad_vals[1])
@@ -1286,7 +1286,7 @@ class TensorArrayTest(test.TestCase):
       r = w1.stack()
       self.assertAllEqual(np.array([1.0, 2.0, 3.0, 4.0]), self.evaluate(r))
       grad = gradients_impl.gradients(ys=[r], xs=[x])
-      self.assertAllEqual(np.array([1.0, 1.0, 1.0]), sess.run(grad)[0])
+      self.assertAllEqual(np.array([1.0, 1.0, 1.0]), self.evaluate(grad)[0])
 
   @test_util.disable_control_flow_v2("b/117943489")
   def testSkipEagerTensorArrayUnpackDynamic(self):
@@ -1303,7 +1303,7 @@ class TensorArrayTest(test.TestCase):
       r = w1.concat()
       self.assertAllEqual(np.array([1.0, 2.0, 3.0, 4.0]), self.evaluate(r))
       grad = gradients_impl.gradients(ys=[r], xs=[x])
-      self.assertAllEqual(np.array([1.0, 1.0, 1.0]), sess.run(grad)[0])
+      self.assertAllEqual(np.array([1.0, 1.0, 1.0]), self.evaluate(grad)[0])
 
   def _testTensorArrayEvalEmpty(self):
     with self.cached_session(use_gpu=True):
@@ -1583,7 +1583,7 @@ class TensorArrayTest(test.TestCase):
       # wrap it in the correct name scope.
       dx, = gradients_impl.gradients(ys=[y], xs=[x], grad_ys=[dy])
       with self.cached_session(use_gpu=True) as sess:
-        vdx, vdy = sess.run([dx, dy])
+        vdx, vdy = self.evaluate([dx, dy])
       self.assertAllClose(vdx, vdy)
 
   def testSkipEagerTensorArrayInt64GPU(self):
diff --git a/tensorflow/python/kernel_tests/topk_op_test.py b/tensorflow/python/kernel_tests/topk_op_test.py
index d9f340de6b2..a72888c2567 100644
--- a/tensorflow/python/kernel_tests/topk_op_test.py
+++ b/tensorflow/python/kernel_tests/topk_op_test.py
@@ -48,7 +48,7 @@ class TopKTest(test.TestCase):
     np_expected_indices = np.array(expected_indices)
     with self.cached_session(use_gpu=True) as sess:
       values_op, indices_op = nn_ops.top_k(inputs, k, sorted=sorted)
-      values, indices = sess.run([values_op, indices_op])
+      values, indices = self.evaluate([values_op, indices_op])
 
       self.assertShapeEqual(np_expected_values, values_op)
       self.assertShapeEqual(np_expected_indices, indices_op)
diff --git a/tensorflow/python/kernel_tests/unicode_transcode_op_test.py b/tensorflow/python/kernel_tests/unicode_transcode_op_test.py
index 4ad5ee4103e..037ecd104b0 100644
--- a/tensorflow/python/kernel_tests/unicode_transcode_op_test.py
+++ b/tensorflow/python/kernel_tests/unicode_transcode_op_test.py
@@ -42,7 +42,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, strings)
 
       outputs = string_ops.unicode_transcode(
@@ -52,7 +52,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, strings)
 
       outputs = string_ops.unicode_transcode(
@@ -62,7 +62,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, strings)
 
   def test_transcode_utf16_to_utf8(self):
@@ -77,7 +77,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, expected)
 
   def test_transcode_bad_utf8(self):
@@ -90,7 +90,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=True)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"  ")
 
       outputs = string_ops.unicode_transcode(
@@ -100,7 +100,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\x00 ")
 
   def test_transcode_bad_utf8_with_some_good(self):
@@ -113,7 +113,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="replace",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"abc abcdefg")
 
   def test_transcode_bad_utf8_with_defaults(self):
@@ -121,7 +121,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
     with self.cached_session() as sess:
       outputs = string_ops.unicode_transcode(
           bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\x00\xef\xbf\xbd")
 
   def test_transcode_bad_utf8_with_space_replacement(self):
@@ -130,7 +130,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
       outputs = string_ops.unicode_transcode(
           bad_string, input_encoding="UTF-8", output_encoding="UTF-8",
           replacement_char=ord(" "))
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\x00 ")
 
   def test_transcode_bad_utf8_with_strict_errors(self):
@@ -143,7 +143,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="strict")
       with self.assertRaisesOpError(
           "Invalid formatting on input string"):
-        sess.run(outputs)
+        self.evaluate(outputs)
 
   def test_transcode_bad_utf8_start_with_strict_errors(self):
     bad_string = b"\xffabcd"
@@ -155,7 +155,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           errors="strict")
       with self.assertRaisesOpError(
           "Invalid formatting on input string"):
-        sess.run(outputs)
+        self.evaluate(outputs)
 
   def test_transcode_bad_utf8_with_elision_of_malformatting(self):
     bad_string = b"\x00\xff"
@@ -165,7 +165,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           input_encoding="UTF-8",
           output_encoding="UTF-8",
           errors="ignore")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\x00")
 
   def test_transcode_bad_utf8_with_elision_including_control_chars(self):
@@ -177,7 +177,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           output_encoding="UTF-8",
           errors="ignore",
           replace_control_characters=True)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"")
 
   def test_transcode_bad_utf8_termination_with_defaults(self):
@@ -185,7 +185,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
     with self.cached_session() as sess:
       outputs = string_ops.unicode_transcode(
           bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"a\xef\xbf\xbd")   # 0xFFFD
 
   def test_transcode_utf8_with_replacement_char(self):
@@ -194,13 +194,13 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
       outputs = string_ops.unicode_transcode(
           strings, input_encoding="UTF-8", output_encoding="UTF-8",
           errors="strict")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, [b"a\xef\xbf\xbd"])
 
       outputs = string_ops.unicode_transcode(
           strings, input_encoding="UTF-8", output_encoding="UTF-8",
           errors="replace", replacement_char=ord("?"))
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, [b"a\xef\xbf\xbd"])
 
   def test_transcode_utf8_to_utf16(self):
@@ -214,7 +214,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           output_encoding="UTF-16-BE",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       print("values=", values)
       self.assertAllEqual(values, expected)
 
@@ -230,7 +230,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           output_encoding="UTF-8",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, expected)
 
   def test_transcode_utf8_to_utf32(self):
@@ -243,7 +243,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           output_encoding="UTF-32-BE",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, expected)
 
   # Documentation in ICU suggests that getNextUChar may produce a different
@@ -258,7 +258,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           output_encoding="UTF-8",
           replacement_char=ord(" "),
           replace_control_characters=False)
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, strings)
 
   def test_transcode_utf8_with_bom(self):
@@ -266,12 +266,12 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
     with self.cached_session() as sess:
       outputs = string_ops.unicode_transcode(
           bom_string, input_encoding="UTF-8", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\xef\xbb\xbfabcdefg")  # BOM preserved
 
       outputs = string_ops.unicode_transcode(
           bom_string, input_encoding="UTF-8", output_encoding="UTF-16-BE")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       utf16expected = bom_string.decode("UTF-8").encode("UTF-16-BE")
       self.assertAllEqual(values, utf16expected)
 
@@ -280,20 +280,20 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
     with self.cached_session() as sess:
       outputs = string_ops.unicode_transcode(
           bom_string, input_encoding="UTF-16-BE", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       # BOM is preserved in output
       self.assertAllEqual(values, b"\xef\xbb\xbfa")
 
       outputs = string_ops.unicode_transcode(
           bom_string, input_encoding="UTF-16-LE", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       # mangled BOM and value from (incorrect) LE encoding
       self.assertAllEqual(values, b"\xef\xbf\xbe\xe6\x84\x80")
 
       bom_string = b"\xff\xfe\x61\x00"  # Little-endian BOM with 'a' encoded
       outputs = string_ops.unicode_transcode(
           bom_string, input_encoding="UTF-16-LE", output_encoding="UTF-8")
-      values = sess.run(outputs)
+      values = self.evaluate(outputs)
       self.assertAllEqual(values, b"\xef\xbb\xbfa")
 
   @parameterized.parameters(
@@ -336,7 +336,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
           replace_control_characters=False)
       with self.assertRaisesOpError(
           "Could not create converter for input encoding: invalid"):
-        sess.run(outputs)
+        self.evaluate(outputs)
 
     with self.assertRaisesRegexp(ValueError, "Op passed string 'invalid'"):
       with self.cached_session() as sess:
@@ -347,7 +347,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
             errors="replace",
             replacement_char=ord(" "),
             replace_control_characters=False)
-        sess.run(outputs)
+        self.evaluate(outputs)
 
   def test_invalid_error_policy_causes_errors(self):
     strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]
@@ -362,7 +362,7 @@ class UnicodeTranscodeOpTest(test.TestCase, parameterized.TestCase):
             errors="invalid",
             replacement_char=ord(" "),
             replace_control_characters=False)
-        sess.run(outputs)
+        self.evaluate(outputs)
 
   def test_forwarding(self):
     with self.cached_session():
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 316570e13e2..f203263e0c5 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -32,7 +32,7 @@ class UniqueTest(test.TestCase):
     x = np.random.randint(2, high=10, size=7000)
     with self.cached_session() as sess:
       y, idx = array_ops.unique(x)
-      tf_y, tf_idx = sess.run([y, idx])
+      tf_y, tf_idx = self.evaluate([y, idx])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -43,7 +43,7 @@ class UniqueTest(test.TestCase):
     x = np.random.randint(2, high=10, size=7000)
     with self.cached_session() as sess:
       y, idx = array_ops.unique(x, out_idx=dtypes.int64)
-      tf_y, tf_idx = sess.run([y, idx])
+      tf_y, tf_idx = self.evaluate([y, idx])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -55,7 +55,7 @@ class UniqueTest(test.TestCase):
     x = [chr(i) for i in indx]
     with self.cached_session() as sess:
       y, idx = array_ops.unique(x)
-      tf_y, tf_idx = sess.run([y, idx])
+      tf_y, tf_idx = self.evaluate([y, idx])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -67,9 +67,9 @@ class UniqueTest(test.TestCase):
       x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
       with self.cached_session() as sess:
         y0, idx0 = gen_array_ops.unique_v2(x, axis=np.array([0], dtype))
-        tf_y0, tf_idx0 = sess.run([y0, idx0])
+        tf_y0, tf_idx0 = self.evaluate([y0, idx0])
         y1, idx1 = gen_array_ops.unique_v2(x, axis=np.array([1], dtype))
-        tf_y1, tf_idx1 = sess.run([y1, idx1])
+        tf_y1, tf_idx1 = self.evaluate([y1, idx1])
       self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
       self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
       self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
@@ -81,7 +81,7 @@ class UniqueTest(test.TestCase):
     x = np.random.randint(2, high=10, size=7000)
     with self.cached_session() as sess:
       y, idx = gen_array_ops.unique_v2(x, axis=np.array([], np.int32))
-      tf_y, tf_idx = sess.run([y, idx])
+      tf_y, tf_idx = self.evaluate([y, idx])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -95,7 +95,7 @@ class UniqueWithCountsTest(test.TestCase):
     x = np.random.randint(2, high=10, size=7000)
     with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x)
-      tf_y, tf_idx, tf_count = sess.run([y, idx, count])
+      tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -108,7 +108,7 @@ class UniqueWithCountsTest(test.TestCase):
     x = np.random.randint(2, high=10, size=7000)
     with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x, out_idx=dtypes.int64)
-      tf_y, tf_idx, tf_count = sess.run([y, idx, count])
+      tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -123,7 +123,7 @@ class UniqueWithCountsTest(test.TestCase):
 
     with self.cached_session() as sess:
       y, idx, count = array_ops.unique_with_counts(x)
-      tf_y, tf_idx, tf_count = sess.run([y, idx, count])
+      tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
@@ -139,10 +139,10 @@ class UniqueWithCountsTest(test.TestCase):
       with self.cached_session() as sess:
         y0, idx0, count0 = gen_array_ops.unique_with_counts_v2(
             x, axis=np.array([0], dtype))
-        tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0])
+        tf_y0, tf_idx0, tf_count0 = self.evaluate([y0, idx0, count0])
         y1, idx1, count1 = gen_array_ops.unique_with_counts_v2(
             x, axis=np.array([1], dtype))
-        tf_y1, tf_idx1, tf_count1 = sess.run([y1, idx1, count1])
+        tf_y1, tf_idx1, tf_count1 = self.evaluate([y1, idx1, count1])
       self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
       self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
       self.assertAllEqual(tf_count0, np.array([2, 1]))
@@ -157,7 +157,7 @@ class UniqueWithCountsTest(test.TestCase):
     with self.cached_session() as sess:
       y, idx, count = gen_array_ops.unique_with_counts_v2(
           x, axis=np.array([], np.int32))
-      tf_y, tf_idx, tf_count = sess.run([y, idx, count])
+      tf_y, tf_idx, tf_count = self.evaluate([y, idx, count])
 
     self.assertEqual(len(x), len(tf_idx))
     self.assertEqual(len(tf_y), len(np.unique(x)))
diff --git a/tensorflow/python/kernel_tests/variable_ops_test.py b/tensorflow/python/kernel_tests/variable_ops_test.py
index 769bbba47ba..cdfd805a935 100644
--- a/tensorflow/python/kernel_tests/variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/variable_ops_test.py
@@ -220,7 +220,7 @@ class VariableOpTest(test.TestCase):
     with self.test_session(use_gpu=True):
       # The variable and an op to increment it are on the GPU.
       var = state_ops.variable_op([1], dtypes.float32)
-      state_ops.assign(var, [1.0]).eval()
+      self.evaluate(state_ops.assign(var, [1.0]))
       increment = state_ops.assign_add(var, [1.0])
       with ops.control_dependencies([increment]):
         with ops.device("/cpu:0"):
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 6267b01a299..37012af2991 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -434,19 +434,19 @@ class VariableScopeTest(test.TestCase):
         add = v1 + v0
       # v0 should be uninitialized.
       with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
-        sess.run(v0)
+        self.evaluate(v0)
       # We should be able to initialize and run v1 without initializing
       # v0, even if the variable was created with a control dep on v0.
-      sess.run(v1.initializer)
-      self.assertEqual(1, sess.run(v1))
+      self.evaluate(v1.initializer)
+      self.assertEqual(1, self.evaluate(v1))
       # v0 should still be uninitialized.
       with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
-        sess.run(v0)
+        self.evaluate(v0)
       with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
-        sess.run(add)
+        self.evaluate(add)
       # If we initialize v0 we should be able to run 'add'.
-      sess.run(v0.initializer)
-      sess.run(add)
+      self.evaluate(v0.initializer)
+      self.evaluate(add)
 
   # TODO(mihaimaruseac): Not converted to use wrap_function because of
   # AssertionError: True is not false (last assertFalse)
@@ -489,19 +489,19 @@ class VariableScopeTest(test.TestCase):
       v2 = var_dict["v2"]
       # We should be able to initialize and run v1 and v2 without initializing
       # v0, even if the variable was created with a control dep on v0.
-      sess.run(v1.initializer)
-      self.assertEqual([1], sess.run(v1))
-      sess.run(v2.initializer)
-      self.assertEqual([2], sess.run(v2))
+      self.evaluate(v1.initializer)
+      self.assertEqual([1], self.evaluate(v1))
+      self.evaluate(v2.initializer)
+      self.assertEqual([2], self.evaluate(v2))
       # v0 should still be uninitialized.
       with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
-        sess.run(v0)
+        self.evaluate(v0)
       # We should not be able to run 'add' yet.
       with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
-        sess.run(add)
+        self.evaluate(add)
       # If we initialize v0 we should be able to run 'add'.
-      sess.run(v0.initializer)
-      sess.run(add)
+      self.evaluate(v0.initializer)
+      self.evaluate(add)
 
   # TODO(mihaimaruseac): Not converted to use wrap_function because of
   # TypeError: Expected tf.group() expected Tensor arguments not 'None' with
@@ -1580,7 +1580,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
     self.assertEqual("custom_getter/add:0", v.name)
     with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
-      np_vars, np_v = sess.run([true_vars, v])
+      np_vars, np_v = self.evaluate([true_vars, v])
       self.assertAllClose(np_v, sum(np_vars))
 
   # TODO(mihaimaruseac): Not converted to use wrap_function because of
@@ -1625,7 +1625,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
 
     with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
-      np_vars, np_v = sess.run([true_vars, v])
+      np_vars, np_v = self.evaluate([true_vars, v])
       # take products of sums of products
       self.assertAllClose(
           np_v, (((np_vars[0] * np_vars[1]) + (np_vars[2] * np_vars[3])) + (
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index faa9f820676..14ec46dcb22 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -228,13 +228,13 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([2], self.evaluate(v2))
       # v0 should still be uninitialized.
       with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
-        sess.run(v0)
+        self.evaluate(v0)
       # We should not be able to run 'add' yet.
       with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
-        sess.run(add)
+        self.evaluate(add)
       # If we initialize v0 we should be able to run 'add'.
       self.evaluate(v0.initializer)
-      sess.run(add)
+      self.evaluate(add)
 
   def testControlFlowInitialization(self):
     """Expects an error if an initializer is in a control-flow scope."""
@@ -476,11 +476,11 @@ class VariablesTestCase(test.TestCase):
     with ops.Graph().as_default(), self.cached_session() as sess:
       # v describes a VariableDef-based variable without an initial value.
       v = variables.Variable(variable_def=v_def)
-      self.assertEqual(3.0, sess.run(v.initialized_value()))
+      self.assertEqual(3.0, self.evaluate(v.initialized_value()))
 
       # initialized_value should not rerun the initializer_op if the variable
       # has already been initialized elsewhere.
-      sess.run(v.assign(1.0))
+      self.evaluate(v.assign(1.0))
       self.assertEqual(1.0, v.initialized_value().eval())
 
     v_def.ClearField("initial_value_name")
@@ -492,7 +492,7 @@ class VariablesTestCase(test.TestCase):
       self.assertProtoEquals(v_def, v.to_proto())
       # But attempts to use initialized_value will result in errors.
       with self.assertRaises(ValueError):
-        sess.run(v.initialized_value())
+        self.evaluate(v.initialized_value())
 
   def testTrainableInProto(self):
     with ops.Graph().as_default():
@@ -579,7 +579,7 @@ class IsInitializedTest(test.TestCase):
       variables.global_variables_initializer().run()
       do_opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
           objective)
-      sess.run([do_opt])
+      self.evaluate([do_opt])
       self.assertAllClose([[0.9, 0.9], [0.9, 0.9]], self.evaluate(b))
 
 
@@ -596,9 +596,9 @@ class ObsoleteIsInitializedTest(test.TestCase):
       _ = v, w
       inited = variables.assert_variables_initialized()
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
-        sess.run(inited)
+        self.evaluate(inited)
       variables.global_variables_initializer().run()
-      sess.run(inited)
+      self.evaluate(inited)
 
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index 0634dfa2d8c..48b32f06aa1 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -48,8 +48,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret = while_loop_v2(lambda v: v < 8., lambda v: v * v, [x])
     grad = gradients_impl.gradients(ret, [x])
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(ret), 16.)
-      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertEqual(self.evaluate(ret), 16.)
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
 
   def testMultipleLoopVarsBasic(self):
     x = constant_op.constant(5.)
@@ -65,8 +65,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     # Note: This is simply d_ret[0]/d_x since d_ret[1]/d_x is 0.
     grad = gradients_impl.gradients(ret, [x])  # [2*x*y]
     with self.cached_session() as sess:
-      self.assertSequenceEqual(sess.run(ret), [45., 3.])
-      self.assertSequenceEqual(sess.run(grad), [9.])
+      self.assertSequenceEqual(self.evaluate(ret), [45., 3.])
+      self.assertSequenceEqual(self.evaluate(grad), [9.])
 
   def testMultipleLoopVars(self):
     x = constant_op.constant(5.)
@@ -88,13 +88,13 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grady_1 = gradients_impl.gradients(ret[1], [y])  # [x + 1]
     grady_2 = gradients_impl.gradients(ret, [y])  # [2*x*y + x**2 + x + 1]
     with self.cached_session() as sess:
-      self.assertSequenceEqual(sess.run(ret), [120., 23.])
-      self.assertSequenceEqual(sess.run(gradx_0), [39.])
-      self.assertSequenceEqual(sess.run(gradx_1), [4.])
-      self.assertSequenceEqual(sess.run(gradx_2), [43.])
-      self.assertSequenceEqual(sess.run(grady_0), [55.])
-      self.assertSequenceEqual(sess.run(grady_1), [6.])
-      self.assertSequenceEqual(sess.run(grady_2), [61.])
+      self.assertSequenceEqual(self.evaluate(ret), [120., 23.])
+      self.assertSequenceEqual(self.evaluate(gradx_0), [39.])
+      self.assertSequenceEqual(self.evaluate(gradx_1), [4.])
+      self.assertSequenceEqual(self.evaluate(gradx_2), [43.])
+      self.assertSequenceEqual(self.evaluate(grady_0), [55.])
+      self.assertSequenceEqual(self.evaluate(grady_1), [6.])
+      self.assertSequenceEqual(self.evaluate(grady_2), [61.])
 
   def testMultipleWhileLoops(self):
     x = constant_op.constant(2.)
@@ -103,8 +103,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grad = gradients_impl.gradients(ret2, [x])  # 4x**3
     grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
     with self.cached_session() as sess:
-      self.assertSequenceEqual(sess.run(grad), [32.])
-      self.assertSequenceEqual(sess.run(grad_grad), [48.])
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
+      self.assertSequenceEqual(self.evaluate(grad_grad), [48.])
 
   def testDoubleDerivative(self):
     x = constant_op.constant(2.)
@@ -112,9 +112,9 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grad = gradients_impl.gradients(ret, [x])  # 4x**3
     grad_grad = gradients_impl.gradients(grad, [x])  # 12x**2
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(ret), 16.)
-      self.assertSequenceEqual(sess.run(grad), [32.])
-      self.assertSequenceEqual(sess.run(grad_grad), [48.])
+      self.assertEqual(self.evaluate(ret), 16.)
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
+      self.assertSequenceEqual(self.evaluate(grad_grad), [48.])
 
   def testPruning(self):
     x = constant_op.constant(1)
@@ -157,8 +157,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret = while_loop_v2(lambda v: v + y < 9., lambda v: v * 3., [x])
     grad = gradients_impl.gradients(ret, [x])
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(ret), 18.)
-      self.assertSequenceEqual(sess.run(grad), [9.])
+      self.assertEqual(self.evaluate(ret), 18.)
+      self.assertSequenceEqual(self.evaluate(grad), [9.])
 
   def testCaptureExternalTensorInBody(self):
     x = constant_op.constant(2.)
@@ -166,8 +166,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret = while_loop_v2(lambda v: v < 8., lambda v: v * y, [x])
     grad = gradients_impl.gradients(ret, [x])
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(ret), 18.)
-      self.assertSequenceEqual(sess.run(grad), [9.])
+      self.assertEqual(self.evaluate(ret), 18.)
+      self.assertSequenceEqual(self.evaluate(grad), [9.])
 
   def testLoopWithTensorListPushBack(self):
     x = constant_op.constant(2.)
@@ -188,7 +188,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grad = gradients_impl.gradients(ret[0], x)
     with self.cached_session() as sess:
       self.assertEqual(sess.run(ret[0]), 16.)
-      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
 
   def testDuplicateAccumulator(self):
     x = constant_op.constant(2.)
@@ -222,7 +222,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     grad = gradients_impl.gradients(ret[0], x)
     with self.cached_session() as sess:
       self.assertEqual(sess.run(ret[0]), 16.)
-      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
 
   @parameterized.named_parameters(
       ("UnknownShape", None),
@@ -315,9 +315,9 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       y0 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="elems")
       # map_fn uses TensorArray internally.
       r = functional_ops.map_fn(lambda x: math_ops.multiply(x, param), y0)
-      self.assertAllClose([2.0, 4.0, 6.0, 8.0, 10.0, 12.0], sess.run(r))
+      self.assertAllClose([2.0, 4.0, 6.0, 8.0, 10.0, 12.0], self.evaluate(r))
       r = gradients_impl.gradients(r, param)[0]
-      self.assertAllClose(21.0, sess.run(r))
+      self.assertAllClose(21.0, self.evaluate(r))
 
   def testNestedWhile(self):
     # Compute sum of geometric progression: n^0 + n^1 + ... + n^m
@@ -334,8 +334,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     result = while_loop_v2(lambda i, _: i >= 0, Body, [m, sum_of_powers])[1]
     grad = gradients_impl.gradients(result, [n])
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(result), 364.)
-      self.assertSequenceEqual(sess.run(grad), [547.])
+      self.assertEqual(self.evaluate(result), 364.)
+      self.assertSequenceEqual(self.evaluate(grad), [547.])
 
   def testIdentityNodeInBody(self):
 
@@ -348,8 +348,8 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     ret = while_loop_v2(lambda v: v < 8., Body, [x])
     grad = gradients_impl.gradients(ret, [x])
     with self.cached_session() as sess:
-      self.assertEqual(sess.run(ret), 16.)
-      self.assertSequenceEqual(sess.run(grad), [32.])
+      self.assertEqual(self.evaluate(ret), 16.)
+      self.assertSequenceEqual(self.evaluate(grad), [32.])
 
   def testNestedWhileAndTensorArray(self):
     n = constant_op.constant(3.0)
diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py
index c3c7f867a1e..77669f08cc1 100644
--- a/tensorflow/python/kernel_tests/xent_op_test.py
+++ b/tensorflow/python/kernel_tests/xent_op_test.py
@@ -56,7 +56,7 @@ class XentTest(test.TestCase):
     with self.cached_session(use_gpu=use_gpu) as sess:
       loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
           np_features, np_labels)
-      tf_loss, tf_backprop = sess.run([loss, backprop])
+      tf_loss, tf_backprop = self.evaluate([loss, backprop])
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
     self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
 
@@ -65,7 +65,7 @@ class XentTest(test.TestCase):
     with self.cached_session(use_gpu=use_gpu) as sess:
       loss = nn_ops.softmax_cross_entropy_with_logits(
           labels=np_labels, logits=np_features, dim=dim)
-      tf_loss = sess.run(loss)
+      tf_loss = self.evaluate(loss)
     print("np_loss:", np_loss)
     print("tf_loss:", tf_loss)
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
@@ -80,7 +80,7 @@ class XentTest(test.TestCase):
         loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
             np.array([[1.], [-1.], [0.]]).astype(dtype),
             np.array([[-1.], [0.], [1.]]).astype(dtype))
-        tf_loss, tf_backprop = sess.run([loss, backprop])
+        tf_loss, tf_backprop = self.evaluate([loss, backprop])
       self.assertAllClose([0.0, 0.0, 0.0], tf_loss)
       self.assertAllClose([[2.0], [1.0], [0.0]], tf_backprop)
 
@@ -148,7 +148,7 @@ class XentTest(test.TestCase):
       with self.cached_session(use_gpu=use_gpu) as sess:
         loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits(
             tf_f, tf_l)
-        tf_loss, tf_backprop = sess.run([loss, backprop])
+        tf_loss, tf_backprop = self.evaluate([loss, backprop])
       self.assertAllCloseAccordingToType(np_loss, tf_loss)
       self.assertAllCloseAccordingToType(np_backprop, tf_backprop)
 
@@ -280,7 +280,7 @@ class XentTest(test.TestCase):
     with self.session(use_gpu=True) as sess:
       loss = nn_ops.softmax_cross_entropy_with_logits(
           labels=labels, logits=features)
-      tf_loss = sess.run(loss)
+      tf_loss = self.evaluate(loss)
     self.assertAllEqual(np_loss, tf_loss)
 
 
diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py
index 257fa271567..d3200fa5b57 100644
--- a/tensorflow/python/layers/convolutional_test.py
+++ b/tensorflow/python/layers/convolutional_test.py
@@ -276,8 +276,8 @@ class ConvTest(test.TestCase):
         # Check the names of weights in order.
         self.assertTrue('kernel' in weights[0].name)
         self.assertTrue('bias' in weights[1].name)
-        sess.run(variables.global_variables_initializer())
-        weights = sess.run(weights)
+        self.evaluate(variables.global_variables_initializer())
+        weights = self.evaluate(weights)
         # Check that the kernel weights got initialized to ones (from scope)
         self.assertAllClose(weights[0], np.ones((3, 3, 3, 32)))
         # Check that the bias still got initialized to zeros.
@@ -663,8 +663,8 @@ class SeparableConv2DTest(test.TestCase):
         self.assertTrue('depthwise_kernel' in weights[0].name)
         self.assertTrue('pointwise_kernel' in weights[1].name)
         self.assertTrue('bias' in weights[2].name)
-        sess.run(variables.global_variables_initializer())
-        weights = sess.run(weights)
+        self.evaluate(variables.global_variables_initializer())
+        weights = self.evaluate(weights)
         # Check that the kernel weights got initialized to ones (from scope)
         self.assertAllClose(weights[0], np.ones((3, 3, 3, 1)))
         self.assertAllClose(weights[1], np.ones((1, 1, 3, 32)))
@@ -902,8 +902,8 @@ class Conv2DTransposeTest(test.TestCase):
         # Check the names of weights in order.
         self.assertTrue('kernel' in weights[0].name)
         self.assertTrue('bias' in weights[1].name)
-        sess.run(variables.global_variables_initializer())
-        weights = sess.run(weights)
+        self.evaluate(variables.global_variables_initializer())
+        weights = self.evaluate(weights)
         # Check that the kernel weights got initialized to ones (from scope)
         self.assertAllClose(weights[0], np.ones((3, 3, 32, 3)))
         # Check that the bias still got initialized to zeros.
@@ -1084,8 +1084,8 @@ class Conv3DTransposeTest(test.TestCase):
         # Check the names of weights in order.
         self.assertTrue('kernel' in weights[0].name)
         self.assertTrue('bias' in weights[1].name)
-        sess.run(variables.global_variables_initializer())
-        weights = sess.run(weights)
+        self.evaluate(variables.global_variables_initializer())
+        weights = self.evaluate(weights)
         # Check that the kernel weights got initialized to ones (from scope)
         self.assertAllClose(weights[0], np.ones((3, 3, 3, 4, 32)))
         # Check that the bias still got initialized to zeros.
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 0343bfa8bd2..a61639b2db8 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -443,7 +443,7 @@ class DropoutTest(test.TestCase):
       dp = core_layers.Dropout(rate, name='dropout')
       inputs = array_ops.ones((5, 5))
       dropped = dp.apply(inputs, training=True)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       np_output = sess.run(dropped, feed_dict={rate: 0.5})
       self.assertAlmostEqual(0., np_output.min())
       np_output = sess.run(dropped, feed_dict={rate: 0.0})
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index ba2bf10cf3a..cc3badbde1d 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -78,7 +78,7 @@ class BNTest(test.TestCase):
       if restore:
         saver.restore(sess, checkpoint_path)
       else:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
       np.random.seed(0)
       for _ in range(2):
         image_val = np.random.rand(*shape).astype(dtype.as_numpy_dtype)
@@ -321,9 +321,9 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 4, 1))
       np_beta = np.reshape(np_beta, (1, 4, 1))
 
@@ -336,8 +336,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 2))
       std = np.std(np_inputs, axis=(0, 2))
       variance = np.square(std)
@@ -363,8 +364,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 3))
       np_beta = np.reshape(np_beta, (1, 1, 3))
       for _ in range(100):
@@ -376,8 +377,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1))
       std = np.std(np_inputs, axis=(0, 1))
       variance = np.square(std)
@@ -404,8 +406,8 @@ class BNTest(test.TestCase):
 
       with self.session(use_gpu=True) as sess:
         # Test training with placeholder learning phase.
-        sess.run(variables.global_variables_initializer())
-        np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+        self.evaluate(variables.global_variables_initializer())
+        np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
         np_gamma = np.reshape(np_gamma, (1, 4, 1, 1))
         np_beta = np.reshape(np_beta, (1, 4, 1, 1))
         for _ in range(100):
@@ -417,8 +419,9 @@ class BNTest(test.TestCase):
           self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
         # Verify that the statistics are updated during training.
-        moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-        np_inputs = sess.run(inputs)
+        moving_mean, moving_var = self.evaluate(
+            [bn.moving_mean, bn.moving_variance])
+        np_inputs = self.evaluate(inputs)
         mean = np.mean(np_inputs, axis=(0, 2, 3))
         std = np.std(np_inputs, axis=(0, 2, 3))
         variance = np.square(std)
@@ -444,8 +447,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 3, 1))
       np_beta = np.reshape(np_beta, (1, 1, 3, 1))
       for _ in range(100):
@@ -457,8 +460,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1, 3))
       std = np.std(np_inputs, axis=(0, 1, 3))
       variance = np.square(std)
@@ -484,8 +488,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
@@ -497,8 +501,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1, 2))
       std = np.std(np_inputs, axis=(0, 1, 2))
       variance = np.square(std)
@@ -524,8 +529,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
@@ -537,8 +542,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1, 2))
       std = np.std(np_inputs, axis=(0, 1, 2))
       variance = np.square(std)
@@ -565,8 +571,8 @@ class BNTest(test.TestCase):
 
       with self.cached_session() as sess:
         # Test training with placeholder learning phase.
-        sess.run(variables.global_variables_initializer())
-        np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+        self.evaluate(variables.global_variables_initializer())
+        np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
         np_gamma = np.reshape(np_gamma, (1, 4, 1, 1))
         np_beta = np.reshape(np_beta, (1, 4, 1, 1))
         for _ in range(100):
@@ -578,8 +584,9 @@ class BNTest(test.TestCase):
           self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
         # Verify that the statistics are updated during training.
-        moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-        np_inputs = sess.run(inputs)
+        moving_mean, moving_var = self.evaluate(
+            [bn.moving_mean, bn.moving_variance])
+        np_inputs = self.evaluate(inputs)
         mean = np.mean(np_inputs, axis=(0, 2, 3))
         std = np.std(np_inputs, axis=(0, 2, 3))
         variance = np.square(std)
@@ -605,8 +612,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
@@ -619,8 +626,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1, 2))
       std = np.std(np_inputs, axis=(0, 1, 2))
       variance = np.square(std)
@@ -646,8 +654,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
@@ -658,8 +666,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 1, 2))
       std = np.std(np_inputs, axis=(0, 1, 2))
       variance = np.square(std)
@@ -667,7 +676,7 @@ class BNTest(test.TestCase):
       self.assertAllClose(variance, moving_var, atol=1e-2)
 
       # Test inference with placeholder learning phase.
-      np_output = sess.run(outputs_infer)
+      np_output = self.evaluate(outputs_infer)
 
       # Verify that the axis is normalized during inference.
       normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
@@ -696,8 +705,8 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
-      np_gamma, np_beta = sess.run([gamma, beta])
+      self.evaluate(variables.global_variables_initializer())
+      np_gamma, np_beta = self.evaluate([gamma, beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       for _ in range(100):
@@ -709,8 +718,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      np_moving_mean, np_moving_var = sess.run([moving_mean, moving_variance])
-      np_inputs = sess.run(inputs)
+      np_moving_mean, np_moving_var = self.evaluate(
+          [moving_mean, moving_variance])
+      np_inputs = self.evaluate(inputs)
       np_mean = np.mean(np_inputs, axis=(0, 1, 2))
       np_std = np.std(np_inputs, axis=(0, 1, 2))
       np_variance = np.square(np_std)
@@ -758,14 +768,15 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(100):
         np_output, _, _ = sess.run([outputs2] + updates,
                                    feed_dict={training: True})
 
       # Verify that the statistics are updated during training.
-      np_moving_mean, np_moving_var = sess.run([moving_mean, moving_variance])
-      np_inputs = sess.run(inputs2)
+      np_moving_mean, np_moving_var = self.evaluate(
+          [moving_mean, moving_variance])
+      np_inputs = self.evaluate(inputs2)
       np_mean = np.mean(np_inputs, axis=(0, 1, 2))
       np_std = np.std(np_inputs, axis=(0, 1, 2))
       np_variance = np.square(np_std)
@@ -773,7 +784,7 @@ class BNTest(test.TestCase):
       self.assertAllClose(np_variance, np_moving_var, atol=1e-2)
 
       # Verify that the axis is normalized during training.
-      np_gamma, np_beta = sess.run([gamma, beta])
+      np_gamma, np_beta = self.evaluate([gamma, beta])
       np_gamma = np.reshape(np_gamma, (1, 1, 1, 6))
       np_beta = np.reshape(np_beta, (1, 1, 1, 6))
       normed_np_output = ((np_output - epsilon) * np_gamma) + np_beta
@@ -885,7 +896,7 @@ class BNTest(test.TestCase):
     renorm_mean = renorm_stddev = 0.
     renorm_weight = 0.
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
 
@@ -937,7 +948,7 @@ class BNTest(test.TestCase):
     moving_mean = 0.
     moving_variance = 1.
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
         yt_val_train, adj_scale_val, adj_bias_val = sess.run(
@@ -990,7 +1001,7 @@ class BNTest(test.TestCase):
     renorm_mean = renorm_stddev = 0.
     renorm_weight = 0.
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
         yt_val_train, adj_scale_val, adj_bias_val = sess.run(
@@ -1040,7 +1051,7 @@ class BNTest(test.TestCase):
         out1.shape.as_list(), out2.shape.as_list())
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       x = np.random.random(shape)
       y1, y2 = sess.run([out1, out2], feed_dict={inp: x})
@@ -1062,7 +1073,7 @@ class BNTest(test.TestCase):
         inp, virtual_batch_size=2)
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       x = np.random.random(np_shape)
       y = sess.run(out, feed_dict={inp: x})
@@ -1093,7 +1104,7 @@ class BNTest(test.TestCase):
                     shape[1]])
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
 
@@ -1146,7 +1157,7 @@ class BNTest(test.TestCase):
                    shape[1:])
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
 
@@ -1200,7 +1211,7 @@ class BNTest(test.TestCase):
                    shape[1:])
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
 
@@ -1256,9 +1267,9 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
 
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
@@ -1269,8 +1280,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=0, keepdims=True)
       std = np.std(np_inputs, axis=0, keepdims=True)
       variance = np.square(std)
@@ -1296,9 +1308,9 @@ class BNTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Test training with placeholder learning phase.
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
-      np_gamma, np_beta = sess.run([bn.gamma, bn.beta])
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
 
       for _ in range(100):
         np_output, _, _ = sess.run([outputs] + bn.updates,
@@ -1309,8 +1321,9 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
       # Verify that the statistics are updated during training.
-      moving_mean, moving_var = sess.run([bn.moving_mean, bn.moving_variance])
-      np_inputs = sess.run(inputs)
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      np_inputs = self.evaluate(inputs)
       mean = np.mean(np_inputs, axis=(0, 4), keepdims=True)
       std = np.std(np_inputs, axis=(0, 4), keepdims=True)
       variance = np.square(std)
@@ -1350,7 +1363,7 @@ class BNTest(test.TestCase):
                    shape[1:])
 
     with self.session(use_gpu=True) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(5):
         x = np.random.random(shape)
 
diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py
index f6f35374c0e..739278273b9 100644
--- a/tensorflow/python/ops/bitwise_ops_test.py
+++ b/tensorflow/python/ops/bitwise_ops_test.py
@@ -66,7 +66,8 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
         inputs = np.array(raw_inputs, dtype=dtype.as_numpy_dtype)
         truth = [count_bits(x) for x in inputs]
         input_tensor = constant_op.constant(inputs, dtype=dtype)
-        popcnt_result = sess.run(gen_bitwise_ops.population_count(input_tensor))
+        popcnt_result = self.evaluate(
+            gen_bitwise_ops.population_count(input_tensor))
         self.assertAllEqual(truth, popcnt_result)
 
   def testInvertOp(self):
@@ -89,7 +90,7 @@ class BitwiseOpTest(test_util.TensorFlowTestCase):
         self.assertAllEqual(not_a_or_a, [not_0] * 4)
         # For unsigned dtypes let's also check the result directly.
         if dtype.is_unsigned:
-          inverted = sess.run(bitwise_ops.invert(input_tensor))
+          inverted = self.evaluate(bitwise_ops.invert(input_tensor))
           expected = [dtype.max - x for x in inputs]
           self.assertAllEqual(inverted, expected)
 
diff --git a/tensorflow/python/ops/clip_ops_test.py b/tensorflow/python/ops/clip_ops_test.py
index 8aa9c4ffb34..e9f7941b426 100644
--- a/tensorflow/python/ops/clip_ops_test.py
+++ b/tensorflow/python/ops/clip_ops_test.py
@@ -35,7 +35,7 @@ class ClipOpsTest(test.TestCase):
       input_op = constant_op.constant(inputs)
       clipped = clip_ops.clip_by_norm(input_op, max_norm)
       check_op = numerics.add_check_numerics_ops()
-      result, _ = sess.run([clipped, check_op])
+      result, _ = self.evaluate([clipped, check_op])
     self.assertAllClose(result, expected)
 
   def _testClipIndexedSlicesByNorm(self, values, indices, shape, max_norm,
@@ -54,7 +54,7 @@ class ClipOpsTest(test.TestCase):
       # Tensor mode
       dense_tensor = ops.convert_to_tensor(indixed_slices)
       dense_clipped = clip_ops.clip_by_norm(dense_tensor, max_norm, axes)
-      result, expected = sess.run([clipped, dense_clipped])
+      result, expected = self.evaluate([clipped, dense_clipped])
     self.assertAllClose(result, expected)
 
   def testClipTensorByNorm(self):
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 47675d3f343..c020189ad63 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -209,9 +209,9 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
       optimizer = momentum.MomentumOptimizer(0.1, 0.9)
       train_op = optimizer.minimize(cost)
       with self.cached_session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for _ in range(10):
-          sess.run([train_op])
+          self.evaluate([train_op])
 
   def testResourceReadInLoop(self):
     with ops.Graph().as_default():
@@ -232,7 +232,7 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
           cond, body, [constant_op.constant(0),
                        constant_op.constant(0.0)])
       with self.cached_session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllEqual(10.0, self.evaluate(cost))
 
   def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
@@ -269,8 +269,8 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
                                           static_grads.indices)
 
       with self.cached_session() as sess:
-        sess.run(variables.global_variables_initializer())
-        self.assertAllEqual(*sess.run([static_grads, dynamic_grads]))
+        self.evaluate(variables.global_variables_initializer())
+        self.assertAllEqual(*self.evaluate([static_grads, dynamic_grads]))
 
   def testIndexedSlicesGradientInCondInWhileLoop(self):
     self.doTestIndexedSlicesGradientInCondInWhileLoop(use_resource=False)
@@ -398,9 +398,9 @@ class CondTest(test_util.TensorFlowTestCase):
             pred=bool_var,
             true_fn=lambda: state_ops.assign(bool_var, False),
             false_fn=lambda: True)
-        sess.run(bool_var.initializer)
-        self.assertEquals(sess.run(cond_on_bool_var), False)
-        self.assertEquals(sess.run(cond_on_bool_var), True)
+        self.evaluate(bool_var.initializer)
+        self.assertEquals(self.evaluate(cond_on_bool_var), False)
+        self.assertEquals(self.evaluate(cond_on_bool_var), True)
 
   def testCondMissingArg1(self):
     with ops.Graph().as_default():
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index 262b62e0131..a9058c4a341 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -365,7 +365,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       grads = gradients.gradients(
           [y], [x], unconnected_gradients="zero")
       with self.cached_session() as sess:
-        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], sess.run(grads)[0])
+        self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(grads)[0])
 
   def testUnconnectedGradientsZeroConnectedGradients(self):
     with ops.Graph().as_default():
@@ -374,7 +374,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       grad = gradients.gradients(
           [y], [x], unconnected_gradients="zero")
       with self.cached_session() as sess:
-        self.assertEquals(3.0, sess.run(grad)[0])
+        self.assertEquals(3.0, self.evaluate(grad)[0])
 
   def testUnknownUnconnectedGradientsValueGiven(self):
     with ops.Graph().as_default():
@@ -438,8 +438,8 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
       grads = gradients.gradients(y, [x, b1])
 
       with self.cached_session() as sess:
-        self.assertAllEqual([40.0], sess.run(grads)[0])
-        self.assertAllEqual([10.0], sess.run(grads)[1])
+        self.assertAllEqual([40.0], self.evaluate(grads)[0])
+        self.assertAllEqual([10.0], self.evaluate(grads)[1])
 
   def testFunctionGradientsWithGradFunc(self):
     g = ops.Graph()
@@ -487,7 +487,7 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
       f = Foo()
       with self.cached_session() as sess:
-        self.assertEqual(sess.run(f), 2.0)
+        self.assertEqual(self.evaluate(f), 2.0)
 
   def testGradientOfCaptured(self):
     with ops.Graph().as_default():
@@ -501,7 +501,7 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
       f = Foo()
       with self.cached_session() as sess:
-        self.assertEqual(sess.run(f), 2.0)
+        self.assertEqual(self.evaluate(f), 2.0)
 
   def testCapturedResourceVariable(self):
     with ops.Graph().as_default():
@@ -515,8 +515,8 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
       f = Foo()
       with self.cached_session() as sess:
-        sess.run(variables.global_variables_initializer())
-        self.assertEqual(sess.run(f), 2.0)
+        self.evaluate(variables.global_variables_initializer())
+        self.assertEqual(self.evaluate(f), 2.0)
 
   def testCapturedNested(self):
     with ops.Graph().as_default():
@@ -541,9 +541,9 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
       x1_grad, x2_grad = Outer()
       with self.cached_session() as sess:
         # 1.0 + None + 2.0 + 1.0 = 4.0
-        self.assertEqual(sess.run(x1_grad), 4.0)
+        self.assertEqual(self.evaluate(x1_grad), 4.0)
         # None + 1.0 + 1.0 + None = 2.0
-        self.assertEqual(sess.run(x2_grad), 2.0)
+        self.assertEqual(self.evaluate(x2_grad), 2.0)
 
   def testCapturedFromFunction(self):
     with ops.Graph().as_default():
@@ -563,7 +563,7 @@ class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
       z_grad = Outer()
       with self.cached_session() as sess:
-        self.assertEqual(sess.run(z_grad), 3.0)
+        self.assertEqual(self.evaluate(z_grad), 3.0)
 
   def testCapturedEagerTensors(self):
     # Test that we can handle captured eager tensors unrelated to the gradient
@@ -873,7 +873,7 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
       y = MyMultiply(x1, x2)
       dy = gradients.gradients(y, [x1, x2])
       with session.Session() as sess:
-        self.assertAllEqual([3., 5.], sess.run(dy))
+        self.assertAllEqual([3., 5.], self.evaluate(dy))
 
   def testCustomGradientErrors(self):
 
@@ -914,7 +914,7 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
       for g in grads:
         self.assertTrue(g is not None)
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         dw = sess.run(math_ops.reduce_sum(grads[1]))
         self.assertEqual(12., dw)
 
@@ -1074,7 +1074,7 @@ class TensorListGradientsTest(test_util.TensorFlowTestCase):
 
       grad = gradients.gradients(tl, a, grad_ys=grad_tl)[0]
       with self.cached_session() as sess:
-        self.assertEquals(sess.run(grad), 5.)
+        self.assertEquals(self.evaluate(grad), 5.)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py
index 32c2f37c0b7..0ea15b0d23f 100644
--- a/tensorflow/python/ops/image_grad_test.py
+++ b/tensorflow/python/ops/image_grad_test.py
@@ -44,7 +44,7 @@ class ResizeNearestNeighborOpTest(test.TestCase):
                                                        out_shape[1:3])
         self.assertEqual(out_shape, list(resize_out.get_shape()))
 
-        resize_out = sess.run(resize_out)
+        resize_out = self.evaluate(resize_out)
       self.assertEqual(out_shape, list(resize_out.shape))
 
   def testGradFromResizeToLargerInBothDims(self):
@@ -113,7 +113,7 @@ class ResizeBilinearOpTest(test.TestCase):
       resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
       self.assertEqual(out_shape, list(resize_out.get_shape()))
 
-      resize_out = sess.run(resize_out)
+      resize_out = self.evaluate(resize_out)
       self.assertEqual(out_shape, list(resize_out.shape))
 
   def testGradFromResizeToLargerInBothDims(self):
@@ -196,7 +196,7 @@ class ResizeBicubicOpTest(test.TestCase):
                                               align_corners=align_corners)
         self.assertEqual(out_shape, list(resize_out.get_shape()))
 
-        resize_out = sess.run(resize_out)
+        resize_out = self.evaluate(resize_out)
         self.assertEqual(out_shape, list(resize_out.shape))
 
   def testGradFromResizeToLargerInBothDims(self):
@@ -273,7 +273,7 @@ class CropAndResizeOpTest(test.TestCase):
           constant_op.constant(
               crop_size, shape=[2]))
       self.assertEqual(crops_shape, list(crops.get_shape()))
-      crops = sess.run(crops)
+      crops = self.evaluate(crops)
       self.assertEqual(crops_shape, list(crops.shape))
 
   def _randomUniformAvoidAnchors(self, low, high, anchors, radius, num_samples):
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index ac2d2698b64..71a574e0a02 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -70,7 +70,8 @@ class RGBToHSVTest(test_util.TensorFlowTestCase):
         split2 = list(map(image_ops.hsv_to_rgb, split1))
         join1 = array_ops.stack(split1)
         join2 = array_ops.stack(split2)
-        batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])
+        batch1, batch2, join1, join2 = self.evaluate(
+            [batch1, batch2, join1, join2])
 
       # Verify that processing batch elements together is the same as separate
       self.assertAllClose(batch1, join1)
@@ -109,7 +110,8 @@ class RGBToYIQTest(test_util.TensorFlowTestCase):
         split2 = list(map(image_ops.yiq_to_rgb, split1))
         join1 = array_ops.stack(split1)
         join2 = array_ops.stack(split2)
-        batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])
+        batch1, batch2, join1, join2 = self.evaluate(
+            [batch1, batch2, join1, join2])
 
       # Verify that processing batch elements together is the same as separate
       self.assertAllClose(batch1, join1, rtol=1e-4, atol=1e-4)
@@ -138,7 +140,8 @@ class RGBToYUVTest(test_util.TensorFlowTestCase):
         split2 = list(map(image_ops.yuv_to_rgb, split1))
         join1 = array_ops.stack(split1)
         join2 = array_ops.stack(split2)
-        batch1, batch2, join1, join2 = sess.run([batch1, batch2, join1, join2])
+        batch1, batch2, join1, join2 = self.evaluate(
+            [batch1, batch2, join1, join2])
 
       # Verify that processing batch elements together is the same as separate
       self.assertAllClose(batch1, join1, rtol=1e-4, atol=1e-4)
@@ -488,11 +491,11 @@ class FlipImageBenchmark(test.Benchmark):
             trainable=False,
             dtype=dtypes.float32)
         run_op = image_ops.flip_left_right(inputs)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for i in xrange(warmup_rounds + benchmark_rounds):
           if i == warmup_rounds:
             start = time.time()
-          sess.run(run_op)
+          self.evaluate(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -518,11 +521,11 @@ class FlipImageBenchmark(test.Benchmark):
             trainable=False,
             dtype=dtypes.float32)
         run_op = image_ops.random_flip_left_right(inputs)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for i in xrange(warmup_rounds + benchmark_rounds):
           if i == warmup_rounds:
             start = time.time()
-          sess.run(run_op)
+          self.evaluate(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -548,11 +551,11 @@ class FlipImageBenchmark(test.Benchmark):
             trainable=False,
             dtype=dtypes.float32)
         run_op = image_ops.random_flip_left_right(inputs)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for i in xrange(warmup_rounds + benchmark_rounds):
           if i == warmup_rounds:
             start = time.time()
-          sess.run(run_op)
+          self.evaluate(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -610,11 +613,11 @@ class AdjustHueBenchmark(test.Benchmark):
       delta = constant_op.constant(0.1, dtype=dtypes.float32)
       outputs = image_ops.adjust_hue(inputs, delta)
       run_op = control_flow_ops.group(outputs)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for i in xrange(warmup_rounds + benchmark_rounds):
         if i == warmup_rounds:
           start = time.time()
-        sess.run(run_op)
+        self.evaluate(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -653,12 +656,12 @@ class AdjustSaturationBenchmark(test.Benchmark):
       delta = constant_op.constant(0.1, dtype=dtypes.float32)
       outputs = image_ops.adjust_saturation(inputs, delta)
       run_op = control_flow_ops.group(outputs)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       for _ in xrange(warmup_rounds):
-        sess.run(run_op)
+        self.evaluate(run_op)
       start = time.time()
       for _ in xrange(benchmark_rounds):
-        sess.run(run_op)
+        self.evaluate(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
     tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
@@ -698,7 +701,7 @@ class ResizeBilinearBenchmark(test.Benchmark):
       benchmark_op = control_flow_ops.group(*deps)
 
     with self.benchmark_session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
           benchmark_op,
@@ -746,7 +749,7 @@ class ResizeBicubicBenchmark(test.Benchmark):
       benchmark_op = control_flow_ops.group(*deps)
 
     with self.benchmark_session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
           benchmark_op,
@@ -803,7 +806,7 @@ class ResizeAreaBenchmark(test.Benchmark):
       benchmark_op = control_flow_ops.group(*deps)
 
     with self.benchmark_session() as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess,
           benchmark_op,
@@ -2265,7 +2268,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase):
           image = constant_op.constant(img_np, shape=img_shape)
           y = image_ops.resize_images(image, [target_height, target_width], opt)
           yshape = array_ops.shape(y)
-          resized, newshape = sess.run([y, yshape])
+          resized, newshape = self.evaluate([y, yshape])
           self.assertAllEqual(img_shape, newshape)
           self.assertAllClose(resized, img_np, atol=1e-5)
 
@@ -2379,7 +2382,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase):
         image = constant_op.constant(img_np, shape=img_shape)
         y = image_ops.resize_images(image, [height, width], opt)
         yshape = array_ops.shape(y)
-        resized, newshape = sess.run([y, yshape])
+        resized, newshape = self.evaluate([y, yshape])
         self.assertAllEqual(img_shape, newshape)
         self.assertAllClose(resized, img_np, atol=1e-5)
 
@@ -3066,7 +3069,7 @@ class JpegTest(test_util.TensorFlowTestCase):
       jpeg0 = io_ops.read_file(path)
       image0 = image_ops.decode_jpeg(jpeg0)
       image1 = image_ops.decode_jpeg(image_ops.encode_jpeg(image0))
-      jpeg0, image0, image1 = sess.run([jpeg0, image0, image1])
+      jpeg0, image0, image1 = self.evaluate([jpeg0, image0, image1])
       self.assertEqual(len(jpeg0), 3771)
       self.assertEqual(image0.shape, (256, 128, 3))
       self.assertLess(self.averageError(image0, image1), 1.4)
@@ -3083,7 +3086,7 @@ class JpegTest(test_util.TensorFlowTestCase):
             io_ops.read_file(rgb_path), channels=channels)
         cmyk = image_ops.decode_jpeg(
             io_ops.read_file(cmyk_path), channels=channels)
-        rgb, cmyk = sess.run([rgb, cmyk])
+        rgb, cmyk = self.evaluate([rgb, cmyk])
         self.assertEqual(rgb.shape, shape)
         self.assertEqual(cmyk.shape, shape)
         error = self.averageError(rgb, cmyk)
@@ -3112,7 +3115,7 @@ class JpegTest(test_util.TensorFlowTestCase):
                             image2.get_shape().as_list())
 
         # CropAndDecode should be equal to DecodeJpeg+Crop.
-        image1_crop, image2 = sess.run([image1_crop, image2])
+        image1_crop, image2 = self.evaluate([image1_crop, image2])
         self.assertAllEqual(image1_crop, image2)
 
   def testCropAndDecodeJpegWithInvalidCropWindow(self):
@@ -3131,7 +3134,7 @@ class JpegTest(test_util.TensorFlowTestCase):
         with self.assertRaisesWithPredicateMatch(
             errors.InvalidArgumentError,
             lambda e: "Invalid JPEG data or crop window" in str(e)):
-          sess.run(result)
+          self.evaluate(result)
 
   def testSynthetic(self):
     with self.test_session(use_gpu=True) as sess:
@@ -3141,7 +3144,8 @@ class JpegTest(test_util.TensorFlowTestCase):
       image1 = image_ops.decode_jpeg(jpeg0, dct_method="INTEGER_ACCURATE")
       image2 = image_ops.decode_jpeg(
           image_ops.encode_jpeg(image1), dct_method="INTEGER_ACCURATE")
-      jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
+      jpeg0, image0, image1, image2 = self.evaluate(
+          [jpeg0, image0, image1, image2])
 
       # The decoded-encoded image should be similar to the input
       self.assertLess(self.averageError(image0, image1), 0.6)
@@ -3161,7 +3165,8 @@ class JpegTest(test_util.TensorFlowTestCase):
       image1 = image_ops.decode_jpeg(jpeg0, dct_method="INTEGER_FAST")
       image2 = image_ops.decode_jpeg(
           image_ops.encode_jpeg(image1), dct_method="INTEGER_FAST")
-      jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
+      jpeg0, image0, image1, image2 = self.evaluate(
+          [jpeg0, image0, image1, image2])
 
       # The decoded-encoded image should be similar to the input, but
       # note this is worse than the slower algorithm because it is
@@ -3184,7 +3189,7 @@ class JpegTest(test_util.TensorFlowTestCase):
       jpeg0 = image_ops.encode_jpeg(image0)
       image1 = image_ops.decode_jpeg(jpeg0, dct_method="INTEGER_FAST")
       image2 = image_ops.decode_jpeg(jpeg0)
-      image1, image2 = sess.run([image1, image2])
+      image1, image2 = self.evaluate([image1, image2])
 
       # The images should be the same.
       self.assertAllClose(image1, image2)
@@ -3230,7 +3235,7 @@ class PngTest(test_util.TensorFlowTestCase):
         with self.test_session(use_gpu=True) as sess:
           png0 = io_ops.read_file(prefix + filename)
           image0 = image_ops.decode_png(png0, channels=channels)
-          png0, image0 = sess.run([png0, image0])
+          png0, image0 = self.evaluate([png0, image0])
           self.assertEqual(image0.shape, (26, 51, channels or channels_in))
           if channels == channels_in:
             image1 = image_ops.decode_png(image_ops.encode_png(image0))
@@ -3242,7 +3247,7 @@ class PngTest(test_util.TensorFlowTestCase):
       image0 = constant_op.constant(_SimpleColorRamp())
       png0 = image_ops.encode_png(image0, compression=7)
       image1 = image_ops.decode_png(png0)
-      png0, image0, image1 = sess.run([png0, image0, image1])
+      png0, image0, image1 = self.evaluate([png0, image0, image1])
 
       # PNG is lossless
       self.assertAllEqual(image0, image1)
@@ -3257,7 +3262,7 @@ class PngTest(test_util.TensorFlowTestCase):
       image0 = constant_op.constant(_SimpleColorRamp(), dtype=dtypes.uint16)
       png0 = image_ops.encode_png(image0, compression=7)
       image1 = image_ops.decode_png(png0, dtype=dtypes.uint16)
-      png0, image0, image1 = sess.run([png0, image0, image1])
+      png0, image0, image1 = self.evaluate([png0, image0, image1])
 
       # PNG is lossless
       self.assertAllEqual(image0, image1)
@@ -3273,7 +3278,7 @@ class PngTest(test_util.TensorFlowTestCase):
       image0 = constant_op.constant(gray_alpha)
       png0 = image_ops.encode_png(image0, compression=7)
       image1 = image_ops.decode_png(png0)
-      png0, image0, image1 = sess.run([png0, image0, image1])
+      png0, image0, image1 = self.evaluate([png0, image0, image1])
       self.assertEqual(2, image0.shape[-1])
       self.assertAllEqual(image0, image1)
 
@@ -3284,7 +3289,7 @@ class PngTest(test_util.TensorFlowTestCase):
       image0 = constant_op.constant(gray_alpha, dtype=dtypes.uint16)
       png0 = image_ops.encode_png(image0, compression=7)
       image1 = image_ops.decode_png(png0, dtype=dtypes.uint16)
-      png0, image0, image1 = sess.run([png0, image0, image1])
+      png0, image0, image1 = self.evaluate([png0, image0, image1])
       self.assertEqual(2, image0.shape[-1])
       self.assertAllEqual(image0, image1)
 
@@ -3310,7 +3315,7 @@ class GifTest(test_util.TensorFlowTestCase):
     with self.test_session(use_gpu=True) as sess:
       gif0 = io_ops.read_file(prefix + filename)
       image0 = image_ops.decode_gif(gif0)
-      gif0, image0 = sess.run([gif0, image0])
+      gif0, image0 = self.evaluate([gif0, image0])
 
       self.assertEqual(image0.shape, shape)
 
@@ -3829,7 +3834,7 @@ class PSNRTest(test_util.TensorFlowTestCase):
         "tensorflow/core/lib/psnr/testdata", filename))
     im = image_ops.decode_jpeg(content, dct_method="INTEGER_ACCURATE")
     im = image_ops.convert_image_dtype(im, dtypes.float32)
-    im, = sess.run([im])
+    im, = self.evaluate([im])
     return np.expand_dims(im, axis=0)
 
   def _LoadTestImages(self):
@@ -3936,7 +3941,7 @@ class SSIMTest(test_util.TensorFlowTestCase):
         "tensorflow/core/lib/ssim/testdata", filename))
     im = image_ops.decode_png(content)
     im = image_ops.convert_image_dtype(im, dtypes.float32)
-    im, = sess.run([im])
+    im, = self.evaluate([im])
     return np.expand_dims(im, axis=0)
 
   def _LoadTestImages(self):
@@ -4028,7 +4033,7 @@ class MultiscaleSSIMTest(test_util.TensorFlowTestCase):
         "tensorflow/core/lib/ssim/testdata", filename))
     im = image_ops.decode_png(content)
     im = image_ops.convert_image_dtype(im, dtypes.float32)
-    im, = sess.run([im])
+    im, = self.evaluate([im])
     return np.expand_dims(im, axis=0)
 
   def _LoadTestImages(self):
@@ -4110,7 +4115,7 @@ class MultiscaleSSIMTest(test_util.TensorFlowTestCase):
       images = [ops.convert_to_tensor(x, dtype=dtypes.float32) for x in images]
       msssim_ops = [image_ops.ssim_multiscale(x, y, 1.0)
                     for x, y in itertools.combinations(images, 2)]
-      msssim = sess.run(msssim_ops)
+      msssim = self.evaluate(msssim_ops)
       msssim = np.squeeze(msssim)
 
     self.assertTrue(np.all(msssim >= 0.0))
@@ -4223,7 +4228,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(jpeg0, dtype=dtypes.uint16)
       image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0),
                                              dtypes.uint16)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testPngUint16(self):
@@ -4233,7 +4238,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(png0, dtype=dtypes.uint16)
       image1 = image_ops.convert_image_dtype(
           image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.uint16)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testGifUint16(self):
@@ -4243,7 +4248,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(gif0, dtype=dtypes.uint16)
       image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0),
                                              dtypes.uint16)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testBmpUint16(self):
@@ -4253,7 +4258,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(bmp0, dtype=dtypes.uint16)
       image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0),
                                              dtypes.uint16)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testJpegFloat32(self):
@@ -4263,7 +4268,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(jpeg0, dtype=dtypes.float32)
       image1 = image_ops.convert_image_dtype(image_ops.decode_jpeg(jpeg0),
                                              dtypes.float32)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testPngFloat32(self):
@@ -4273,7 +4278,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(png0, dtype=dtypes.float32)
       image1 = image_ops.convert_image_dtype(
           image_ops.decode_png(png0, dtype=dtypes.uint16), dtypes.float32)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testGifFloat32(self):
@@ -4283,7 +4288,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(gif0, dtype=dtypes.float32)
       image1 = image_ops.convert_image_dtype(image_ops.decode_gif(gif0),
                                              dtypes.float32)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
   def testBmpFloat32(self):
@@ -4293,7 +4298,7 @@ class DecodeImageTest(test_util.TensorFlowTestCase):
       image0 = image_ops.decode_image(bmp0, dtype=dtypes.float32)
       image1 = image_ops.convert_image_dtype(image_ops.decode_bmp(bmp0),
                                              dtypes.float32)
-      image0, image1 = sess.run([image0, image1])
+      image0, image1 = self.evaluate([image0, image1])
       self.assertAllEqual(image0, image1)
 
 
diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py
index 5693c3caaf5..1f222480046 100644
--- a/tensorflow/python/ops/init_ops_test.py
+++ b/tensorflow/python/ops/init_ops_test.py
@@ -45,8 +45,8 @@ class InitializersTest(test.TestCase):
       output = variable.numpy()
     else:
       sess = ops.get_default_session()
-      sess.run(variable.initializer)
-      output = sess.run(variable)
+      self.evaluate(variable.initializer)
+      output = self.evaluate(variable)
     lim = 3e-2
     if target_std is not None:
       self.assertGreater(lim, abs(output.std() - target_std))
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index adcaa7abffb..cd45b6f1364 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -373,7 +373,7 @@ class AddNTest(test_util.TensorFlowTestCase):
             for i in range(0, num_inputs)
         ]
         addn = math_ops.add_n(input_vars)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         add_n_grad = gradients.gradients(addn, input_vars)
         self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1
                             [g.eval() for g in add_n_grad])
@@ -461,7 +461,7 @@ class DivAndModTest(test_util.TensorFlowTestCase):
       a = variables.Variable(2.)
       b = variables.Variable(4.)
       with self.cached_session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         c_grad = gradients.gradients(math_ops.divide(a, b), [a, b])
         self.assertAllEqual([x.eval() for x in c_grad], [.25, -.125])
         c_grad = gradients.gradients(math_ops.div(a, b), [a, b])
diff --git a/tensorflow/python/ops/nccl_ops_test.py b/tensorflow/python/ops/nccl_ops_test.py
index 1b496fec473..3b2e2b0175f 100644
--- a/tensorflow/python/ops/nccl_ops_test.py
+++ b/tensorflow/python/ops/nccl_ops_test.py
@@ -102,7 +102,7 @@ class NcclTestCase(test.TestCase):
             continue
 
           # Test execution and results.
-          for t in sess.run(result_tensors):
+          for t in self.evaluate(result_tensors):
             self.assertAllClose(t, np_ans)
 
   def _TestGradient(self, nccl_reduce, numpy_fn):
diff --git a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py
index b50bccfde22..31b2790f2b8 100644
--- a/tensorflow/python/ops/nn_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_batchnorm_test.py
@@ -235,10 +235,11 @@ class BatchNormalizationTest(test.TestCase):
           odx, odm, odv, odb, odg = gradients_impl.gradients(
               [on], [x, m, v, beta, gamma], [backprop])
           if scale_after_normalization:
-            all_grads = sess.run([dx, dm, dv, db, dg, odx, odm, odv, odb, odg])
+            all_grads = self.evaluate(
+                [dx, dm, dv, db, dg, odx, odm, odv, odb, odg])
             to_check = ["dx", "dm", "dv", "db", "dg"]
           else:
-            all_grads = sess.run([dx, dm, dv, db, odx, odm, odv, odb])
+            all_grads = self.evaluate([dx, dm, dv, db, odx, odm, odv, odb])
             to_check = ["dx", "dm", "dv", "db"]
           for i, _ in enumerate(to_check):
             self.assertAllClose(
@@ -318,7 +319,7 @@ class BatchNormalizationTest(test.TestCase):
                                               gamma_val, epsilon,
                                               scale_after_normalization,
                                               shift_after_normalization)
-            [tf_batch_norm] = sess.run([bn])
+            [tf_batch_norm] = self.evaluate([bn])
             self.assertEquals(x_shape, np_batch_norm.shape)
             self.assertEquals(x_shape, tf_batch_norm.shape)
             self.assertAllClose(np_batch_norm, tf_batch_norm, atol=atol)
@@ -371,9 +372,9 @@ class SufficientStatisticsTest(test.TestCase):
           x.set_shape(x_shape)
           op_c, op_m, op_v, op_s = self._opSuffStats(x, axes, shift, keep_dims)
           if shift:
-            tf_c, tf_m, tf_v, tf_s = sess.run([op_c, op_m, op_v, op_s])
+            tf_c, tf_m, tf_v, tf_s = self.evaluate([op_c, op_m, op_v, op_s])
           else:
-            tf_c, tf_m, tf_v = sess.run([op_c, op_m, op_v])
+            tf_c, tf_m, tf_v = self.evaluate([op_c, op_m, op_v])
         else:
           x = array_ops.placeholder(
               dtype=dtypes.float32, shape=[None] * len(x_shape), name="x")
@@ -432,7 +433,7 @@ class NormalizeMomentsTest(test.TestCase):
           tf_shift_v = None
         opm, opv = self._opNormalizeMoments(tf_counts, tf_mean_ss,
                                             tf_variance_ss, tf_shift_v)
-        tfm, tfv = sess.run([opm, opv])
+        tfm, tfv = self.evaluate([opm, opv])
         self.assertAllClose(npm, tfm, atol=0.000001)
         self.assertAllClose(npv, tfv, atol=0.000001)
 
diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py
index a6c582fcac8..4bc33ff8bdb 100644
--- a/tensorflow/python/ops/nn_fused_batchnorm_test.py
+++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py
@@ -82,7 +82,7 @@ class BatchNormalizationTest(test.TestCase):
           epsilon=epsilon,
           data_format=data_format,
           is_training=False)
-      y_val = sess.run(y)
+      y_val = self.evaluate(y)
       y_ref = self._inference_ref(x, scale, offset, mean, var, epsilon,
                                   data_format)
     # An atol value of 1e-3 is too small for float16's, because some adjacent
@@ -127,7 +127,7 @@ class BatchNormalizationTest(test.TestCase):
           epsilon=epsilon,
           data_format=data_format,
           is_training=True)
-      y_val, mean_val, var_val = sess.run([y, mean, var])
+      y_val, mean_val, var_val = self.evaluate([y, mean, var])
       y_ref, mean_ref, var_ref = self._training_ref(x, scale, offset, epsilon,
                                                     data_format)
     y_atol = 2e-3 if x_dtype == np.float16 else 1e-3
@@ -277,10 +277,10 @@ class BatchNormalizationTest(test.TestCase):
       if is_training:
         epsilon = y.op.get_attr('epsilon')
         data_format = y.op.get_attr('data_format')
-        grad_vals = sess.run([grad_x, grad_scale, grad_offset])
+        grad_vals = self.evaluate([grad_x, grad_scale, grad_offset])
         grad_internal = nn_grad._BatchNormGrad(grad_y, x, scale, pop_mean,
                                                pop_var, epsilon, data_format)
-        grad_internal_vals = sess.run(list(grad_internal))
+        grad_internal_vals = self.evaluate(list(grad_internal))
         for grad_val, grad_internal_val in zip(grad_vals, grad_internal_vals):
           self.assertAllClose(grad_val, grad_internal_val, atol=err_tolerance)
 
diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
index 72db0952b43..017bc9dae53 100644
--- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
+++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
@@ -1090,7 +1090,7 @@ class TensorArrayTest(PForTest):
     # y = x * x. Hence dy/dx = 2 * x.
     actual_grad = 2.0 * x
     with session.Session() as sess:
-      actual_grad, computed_grad = sess.run([t1, actual_grad])
+      actual_grad, computed_grad = self.evaluate([t1, actual_grad])
       self.assertAllClose(actual_grad, computed_grad)
 
 
@@ -1244,7 +1244,7 @@ class ControlFlowTest(PForTest):
     expected_output = array_ops.transpose(expected_output, [1, 0])
 
     with session.Session() as sess:
-      out, expected = sess.run([out, expected_output])
+      out, expected = self.evaluate([out, expected_output])
       self.assertAllClose(expected, out)
 
   def test_tensor_array_as_loop_variable(self):
@@ -1432,7 +1432,7 @@ class Benchmarks(test.Benchmark):
     sess = session.Session()
     with sess:
       init = variables.global_variables_initializer()
-      sess.run(init)
+      self.evaluate(init)
       run_fn = sess.make_callable(targets)
       run_fn()  # Warm up
       begin = time.time()
diff --git a/tensorflow/python/ops/parallel_for/gradients_test.py b/tensorflow/python/ops/parallel_for/gradients_test.py
index bbb46539ea2..4342833e3eb 100644
--- a/tensorflow/python/ops/parallel_for/gradients_test.py
+++ b/tensorflow/python/ops/parallel_for/gradients_test.py
@@ -484,8 +484,8 @@ class GradientsTest(test.TestCase):
     pfor_jacobian, while_gradients = create_dynamic_lstm_batch_jacobian(8, 4, 3)
     with session.Session() as sess:
       init = variables.global_variables_initializer()
-      sess.run(init)
-      pfor = sess.run(pfor_jacobian)
+      self.evaluate(init)
+      pfor = self.evaluate(pfor_jacobian)
       for i in range(4):
         while_i = sess.run(while_gradients[i])
         self.assertAllClose(while_i, pfor[:, i, ...])
@@ -560,11 +560,11 @@ class GradientsBenchmarks(test.Benchmark):
     sess = session.Session()
     with sess:
       init = variables.global_variables_initializer()
-      sess.run(init)
-      sess.run(targets)
+      self.evaluate(init)
+      self.evaluate(targets)
       begin = time.time()
       for _ in range(iters):
-        sess.run(targets)
+        self.evaluate(targets)
       end = time.time()
     avg_time_ms = 1000 * (end - begin) / iters
     self.report_benchmark(iters=iters, wall_time=avg_time_ms, name=name)
diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py
index f7fa264461e..6b469a954f6 100644
--- a/tensorflow/python/ops/quantized_conv_ops_test.py
+++ b/tensorflow/python/ops/quantized_conv_ops_test.py
@@ -73,7 +73,7 @@ class Conv2DTest(test.TestCase):
           max_input=x1_max,
           min_filter=x2_min,
           max_filter=x2_max)
-      value = sess.run(conv)
+      value = self.evaluate(conv)
     quantized_output = value[0]
     output_min = value[1]
     output_max = value[2]
diff --git a/tensorflow/python/ops/quantized_ops_test.py b/tensorflow/python/ops/quantized_ops_test.py
index 0f3b04e4ad0..b81843d1748 100644
--- a/tensorflow/python/ops/quantized_ops_test.py
+++ b/tensorflow/python/ops/quantized_ops_test.py
@@ -41,7 +41,7 @@ class QuantizedOpsTest(test.TestCase):
       x_min = 0.0
       x_max = 255.0
       op = array_ops.quantize(x, x_min, x_max, dtypes.quint8, mode="MIN_FIRST")
-      value = sess.run(op)
+      value = self.evaluate(op)
       self.assertArrayNear(expected_output, value.output, 0.1)
 
   def testDequantizeOp(self):
@@ -52,7 +52,7 @@ class QuantizedOpsTest(test.TestCase):
       x_min = 0.0
       x_max = 255.0
       op = array_ops.dequantize(x, x_min, x_max, mode="MIN_FIRST")
-      value = sess.run(op)
+      value = self.evaluate(op)
       self.assertArrayNear(expected_output, value, 0.1)
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
index dcf1feaa696..c52db9e2a16 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
@@ -190,7 +190,7 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
     with self.test_session() as sess:
       if hasattr(expected, 'tolist'):
         expected = expected.tolist()
-      self.assertEqual(sess.run(result).tolist(), expected)
+      self.assertEqual(self.evaluate(result).tolist(), expected)
 
   def testRaggedGatherNdUnknownRankError(self):
     params = ragged.constant([['a', 'b'], ['c', 'd']])
diff --git a/tensorflow/python/ops/ragged/ragged_segment_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
index 373a332f135..228c9bc5e49 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
@@ -119,7 +119,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
 
     segmented = segment_op(rt, segment_ids, num_segments)
     with self.test_session():
-      self.assertListEqual(segmented.eval().tolist(), expected)
+      self.assertListEqual(self.evaluate(segmented).tolist(), expected)
 
   @parameterized.parameters(
       (ragged.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
@@ -173,7 +173,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                  [[411, 412], [321, 322], [331]]    # row 2
                 ]  # pyformat: disable
     with self.test_session():
-      self.assertEqual(segmented1.eval().tolist(), expected1)
+      self.assertEqual(self.evaluate(segmented1).tolist(), expected1)
 
     segment_ids2 = [1, 2, 1, 1]
     segmented2 = ragged.segment_sum(rt, segment_ids2, 3)
@@ -181,7 +181,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                  [[111+411, 112+412, 113, 114], [121+321, 322], [331]],
                  []]  # pyformat: disable
     with self.test_session():
-      self.assertEqual(segmented2.eval().tolist(), expected2)
+      self.assertEqual(self.evaluate(segmented2).tolist(), expected2)
 
   def testRaggedSegmentIds(self):
     rt = ragged.constant([
@@ -196,7 +196,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                 [111+321, 112+322, 113, 114],
                 [121+331+411, 412]]  # pyformat: disable
     with self.test_session():
-      self.assertEqual(segmented.eval().tolist(), expected)
+      self.assertEqual(self.evaluate(segmented).tolist(), expected)
 
   def testShapeMismatchError1(self):
     dt = constant_op.constant([1, 2, 3, 4, 5, 6])
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
index a1c10aff9de..cd382fe0b8e 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
@@ -29,7 +29,8 @@ class RaggedTensorBoundingShapeOp(test_util.TensorFlowTestCase):
     # This is the example from ragged.bounding_shape.__doc__.
     rt = ragged.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9], [10]])
     with self.test_session():
-      self.assertEqual(ragged.bounding_shape(rt).eval().tolist(), [5, 4])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt)).tolist(), [5, 4])
 
   def test2DRaggedTensorWithOneRaggedDimension(self):
     values = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
@@ -37,9 +38,12 @@ class RaggedTensorBoundingShapeOp(test_util.TensorFlowTestCase):
     rt2 = ragged.from_row_splits(values, [0, 7])
     rt3 = ragged.from_row_splits(values, [0, 0, 7, 7])
     with self.test_session():
-      self.assertEqual(ragged.bounding_shape(rt1).eval().tolist(), [5, 3])
-      self.assertEqual(ragged.bounding_shape(rt2).eval().tolist(), [1, 7])
-      self.assertEqual(ragged.bounding_shape(rt3).eval().tolist(), [3, 7])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt1)).tolist(), [5, 3])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt2)).tolist(), [1, 7])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt3)).tolist(), [3, 7])
 
   def test3DRaggedTensorWithOneRaggedDimension(self):
     values = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]
@@ -47,22 +51,26 @@ class RaggedTensorBoundingShapeOp(test_util.TensorFlowTestCase):
     rt2 = ragged.from_row_splits(values, [0, 7])
     rt3 = ragged.from_row_splits(values, [0, 0, 7, 7])
     with self.test_session():
-      self.assertEqual(ragged.bounding_shape(rt1).eval().tolist(), [5, 3, 2])
-      self.assertEqual(ragged.bounding_shape(rt2).eval().tolist(), [1, 7, 2])
-      self.assertEqual(ragged.bounding_shape(rt3).eval().tolist(), [3, 7, 2])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt1)).tolist(), [5, 3, 2])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt2)).tolist(), [1, 7, 2])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(rt3)).tolist(), [3, 7, 2])
 
   def testNonRaggedTensor(self):
     dt = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
     with self.test_session():
-      self.assertEqual(ragged.bounding_shape(dt).eval().tolist(), [4, 3])
+      self.assertEqual(
+          self.evaluate(ragged.bounding_shape(dt)).tolist(), [4, 3])
 
   def testExplicitAxisOptimizations(self):
     rt = ragged.from_row_splits(b'a b c d e f g'.split(), [0, 2, 5, 6, 6, 7])
     with self.test_session():
-      self.assertEqual(ragged.bounding_shape(rt, 0).eval().tolist(), 5)
-      self.assertEqual(ragged.bounding_shape(rt, 1).eval().tolist(), 3)
+      self.assertEqual(self.evaluate(ragged.bounding_shape(rt, 0)).tolist(), 5)
+      self.assertEqual(self.evaluate(ragged.bounding_shape(rt, 1)).tolist(), 3)
       self.assertEqual(
-          ragged.bounding_shape(rt, [1, 0]).eval().tolist(), [3, 5])
+          self.evaluate(ragged.bounding_shape(rt, [1, 0])).tolist(), [3, 5])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py
index f66ca102ef9..66b15d9bcc3 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py
@@ -264,7 +264,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
     with self.test_session():
       self.assertAllEqual(rt_value_rowids, value_rowids)
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
@@ -287,7 +287,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
     with self.test_session():
       self.assertAllEqual(rt_value_rowids, value_rowids)
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
@@ -345,7 +345,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertEqual(rt.values.shape.as_list(), [0])
     self.assertEqual(ragged.value_rowids(rt).shape.as_list(), [0])
     with self.test_session():
-      self.assertEqual(rt_nrows.eval().tolist(), 0)
+      self.assertEqual(self.evaluate(rt_nrows).tolist(), 0)
       self.assertEqual(rt.tolist(), [])
 
   def testFromRowSplits(self):
@@ -364,7 +364,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertIs(rt_values, values)
     self.assertIs(rt_row_splits, row_splits)
     with self.test_session():
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
@@ -388,7 +388,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     self.assertIs(rt_values, values)
     with self.test_session():
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertAllEqual(rt_row_starts, row_starts)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
@@ -408,7 +408,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     self.assertIs(rt_values, values)
     with self.test_session():
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertAllEqual(rt_row_limits, row_limits)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
@@ -429,7 +429,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertIs(rt_values, values)
     self.assertIs(rt_row_lengths, row_lengths)  # cached_nrows
     with self.test_session():
-      self.assertEqual(rt_nrows.eval(), 5)
+      self.assertEqual(self.evaluate(rt_nrows), 5)
       self.assertAllEqual(rt_row_lengths, row_lengths)
       self.assertEqual(rt.tolist(),
                        [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
@@ -606,21 +606,28 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       with self.test_session():
         self.assertEqual(rt.tolist(),
                          [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
-        self.assertEqual(rt.values.eval().tolist(),
-                         [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
+        self.assertEqual(
+            self.evaluate(rt.values).tolist(),
+            [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
         self.assertEqual(rt.values.shape.dims[0].value, 7)
         self.assertEqual(
-            ragged.value_rowids(rt).eval().tolist(), [0, 0, 2, 2, 2, 3, 4])
-        self.assertEqual(ragged.nrows(rt).eval().tolist(), 5)
-        self.assertEqual(rt.row_splits.eval().tolist(), [0, 2, 2, 5, 6, 7])
-        self.assertEqual(ragged.row_starts(rt).eval().tolist(), [0, 2, 2, 5, 6])
-        self.assertEqual(ragged.row_limits(rt).eval().tolist(), [2, 2, 5, 6, 7])
+            self.evaluate(ragged.value_rowids(rt)).tolist(),
+            [0, 0, 2, 2, 2, 3, 4])
+        self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 5)
         self.assertEqual(
-            ragged.row_lengths(rt).eval().tolist(), [2, 0, 3, 1, 1])
-        self.assertEqual(rt.inner_values.eval().tolist(),
-                         [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
-        self.assertEqual([s.eval().tolist() for s in rt.nested_row_splits],
-                         [[0, 2, 2, 5, 6, 7]])
+            self.evaluate(rt.row_splits).tolist(), [0, 2, 2, 5, 6, 7])
+        self.assertEqual(
+            self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 2, 5, 6])
+        self.assertEqual(
+            self.evaluate(ragged.row_limits(rt)).tolist(), [2, 2, 5, 6, 7])
+        self.assertEqual(
+            self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 0, 3, 1, 1])
+        self.assertEqual(
+            self.evaluate(rt.inner_values).tolist(),
+            [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
+        self.assertEqual(
+            [self.evaluate(s).tolist() for s in rt.nested_row_splits],
+            [[0, 2, 2, 5, 6, 7]])
 
   def testRaggedTensorAccessors_3d_with_ragged_rank_1(self):
     values = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]
@@ -635,22 +642,27 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
                          [[[0, 1], [2, 3]], [], [[4, 5], [6, 7], [8, 9]],
                           [[10, 11]], [[12, 13]]])
         self.assertEqual(
-            rt.values.eval().tolist(),
+            self.evaluate(rt.values).tolist(),
             [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]])
         self.assertEqual(rt.values.shape.dims[0].value, 7)
         self.assertEqual(
-            ragged.value_rowids(rt).eval().tolist(), [0, 0, 2, 2, 2, 3, 4])
-        self.assertEqual(ragged.nrows(rt).eval().tolist(), 5)
-        self.assertEqual(rt.row_splits.eval().tolist(), [0, 2, 2, 5, 6, 7])
-        self.assertEqual(ragged.row_starts(rt).eval().tolist(), [0, 2, 2, 5, 6])
-        self.assertEqual(ragged.row_limits(rt).eval().tolist(), [2, 2, 5, 6, 7])
+            self.evaluate(ragged.value_rowids(rt)).tolist(),
+            [0, 0, 2, 2, 2, 3, 4])
+        self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 5)
         self.assertEqual(
-            ragged.row_lengths(rt).eval().tolist(), [2, 0, 3, 1, 1])
+            self.evaluate(rt.row_splits).tolist(), [0, 2, 2, 5, 6, 7])
         self.assertEqual(
-            rt.inner_values.eval().tolist(),
+            self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 2, 5, 6])
+        self.assertEqual(
+            self.evaluate(ragged.row_limits(rt)).tolist(), [2, 2, 5, 6, 7])
+        self.assertEqual(
+            self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 0, 3, 1, 1])
+        self.assertEqual(
+            self.evaluate(rt.inner_values).tolist(),
             [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]])
-        self.assertEqual([s.eval().tolist() for s in rt.nested_row_splits],
-                         [[0, 2, 2, 5, 6, 7]])
+        self.assertEqual(
+            [self.evaluate(s).tolist() for s in rt.nested_row_splits],
+            [[0, 2, 2, 5, 6, 7]])
 
   def testRaggedTensorAccessors_3d_with_ragged_rank_2(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
@@ -670,32 +682,38 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         self.assertEqual(
             rt.tolist(),
             [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [], [[b'f'], [b'g']]])
-        self.assertEqual(rt.values.eval().tolist(),
-                         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
+        self.assertEqual(
+            self.evaluate(rt.values).tolist(),
+            [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
         self.assertEqual(rt.values.shape.dims[0].value, 5)
         self.assertEqual(
-            ragged.value_rowids(rt).eval().tolist(), [0, 0, 1, 3, 3])
-        self.assertEqual(ragged.nrows(rt).eval().tolist(), 4)
-        self.assertEqual(rt.row_splits.eval().tolist(), [0, 2, 3, 3, 5])
-        self.assertEqual(ragged.row_starts(rt).eval().tolist(), [0, 2, 3, 3])
-        self.assertEqual(ragged.row_limits(rt).eval().tolist(), [2, 3, 3, 5])
-        self.assertEqual(ragged.row_lengths(rt).eval().tolist(), [2, 1, 0, 2])
-        self.assertEqual(rt.inner_values.eval().tolist(),
-                         [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
-        self.assertEqual([s.eval().tolist() for s in rt.nested_row_splits],
-                         [[0, 2, 3, 3, 5], [0, 2, 2, 5, 6, 7]])
+            self.evaluate(ragged.value_rowids(rt)).tolist(), [0, 0, 1, 3, 3])
+        self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 4)
+        self.assertEqual(self.evaluate(rt.row_splits).tolist(), [0, 2, 3, 3, 5])
+        self.assertEqual(
+            self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 3, 3])
+        self.assertEqual(
+            self.evaluate(ragged.row_limits(rt)).tolist(), [2, 3, 3, 5])
+        self.assertEqual(
+            self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 1, 0, 2])
+        self.assertEqual(
+            self.evaluate(rt.inner_values).tolist(),
+            [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
+        self.assertEqual(
+            [self.evaluate(s).tolist() for s in rt.nested_row_splits],
+            [[0, 2, 3, 3, 5], [0, 2, 2, 5, 6, 7]])
 
   def testNRowsWithTensorInput(self):
     dt = constant_op.constant([[1, 2, 3], [4, 5, 6]])
     nrows = ragged.nrows(dt)
     with self.test_session():
-      self.assertEqual(nrows.eval(), 2)
+      self.assertEqual(self.evaluate(nrows), 2)
 
   def testRowLengthsWithTensorInput(self):
     dt = constant_op.constant([[1, 2, 3], [4, 5, 6]])
     row_lengths = ragged.row_lengths(dt)
     with self.test_session():
-      self.assertEqual(row_lengths.eval().tolist(), [3, 3])
+      self.assertEqual(self.evaluate(row_lengths).tolist(), [3, 3])
 
   #=============================================================================
   # RaggedTensor.shape
@@ -751,9 +769,9 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     with self.test_session():
       tensor_slice_spec1 = _make_tensor_slice_spec(slice_spec, True)
       tensor_slice_spec2 = _make_tensor_slice_spec(slice_spec, False)
-      value1 = rt.__getitem__(slice_spec).eval()
-      value2 = rt.__getitem__(tensor_slice_spec1).eval()
-      value3 = rt.__getitem__(tensor_slice_spec2).eval()
+      value1 = self.evaluate(rt.__getitem__(slice_spec))
+      value2 = self.evaluate(rt.__getitem__(tensor_slice_spec1))
+      value3 = self.evaluate(rt.__getitem__(tensor_slice_spec2))
       if hasattr(value1, 'tolist'):
         value1 = value1.tolist()
       if hasattr(value2, 'tolist'):
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 94c685274a7..8648f0b5148 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -93,10 +93,10 @@ class PrintModelAnalysisTest(test.TestCase):
           config=self._no_rewrite_session_config()) as sess, ops.device(dev):
         x = lib.BuildSmallModel()
 
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         pctx.trace_next_step()
         pctx.dump_next_step()
-        _ = sess.run(x)
+        _ = self.evaluate(x)
 
         pctx.profiler.profile_name_scope(options=opts)
 
@@ -160,7 +160,7 @@ class PrintModelAnalysisTest(test.TestCase):
                         ) as sess, ops.device('/device:CPU:0'):
       x = lib.BuildSmallModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -186,7 +186,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -220,9 +220,9 @@ class PrintModelAnalysisTest(test.TestCase):
       with session.Session(config=self._no_rewrite_session_config()) as sess:
         x = lib.BuildFullModel()
 
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         pctx.trace_next_step()
-        _ = sess.run(x)
+        _ = self.evaluate(x)
         tfprof_node = pctx.profiler.profile_python(options=opts)
 
         # pylint: disable=line-too-long
@@ -281,7 +281,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -309,7 +309,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(
           x,
@@ -345,7 +345,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -391,7 +391,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(
           x,
@@ -424,7 +424,7 @@ class PrintModelAnalysisTest(test.TestCase):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildFullModel()
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(
           x,
@@ -490,7 +490,7 @@ class PrintModelAnalysisTest(test.TestCase):
 
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -555,7 +555,7 @@ class PrintModelAnalysisTest(test.TestCase):
 
     with session.Session(config=self._no_rewrite_session_config()) as sess:
       x = lib.BuildSmallModel()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       run_meta = config_pb2.RunMetadata()
       _ = sess.run(x,
                    options=config_pb2.RunOptions(
@@ -587,10 +587,10 @@ class PrintModelAnalysisTest(test.TestCase):
   def _trainLoop(self, train_op, train_steps, time_dir, time_step,
                  memory_dir, memory_step, profile_dir, dump_step):
     with session.Session(config=self._no_rewrite_session_config()) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       # start from 1 because variable_initializer took one step.
       for i in range(1, train_steps + 1):
-        _ = sess.run(train_op)
+        _ = self.evaluate(train_op)
         if i in time_step:
           ret = gfile.ListDirectory(time_dir)
           self.assertEqual(len(ret), 1)
diff --git a/tensorflow/python/profiler/profile_context_test.py b/tensorflow/python/profiler/profile_context_test.py
index 107ad443c32..680cd71d1fc 100644
--- a/tensorflow/python/profiler/profile_context_test.py
+++ b/tensorflow/python/profiler/profile_context_test.py
@@ -48,10 +48,10 @@ class ProfilerContextTest(test.TestCase):
     with profile_context.ProfileContext(test.get_temp_dir()) as pctx:
       pctx.add_auto_profiling("op", options=opts, profile_steps=[15, 50, 100])
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         total_steps = 101
         for i in range(total_steps):
-          sess.run(x)
+          self.evaluate(x)
           if i == 14 or i == 49:
             self.assertTrue(gfile.Exists(outfile))
             gfile.Remove(outfile)
@@ -75,18 +75,18 @@ class ProfilerContextTest(test.TestCase):
 
     with profile_context.ProfileContext(test.get_temp_dir(), debug=True):
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for _ in range(10):
-          sess.run(x)
+          self.evaluate(x)
           for f in gfile.ListDirectory(test.get_temp_dir()):
             # Warm up, no tracing.
             self.assertFalse("run_meta" in f)
-        sess.run(x)
+        self.evaluate(x)
         self.assertTrue(
             gfile.Exists(os.path.join(test.get_temp_dir(), "run_meta_11")))
         gfile.Remove(os.path.join(test.get_temp_dir(), "run_meta_11"))
         # fetched already.
-        sess.run(x)
+        self.evaluate(x)
         for f in gfile.ListDirectory(test.get_temp_dir()):
           self.assertFalse("run_meta" in f)
 
@@ -96,18 +96,18 @@ class ProfilerContextTest(test.TestCase):
     with profile_context.ProfileContext(test.get_temp_dir(),
                                         enabled=False) as pctx:
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for _ in range(10):
-          sess.run(x)
+          self.evaluate(x)
       self.assertTrue(pctx.profiler is None)
       self.assertTrue(
           getattr(session.BaseSession, "profile_context", None) is None)
 
     with profile_context.ProfileContext(test.get_temp_dir()) as pctx:
       with session.Session() as sess:
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for _ in range(10):
-          sess.run(x)
+          self.evaluate(x)
       self.assertFalse(pctx.profiler is None)
       self.assertFalse(
           getattr(session.BaseSession, "profile_context", None) is None)
diff --git a/tensorflow/python/saved_model/loader_test.py b/tensorflow/python/saved_model/loader_test.py
index 648c1c59284..3678e505bda 100644
--- a/tensorflow/python/saved_model/loader_test.py
+++ b/tensorflow/python/saved_model/loader_test.py
@@ -50,7 +50,7 @@ class SavedModelLoaderTest(test.TestCase):
       x = variables.VariableV1(5, name="x")
       y = variables.VariableV1(11, name="y")
       z = x + y
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       foo_sig_def = signature_def_utils.build_signature_def(
           {"foo_input": utils.build_tensor_info(x)},
@@ -104,9 +104,9 @@ class SavedModelLoaderTest(test.TestCase):
     with self.session(graph=graph) as sess:
       # Check that x and y are not initialized
       with self.assertRaises(errors.FailedPreconditionError):
-        sess.run(x)
+        self.evaluate(x)
       with self.assertRaises(errors.FailedPreconditionError):
-        sess.run(y)
+        self.evaluate(y)
 
   def test_load_with_import_scope(self):
     loader = loader_impl.SavedModelLoader(SAVED_MODEL_WITH_MAIN_OP)
@@ -138,7 +138,7 @@ class SavedModelLoaderTest(test.TestCase):
       y = variables.VariableV1(0, name="y")
       z = x * y
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       # There are variables to restore, so a saver must be created.
       with self.assertRaises(ValueError):
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index a40ea7687f5..e722b6ceaea 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -61,7 +61,7 @@ class SavedModelTestBase(test.TestCase):
 
   def _init_and_validate_variable(self, sess, variable_name, variable_value):
     v = variables.VariableV1(variable_value, name=variable_name)
-    sess.run(variables.global_variables_initializer())
+    self.evaluate(variables.global_variables_initializer())
     self.assertEqual(variable_value, self.evaluate(v))
 
   def _build_asset_collection(self, asset_file_name, asset_file_contents,
@@ -389,7 +389,7 @@ class SavedModelTest(SavedModelTestBase):
       a = ops.get_default_graph().get_tensor_by_name(constant_5_name)
       b = constant_op.constant(6.0)
       c = a * b
-      self.assertEqual(30.0, sess.run(c))
+      self.assertEqual(30.0, self.evaluate(c))
 
     # Restore the graph with tag "bar".
     with self.session(graph=ops.Graph()) as sess:
@@ -398,7 +398,7 @@ class SavedModelTest(SavedModelTestBase):
       a = ops.get_default_graph().get_tensor_by_name(constant_6_name)
       b = constant_op.constant(5.0)
       c = a * b
-      self.assertEqual(30.0, sess.run(c))
+      self.assertEqual(30.0, self.evaluate(c))
 
   def testNoOverwrite(self):
     export_dir = self._get_export_dir("test_no_overwrite")
@@ -464,7 +464,7 @@ class SavedModelTest(SavedModelTestBase):
     with self.session(graph=ops.Graph()) as sess:
       v = variables.VariableV1(42, name="v")
       ops.add_to_collection("foo_vars", v)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(42, self.evaluate(v))
       builder.add_meta_graph_and_variables(sess, ["foo"])
 
@@ -474,7 +474,7 @@ class SavedModelTest(SavedModelTestBase):
     with self.session(graph=ops.Graph()) as sess:
       v = variables.VariableV1(43, name="v")
       ops.add_to_collection("bar_vars", v)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(43, self.evaluate(v))
       builder.add_meta_graph(["bar"])
 
@@ -802,7 +802,7 @@ class SavedModelTest(SavedModelTestBase):
         add_v1_v2 = math_ops.add(v1._ref(), v2._ref())
         custom_main_op = control_flow_ops.group(state_ops.assign(v3, add_v1_v2))
 
-      sess.run(custom_main_op)
+      self.evaluate(custom_main_op)
       builder.add_meta_graph_and_variables(
           sess, ["foo"], main_op=custom_main_op)
 
@@ -836,7 +836,7 @@ class SavedModelTest(SavedModelTestBase):
       assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2))
       legacy_init_op = control_flow_ops.group(assign_v3, name="legacy_init_op")
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(
           sess, ["foo"], legacy_init_op=legacy_init_op)
 
@@ -879,7 +879,7 @@ class SavedModelTest(SavedModelTestBase):
       assign_v2 = state_ops.assign(v2, v1)
       init_op = control_flow_ops.group(assign_v2, name="init_op")
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       ops.add_to_collection(key, control_flow_ops.no_op())
       # ValueError should be raised since the LEGACY_INIT_OP_KEY collection
@@ -902,10 +902,10 @@ class SavedModelTest(SavedModelTestBase):
       v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       train_op = state_ops.assign_add(v1, v2)
 
-      sess.run(train_op)
+      self.evaluate(train_op)
       # TODO(karmel): remove explicit call when in the public method.
       builder._add_train_op(train_op)
       builder.add_meta_graph_and_variables(sess, ["foo"])
@@ -931,10 +931,10 @@ class SavedModelTest(SavedModelTestBase):
       v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       train_op = control_flow_ops.group()
 
-      sess.run(train_op)
+      self.evaluate(train_op)
       # TODO(karmel): remove explicit call when in the public method.
       builder._add_train_op(train_op)
       builder.add_meta_graph_and_variables(sess, ["foo"])
@@ -960,11 +960,11 @@ class SavedModelTest(SavedModelTestBase):
       v2 = variables.VariableV1(2, name="v2")
       ops.add_to_collection("v", v2)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(sess, ["pre_foo"])
 
       train_op = state_ops.assign_add(v1, v2)
-      sess.run(train_op)
+      self.evaluate(train_op)
       # TODO(karmel): remove explicit call when in the public method.
       builder._add_train_op(train_op)
       builder.add_meta_graph(["foo"])
@@ -1090,7 +1090,7 @@ class SavedModelTest(SavedModelTestBase):
       ops.add_to_collection("v", v3)
       ops.add_to_collection("init_op", init_op)
 
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(1, ops.get_collection("v")[0].eval())
       self.assertEqual(2, ops.get_collection("v")[1].eval())
 
@@ -1145,7 +1145,7 @@ class SavedModelTest(SavedModelTestBase):
 
     with self.session(graph=ops.Graph()) as sess:
       variables.VariableV1(1, name="v1")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       custom_saver = training.Saver(name="my_saver")
       builder.add_meta_graph_and_variables(sess, ["tag"], saver=custom_saver)
 
@@ -1167,7 +1167,7 @@ class SavedModelTest(SavedModelTestBase):
 
     with self.session(graph=ops.Graph()) as sess:
       variables.VariableV1(1, name="v1")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       training.Saver(name="my_saver")
       builder.add_meta_graph_and_variables(sess, ["tag"])
 
@@ -1189,7 +1189,7 @@ class SavedModelTest(SavedModelTestBase):
 
     with self.session(graph=ops.Graph()) as sess:
       variables.VariableV1(1, name="v1")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(sess, ["tag_0"])
 
       saver_1 = training.Saver()
@@ -1298,7 +1298,7 @@ class SavedModelTest(SavedModelTestBase):
       real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
       imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(
           sess, ["foo"], strip_default_attrs=True)
 
@@ -1308,7 +1308,7 @@ class SavedModelTest(SavedModelTestBase):
       real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real")
       imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag")
       math_ops.complex(real_num, imag_num, name="complex")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph(["bar"], strip_default_attrs=False)
 
     # Save the SavedModel to disk in text format.
@@ -1370,7 +1370,7 @@ class SavedModelTest(SavedModelTestBase):
     with session.Session(graph=ops.Graph()) as sess:
       variables.VariableV1(1.0, dtype=dtypes.float64, name="var")
       test_ops.test_attr(T=dtypes.float32, name="test_attr")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       builder.add_meta_graph_and_variables(sess, ["foo"])
 
     # Save the SavedModel to disk in text format.
diff --git a/tensorflow/python/saved_model/simple_save_test.py b/tensorflow/python/saved_model/simple_save_test.py
index 2d404dcea44..0d0665072ac 100644
--- a/tensorflow/python/saved_model/simple_save_test.py
+++ b/tensorflow/python/saved_model/simple_save_test.py
@@ -33,7 +33,7 @@ class SimpleSaveTest(test.TestCase):
 
   def _init_and_validate_variable(self, sess, variable_name, variable_value):
     v = variables.Variable(variable_value, name=variable_name)
-    sess.run(variables.global_variables_initializer())
+    self.evaluate(variables.global_variables_initializer())
     self.assertEqual(variable_value, self.evaluate(v))
     return v
 
diff --git a/tensorflow/python/tools/strip_unused_test.py b/tensorflow/python/tools/strip_unused_test.py
index 7cf0c3e3ed9..e906ff94ba8 100644
--- a/tensorflow/python/tools/strip_unused_test.py
+++ b/tensorflow/python/tools/strip_unused_test.py
@@ -50,7 +50,7 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
           wanted_input_node, 2.0, name="output_node")
       math_ops.add(output_node, 2.0, name="later_node")
       sess = session.Session()
-      output = sess.run(output_node)
+      output = self.evaluate(output_node)
       self.assertNear(-4.0, output, 0.00001)
       graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)
 
@@ -113,7 +113,7 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
           input_node1, input_node2, name="output_node")
       math_ops.add(output_node, 2.0, name="later_node")
       sess = session.Session()
-      output = sess.run(output_node)
+      output = self.evaluate(output_node)
       self.assertNear(6.0, output, 0.00001)
       graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)
 
diff --git a/tensorflow/python/training/adagrad_da_test.py b/tensorflow/python/training/adagrad_da_test.py
index 761f703cb5b..c7c47206a9c 100644
--- a/tensorflow/python/training/adagrad_da_test.py
+++ b/tensorflow/python/training/adagrad_da_test.py
@@ -54,14 +54,14 @@ class AdagradDAOptimizerTest(test.TestCase):
             zip([grads0, grads1], [var0, var1]), global_step=global_step)
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllClose([0.0, 0.0], v0_val)
         self.assertAllClose([0.0, 0.0], v1_val)
 
         # Run a step of AdagradDA
         update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         # Let g to be gradient accumulator, gg to be gradient squared
         # accumulator, T be the global step, lr is the learning rate, and k the
         # initial gradient squared accumulator value.
@@ -119,14 +119,14 @@ class AdagradDAOptimizerTest(test.TestCase):
             zip([grads0, grads1], [var0, var1]), global_step=global_step)
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run a step of AdagradDA
         update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.904534, -1.603567]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -151,14 +151,14 @@ class AdagradDAOptimizerTest(test.TestCase):
             zip([grads0, grads1], [var0, var1]), global_step=global_step)
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run a step of AdagradDA
         update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.895489, -1.59555]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -183,14 +183,14 @@ class AdagradDAOptimizerTest(test.TestCase):
             zip([grads0, grads1], [var0, var1]), global_step=global_step)
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run a step of AdagradDA
         update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.046907, -0.093659]), v0_val)
         self.assertAllCloseAccordingToType(
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index 3fabb3e0861..03810b57e37 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -249,7 +249,7 @@ class LoggingTensorHookTest(test.TestCase):
           tensors=[t.name], at_end=True)
       hook.begin()
       mon_sess = monitored_session._HookedSession(sess, [hook])
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       self.logged_message = ''
       for _ in range(3):
         mon_sess.run(train_op)
@@ -267,7 +267,7 @@ class LoggingTensorHookTest(test.TestCase):
         tensors=[t.name], every_n_iter=10, at_end=at_end)
     hook.begin()
     mon_sess = monitored_session._HookedSession(sess, [hook])
-    sess.run(variables_lib.global_variables_initializer())
+    self.evaluate(variables_lib.global_variables_initializer())
     mon_sess.run(train_op)
     self.assertRegexpMatches(str(self.logged_message), t.name)
     for _ in range(3):
@@ -314,7 +314,7 @@ class LoggingTensorHookTest(test.TestCase):
           tensors={'foo': t}, every_n_iter=1)
       hook.begin()
       mon_sess = monitored_session._HookedSession(sess, [hook])
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess.run(train_op)
       self.assertRegexpMatches(str(self.logged_message), 'foo')
       # in first run, elapsed time is None.
@@ -328,7 +328,7 @@ class LoggingTensorHookTest(test.TestCase):
         tensors=[t.name], every_n_secs=1.0, at_end=at_end)
     hook.begin()
     mon_sess = monitored_session._HookedSession(sess, [hook])
-    sess.run(variables_lib.global_variables_initializer())
+    self.evaluate(variables_lib.global_variables_initializer())
 
     mon_sess.run(train_op)
     self.assertRegexpMatches(str(self.logged_message), t.name)
@@ -376,7 +376,7 @@ class LoggingTensorHookTest(test.TestCase):
           formatter=lambda items: 'qqq=%s' % items[t.name])
       hook.begin()
       mon_sess = monitored_session._HookedSession(sess, [hook])
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess.run(train_op)
       self.assertEqual(self.logged_message[0], 'qqq=42.0')
 
@@ -927,7 +927,7 @@ class StepCounterHookTest(test.TestCase):
       hook = basic_session_run_hooks.StepCounterHook(
           summary_writer=summary_writer, every_n_steps=10)
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       with test.mock.patch.object(tf_logging, 'warning') as mock_log:
         for _ in range(30):
@@ -958,7 +958,7 @@ class StepCounterHookTest(test.TestCase):
           summary_writer=summary_writer, every_n_steps=None, every_n_secs=0.1)
 
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       mon_sess.run(train_op)
       mock_time.return_value += 0.2
@@ -995,7 +995,7 @@ class StepCounterHookTest(test.TestCase):
           summary_writer=summary_writer, every_n_steps=1, every_n_secs=None)
 
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       mon_sess.run(train_op)
       mon_sess.run(train_op)
@@ -1015,7 +1015,7 @@ class StepCounterHookTest(test.TestCase):
     with ops.Graph().as_default(), session_lib.Session() as sess:
       variables.get_or_create_global_step()
       train_op = training_util._increment_global_step(0)  # keep same.
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       hook = basic_session_run_hooks.StepCounterHook(
           every_n_steps=1, every_n_secs=None)
       hook.begin()
@@ -1042,7 +1042,7 @@ class StepCounterHookTest(test.TestCase):
         summary_writer=self.summary_writer, every_n_steps=every_n_steps)
     self.hook._set_steps_per_run(steps_per_run)
     self.hook.begin()
-    sess.run(variables_lib.global_variables_initializer())
+    self.evaluate(variables_lib.global_variables_initializer())
     self.mon_sess = monitored_session._HookedSession(sess, [self.hook])
 
   @test.mock.patch.object(time, 'time')
@@ -1161,7 +1161,7 @@ class SummarySaverHookTest(test.TestCase):
 
     with self.cached_session() as sess:
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       for _ in range(30):
         mon_sess.run(self.train_op)
@@ -1193,7 +1193,7 @@ class SummarySaverHookTest(test.TestCase):
 
     with self.cached_session() as sess:
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       for _ in range(10):
         mon_sess.run(self.train_op)
@@ -1223,7 +1223,7 @@ class SummarySaverHookTest(test.TestCase):
 
     with self.cached_session() as sess:
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       for _ in range(4):
         mon_sess.run(self.train_op)
@@ -1258,7 +1258,7 @@ class SummarySaverHookTest(test.TestCase):
 
     with self.cached_session() as sess:
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       for _ in range(8):
         mon_sess.run(self.train_op)
@@ -1327,7 +1327,7 @@ class GlobalStepWaiterHookTest(test.TestCase):
         mock_sleep.side_effect = mock_sleep_side_effect
 
         # Run the mocked-out interaction with the hook.
-        sess.run(variables_lib.global_variables_initializer())
+        self.evaluate(variables_lib.global_variables_initializer())
         run_context = session_run_hook.SessionRunContext(
             original_args=None, session=sess)
         hook.before_run(run_context)
@@ -1422,7 +1422,7 @@ class ResourceSummarySaverHookTest(test.TestCase):
 
     with self.cached_session() as sess:
       hook.begin()
-      sess.run(variables_lib.global_variables_initializer())
+      self.evaluate(variables_lib.global_variables_initializer())
       mon_sess = monitored_session._HookedSession(sess, [hook])
       for _ in range(30):
         mon_sess.run(self.train_op)
diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py
index 38d4acf85fc..21ad3df1c8f 100644
--- a/tensorflow/python/training/checkpoint_ops_test.py
+++ b/tensorflow/python/training/checkpoint_ops_test.py
@@ -47,7 +47,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
       with variable_scope.variable_scope('some_scope'):
         variable_scope.get_variable(name='embeddings', shape=[5, 16],
                                     initializer=initializer)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       saver = saver_lib.Saver()
       saver.save(sess, checkpoint_prefix, global_step=5)
     self.checkpoint_file = '{}-5'.format(checkpoint_prefix)
diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py
index a61132a9667..70b5db31f80 100644
--- a/tensorflow/python/training/ftrl_test.py
+++ b/tensorflow/python/training/ftrl_test.py
@@ -54,7 +54,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllClose([0.0, 0.0], v0_val)
         self.assertAllClose([0.0, 0.0], v1_val)
 
@@ -62,7 +62,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(3):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-2.60260963, -4.29698515]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -90,14 +90,14 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run 3 steps FTRL
         for _ in range(3):
           update.run()
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-2.55607247, -3.98729396]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -137,14 +137,14 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
         # Run 10 steps FTRL
         for _ in range(10):
           update.run()
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-7.66718769, -10.91273689]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -166,7 +166,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
@@ -174,7 +174,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.24059935, -0.46829352]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -203,7 +203,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
 
@@ -211,7 +211,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType(
             np.array([-0.22578995, -0.44345796]), v0_val)
         self.assertAllCloseAccordingToType(
@@ -239,7 +239,7 @@ class FtrlOptimizerTest(test.TestCase):
         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val)
         self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val)
 
@@ -247,7 +247,7 @@ class FtrlOptimizerTest(test.TestCase):
         for _ in range(10):
           update.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([[-0.22578995], [2.]], v0_val)
         self.assertAllCloseAccordingToType([[4.], [-0.13229476]], v1_val)
 
@@ -275,7 +275,7 @@ class FtrlOptimizerTest(test.TestCase):
         update1 = opt1.apply_gradients([(grads1, var1)])
         variables.global_variables_initializer().run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
         self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)
 
@@ -284,12 +284,12 @@ class FtrlOptimizerTest(test.TestCase):
           update0.run()
           update1.run()
 
-        v0_val, v1_val = sess.run([var0, var1])
+        v0_val, v1_val = self.evaluate([var0, var1])
         # var0 is experiencing L2 shrinkage so it should be smaller than var1
         # in magnitude.
         self.assertTrue((v0_val**2 < v1_val**2).all())
-        accum0 = list(sess.run(opt0._slots)["accum"].values())[0]
-        accum1 = list(sess.run(opt1._slots)["accum"].values())[0]
+        accum0 = list(self.evaluate(opt0._slots)["accum"].values())[0]
+        accum1 = list(self.evaluate(opt1._slots)["accum"].values())[0]
         # L2 shrinkage should not change how we update grad accumulator.
         self.assertAllCloseAccordingToType(accum0, accum1)
 
@@ -313,7 +313,7 @@ class FtrlOptimizerTest(test.TestCase):
     variables.global_variables_initializer().run()
 
     sess = ops.get_default_session()
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     if is_sparse:
       self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
       self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
@@ -325,7 +325,7 @@ class FtrlOptimizerTest(test.TestCase):
     for _ in range(steps):
       update.run()
 
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     return v0_val, v1_val
 
   # When variables are initialized with Zero, FTRL-Proximal has two properties:
diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py
index e5aac5da187..327f0871383 100644
--- a/tensorflow/python/training/input_test.py
+++ b/tensorflow/python/training/input_test.py
@@ -256,7 +256,7 @@ class StringInputProducerTest(test_lib.TestCase):
           # writing of the `tf.Graph` object. However, many users
           # write code this way, so we include this test to ensure
           # that we can support it.
-          self.assertEquals(string, sess.run(queue.dequeue()))
+          self.assertEquals(string, self.evaluate(queue.dequeue()))
       coord.request_stop()
       coord.join(threads)
 
@@ -348,14 +348,14 @@ class SliceInputProducerTest(test_lib.TestCase):
 
       # No randomness, so just see repeated copies of the input.
       num_items = len(source_strings) * num_epochs
-      output = [sess.run(slices) for _ in range(num_items)]
+      output = [self.evaluate(slices) for _ in range(num_items)]
       out_strings, out_ints = zip(*output)
       self.assertAllEqual(source_strings * num_epochs, out_strings)
       self.assertAllEqual(source_ints * num_epochs, out_ints)
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(slices)
+        self.evaluate(slices)
       for thread in threads:
         thread.join()
 
@@ -383,7 +383,7 @@ class SliceInputProducerTest(test_lib.TestCase):
       for e in expected:
         frequency[e] = 0
       for _ in range(num_epochs):
-        output = [sess.run(slices) for _ in range(len(source_strings))]
+        output = [self.evaluate(slices) for _ in range(len(source_strings))]
         key = b",".join([s + compat.as_bytes(str(i)) for s, i in output])
         self.assertIn(key, expected)
         frequency[key] += 1
@@ -399,7 +399,7 @@ class SliceInputProducerTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(slices)
+        self.evaluate(slices)
       for thread in threads:
         thread.join()
 
@@ -474,7 +474,7 @@ class BatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for i in range(num_batches):
-        results = sess.run(batched_fetch)
+        results = self.evaluate(batched_fetch)
         self.assertAllEqual(results[0],
                             np.arange(i * batch_size, (i + 1) * batch_size))
         self.assertAllEqual(
@@ -491,7 +491,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched_fetch)
+        self.evaluate(batched_fetch)
       for thread in threads:
         thread.join()
 
@@ -507,7 +507,7 @@ class BatchTest(test_lib.TestCase):
     with self.cached_session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      sess.run(batched)
+      self.evaluate(batched)
       coord.request_stop()
       for thread in threads:
         thread.join()
@@ -518,7 +518,7 @@ class BatchTest(test_lib.TestCase):
     with self.cached_session() as sess:
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      sess.run(batched)
+      self.evaluate(batched)
       coord.request_stop()
       for thread in threads:
         thread.join()
@@ -539,7 +539,7 @@ class BatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         expected_results = np.arange(i * batch_size, (i + 1) * batch_size)
         max_len = expected_results[-1]
         self.assertAllEqual(results[0], expected_results)
@@ -549,7 +549,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -571,7 +571,7 @@ class BatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual(results[0],
                             np.arange(i * batch_size, (i + 1) * batch_size))
         self.assertAllEqual(
@@ -584,7 +584,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -610,7 +610,7 @@ class BatchTest(test_lib.TestCase):
 
       all_counts = []
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         self.assertAllEqual(results[0], results[1].values)
@@ -624,7 +624,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -651,7 +651,7 @@ class BatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual(results[0],
                             np.arange(i * batch_size, (i + 1) * batch_size))
         self.assertAllEqual(
@@ -667,7 +667,7 @@ class BatchTest(test_lib.TestCase):
         self.assertAllEqual(results[2], [b"string"] * batch_size)
 
       # Reached the final batch with extra_elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       self.assertAllEqual(results[0],
                           np.arange(num_batches * batch_size,
                                     num_batches * batch_size + extra_elements))
@@ -681,7 +681,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -709,7 +709,7 @@ class BatchTest(test_lib.TestCase):
 
       all_counts = []
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         self.assertAllEqual(results[0], results[1].values)
@@ -721,7 +721,7 @@ class BatchTest(test_lib.TestCase):
         self.assertAllEqual(results[2], [b"string"] * batch_size)
 
       # Reached the final batch with extra_elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       tf_logging.info("Last Batch: %s", results[0])
       self.assertEqual(len(results[0]), extra_elements)
       self.assertAllEqual(results[0], results[1].values)
@@ -736,7 +736,7 @@ class BatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -827,14 +827,14 @@ class BatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for _ in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual([0] * batch_size, np.mod(results[0], 2))
         self.assertAllEqual([0] * batch_size, np.mod(results[1].values, 2))
         self.assertAllEqual([b"string"] * batch_size, results[2])
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1020,7 +1020,7 @@ class BatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched_fetch)
+        results = self.evaluate(batched_fetch)
         self.assertEqual(3, len(results))
         self.assertEqual(batch_size, len(results[0]))
         self.assertEqual(batch_size, len(results[2]))
@@ -1051,7 +1051,7 @@ class BatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched_fetch)
+        self.evaluate(batched_fetch)
       for thread in threads:
         thread.join()
 
@@ -1116,7 +1116,7 @@ class BatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertEqual(2, len(results))
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[1]), batch_size)
@@ -1148,7 +1148,7 @@ class BatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1201,7 +1201,7 @@ class BatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[2]), batch_size)
@@ -1221,7 +1221,7 @@ class BatchJoinTest(test_lib.TestCase):
                             [results[0][i] for i in which_b])
 
       # Reached the final batch with 2 * extra_elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       tf_logging.info("Last Batch: %s", results[0])
       self.assertEqual(len(results[0]), 2 * extra_elements)
       self.assertEqual(len(results[2]), 2 * extra_elements)
@@ -1249,7 +1249,7 @@ class BatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1296,7 +1296,7 @@ class BatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[1]), batch_size)
@@ -1316,7 +1316,7 @@ class BatchJoinTest(test_lib.TestCase):
                             [results[0][i] for i in which_b])
 
       # Reached the final batch with 2 * extra_elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       tf_logging.info("Last Batch: %s", results[0])
       self.assertEqual(len(results[0]), 2 * extra_elements)
       self.assertEqual(len(results[1]), 2 * extra_elements)
@@ -1347,7 +1347,7 @@ class BatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1410,7 +1410,7 @@ class BatchJoinTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for _ in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual(
             [0] * batch_size,
             np.mod(results[0], 2),)
@@ -1421,7 +1421,7 @@ class BatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1579,7 +1579,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       all_counts = []
       for i in range(num_batches):
-        results = sess.run(batched_fetch)
+        results = self.evaluate(batched_fetch)
         self.assertEqual(len(results[0]), batch_size)
         all_counts.extend(results[0])
         self.assertAllEqual(
@@ -1597,7 +1597,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched_fetch)
+        self.evaluate(batched_fetch)
       for thread in threads:
         thread.join()
 
@@ -1634,7 +1634,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       all_counts = []
       for _ in range(num_batches):
-        results = sess.run(batched_fetch)
+        results = self.evaluate(batched_fetch)
         self.assertEqual(len(results[0]), batch_size)
         all_counts.extend(results[0])
         self.assertAllEqual(
@@ -1645,7 +1645,7 @@ class ShuffleBatchTest(test_lib.TestCase):
         self.assertAllEqual(results[2], [b"string"] * batch_size)
 
       # Reached the final batch with extra elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       self.assertAllEqual(results[1].dense_shape, [extra_elements, 1])
       self.assertAllEqual(results[2], [b"string"] * extra_elements)
       all_counts.extend(results[0])
@@ -1659,7 +1659,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched_fetch)
+        self.evaluate(batched_fetch)
       for thread in threads:
         thread.join()
 
@@ -1687,7 +1687,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       all_counts = []
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         all_counts.extend(results[0])
@@ -1706,7 +1706,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1737,7 +1737,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       all_counts = []
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         all_counts.extend(results[0])
@@ -1749,7 +1749,7 @@ class ShuffleBatchTest(test_lib.TestCase):
         self.assertAllEqual(results[2], [b"string"] * batch_size)
 
       # Reached the final batch with extra elements.
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       self.assertAllEqual(results[0].shape, [extra_elements])
       self.assertAllEqual(results[1].dense_shape, [extra_elements, 1])
       self.assertAllEqual(results[2], [b"string"] * extra_elements)
@@ -1764,7 +1764,7 @@ class ShuffleBatchTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1817,14 +1817,14 @@ class ShuffleBatchTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for _ in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual([0] * batch_size, np.mod(results[0], 2))
         self.assertAllEqual([0] * batch_size, np.mod(results[1].values, 2))
         self.assertAllEqual([b"string"] * batch_size, results[2])
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -1990,7 +1990,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched_fetch)
+        results = self.evaluate(batched_fetch)
         self.assertEqual(3, len(results))
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[2]), batch_size)
@@ -2020,7 +2020,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched_fetch)
+        self.evaluate(batched_fetch)
       for thread in threads:
         thread.join()
 
@@ -2082,7 +2082,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
       saw_both = 0
       num_batches = (num_a + num_b) // batch_size
       for i in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         tf_logging.info("Batch %d: %s", i, results[0])
         self.assertEqual(len(results[0]), batch_size)
         self.assertEqual(len(results[2]), batch_size)
@@ -2102,7 +2102,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
                             [results[0][i] for i in which_b])
 
       # Reached end with 2 * extra_elements left
-      results = sess.run(batched)
+      results = self.evaluate(batched)
       self.assertEqual(len(results[0]), 2 * extra_elements)
       self.assertAllEqual(results[1].dense_shape, [2 * extra_elements, 1])
       self.assertEqual(len(results[2]), 2 * extra_elements)
@@ -2129,7 +2129,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
@@ -2203,14 +2203,14 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
       threads = queue_runner_impl.start_queue_runners()
 
       for _ in range(num_batches):
-        results = sess.run(batched)
+        results = self.evaluate(batched)
         self.assertAllEqual([0] * batch_size, np.mod(results[0], 2))
         self.assertAllEqual([0] * batch_size, np.mod(results[1].values, 2))
         self.assertAllEqual([b"string"] * batch_size, results[2])
 
       # Reached the limit.
       with self.assertRaises(errors_impl.OutOfRangeError):
-        sess.run(batched)
+        self.evaluate(batched)
       for thread in threads:
         thread.join()
 
diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py
index 03a32f6ca09..9c31c0924f5 100644
--- a/tensorflow/python/training/learning_rate_decay_test.py
+++ b/tensorflow/python/training/learning_rate_decay_test.py
@@ -62,23 +62,22 @@ class LRDecayTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
   def testVariables(self):
-    with self.cached_session():
-      step = variables.VariableV1(1)
-      assign_1 = step.assign(1)
-      assign_2 = step.assign(2)
-      assign_100 = step.assign(100)
-      decayed_lr = learning_rate_decay.exponential_decay(.1, step, 3, 0.96,
-                                                         staircase=True)
-      variables.global_variables_initializer().run()
-      # No change to learning rate
-      assign_1.op.run()
-      self.assertAllClose(decayed_lr.eval(), .1, 1e-6)
-      assign_2.op.run()
-      self.assertAllClose(decayed_lr.eval(), .1, 1e-6)
-      # Decayed learning rate
-      assign_100.op.run()
-      expected = .1 * 0.96 ** (100 // 3)
-      self.assertAllClose(decayed_lr.eval(), expected, 1e-6)
+    step = variables.VariableV1(1)
+    assign_1 = step.assign(1)
+    assign_2 = step.assign(2)
+    assign_100 = step.assign(100)
+    decayed_lr = learning_rate_decay.exponential_decay(
+        .1, step, 3, 0.96, staircase=True)
+    self.evaluate(variables.global_variables_initializer())
+    # No change to learning rate
+    self.evaluate(assign_1.op)
+    self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6)
+    self.evaluate(assign_2.op)
+    self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6)
+    # Decayed learning rate
+    self.evaluate(assign_100.op)
+    expected = .1 * 0.96**(100 // 3)
+    self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
   @test_util.run_in_graph_and_eager_modes
   def testPiecewiseConstant(self):
diff --git a/tensorflow/python/training/learning_rate_decay_v2_test.py b/tensorflow/python/training/learning_rate_decay_v2_test.py
index b2ac93f06fe..354ddb25be5 100644
--- a/tensorflow/python/training/learning_rate_decay_v2_test.py
+++ b/tensorflow/python/training/learning_rate_decay_v2_test.py
@@ -62,23 +62,22 @@ class LRDecayTestV2(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr()), expected, 1e-6)
 
   def testVariables(self):
-    with self.cached_session():
-      step = variables.Variable(1)
-      assign_1 = step.assign(1)
-      assign_2 = step.assign(2)
-      assign_100 = step.assign(100)
-      decayed_lr = learning_rate_decay_v2.exponential_decay(.1, step, 3, 0.96,
-                                                            staircase=True)
-      variables.global_variables_initializer().run()
-      # No change to learning rate
-      assign_1.op.run()
-      self.assertAllClose(decayed_lr().eval(), .1, 1e-6)
-      assign_2.op.run()
-      self.assertAllClose(decayed_lr().eval(), .1, 1e-6)
-      # Decayed learning rate
-      assign_100.op.run()
-      expected = .1 * 0.96 ** (100 // 3)
-      self.assertAllClose(decayed_lr().eval(), expected, 1e-6)
+    step = variables.Variable(1)
+    assign_1 = step.assign(1)
+    assign_2 = step.assign(2)
+    assign_100 = step.assign(100)
+    decayed_lr = learning_rate_decay_v2.exponential_decay(
+        .1, step, 3, 0.96, staircase=True)
+    self.evaluate(variables.global_variables_initializer())
+    # No change to learning rate
+    self.evaluate(assign_1.op)
+    self.assertAllClose(self.evaluate(decayed_lr()), .1, 1e-6)
+    self.evaluate(assign_2.op)
+    self.assertAllClose(self.evaluate(decayed_lr()), .1, 1e-6)
+    # Decayed learning rate
+    self.evaluate(assign_100.op)
+    expected = .1 * 0.96**(100 // 3)
+    self.assertAllClose(self.evaluate(decayed_lr()), expected, 1e-6)
 
   @test_util.run_in_graph_and_eager_modes
   def testPiecewiseConstant(self):
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index b828be44991..2ceb387ec34 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -1170,7 +1170,7 @@ class HookedSessionTest(test.TestCase):
       mock_run = FakeSession(sess)
       mon_sess = monitored_session._HookedSession(sess=mock_run, hooks=[])
       a_tensor = constant_op.constant([0], name='a_tensor')
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       output = mon_sess.run(fetches=a_tensor,
                             feed_dict='a_feed',
                             options='an_option',
@@ -1189,7 +1189,7 @@ class HookedSessionTest(test.TestCase):
       mon_sess = monitored_session._HookedSession(
           sess=sess, hooks=[mock_hook, mock_hook2])
       a_tensor = constant_op.constant([0], name='a_tensor')
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       mon_sess.run(a_tensor)
 
       for hook in [mock_hook, mock_hook2]:
@@ -1214,7 +1214,7 @@ class HookedSessionTest(test.TestCase):
       mon_sess = monitored_session._HookedSession(
           sess=sess, hooks=[mock_hook, mock_hook2])
       constant_op.constant([0], name='a_tensor')
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       mon_sess.run(fetches='a_tensor')
       self.assertFalse(mon_sess.should_stop())
@@ -1234,7 +1234,7 @@ class HookedSessionTest(test.TestCase):
       third_tensor = constant_op.constant([10], name='third_tensor')
       mock_hook.request = session_run_hook.SessionRunArgs([another_tensor])
       mock_hook2.request = session_run_hook.SessionRunArgs([third_tensor])
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       output = mon_sess.run(fetches=a_tensor)
       self.assertEqual(output, [0])
@@ -1254,7 +1254,7 @@ class HookedSessionTest(test.TestCase):
           None, feed_dict={a_tensor: [5]})
       mock_hook2.request = session_run_hook.SessionRunArgs(
           None, feed_dict={b_tensor: [10]})
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual(mon_sess.run(fetches=add_tensor), [15])
 
@@ -1272,7 +1272,7 @@ class HookedSessionTest(test.TestCase):
           None, feed_dict={a_tensor: [5]})
       mock_hook2.request = session_run_hook.SessionRunArgs(
           None, feed_dict={b_tensor: [10]})
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       feed_dict = {c_tensor: [20]}
       self.assertEqual(
@@ -1293,7 +1293,7 @@ class HookedSessionTest(test.TestCase):
           None, feed_dict={a_tensor: [5]})
       mock_hook2.request = session_run_hook.SessionRunArgs(
           None, feed_dict={a_tensor: [10]})
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       with self.assertRaisesRegexp(RuntimeError, 'Same tensor is fed'):
         mon_sess.run(fetches=add_tensor)
@@ -1311,7 +1311,7 @@ class HookedSessionTest(test.TestCase):
           None, feed_dict={a_tensor: [5]})
       mock_hook2.request = session_run_hook.SessionRunArgs(
           None, feed_dict={b_tensor: [10]})
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
 
       with self.assertRaisesRegexp(RuntimeError, 'Same tensor is fed'):
         mon_sess.run(fetches=add_tensor, feed_dict={b_tensor: [10]})
diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py
index 8009e3c24e7..6ce5de6663d 100644
--- a/tensorflow/python/training/moving_averages_test.py
+++ b/tensorflow/python/training/moving_averages_test.py
@@ -274,14 +274,14 @@ class ExponentialMovingAverageTest(test.TestCase):
       self.assertEqual([], v1_avg.value().op.control_inputs)
       self.assertEqual([], v1_avg.value().op.control_inputs)
       # We should be able to initialize v1_avg before v0.
-      sess.run(v1_avg.initializer)
-      sess.run(v0.initializer)
-      self.assertEqual([10.0], sess.run(v1_avg))
+      self.evaluate(v1_avg.initializer)
+      self.evaluate(v0.initializer)
+      self.assertEqual([10.0], self.evaluate(v1_avg))
       # running ema_op should add to v0 (in addition to updating v1_avg)
-      sess.run(assign_to_v1)
-      sess.run(ema_op)
-      self.assertEqual(1, sess.run(v0))
-      self.assertEqual([17.5], sess.run(v1_avg))
+      self.evaluate(assign_to_v1)
+      self.evaluate(ema_op)
+      self.assertEqual(1, self.evaluate(v0))
+      self.assertEqual([17.5], self.evaluate(v1_avg))
 
   @test_util.run_in_graph_and_eager_modes
   def testBasicEager(self):
diff --git a/tensorflow/python/training/proximal_adagrad_test.py b/tensorflow/python/training/proximal_adagrad_test.py
index 272f9019e7d..9d46a6682d9 100644
--- a/tensorflow/python/training/proximal_adagrad_test.py
+++ b/tensorflow/python/training/proximal_adagrad_test.py
@@ -48,7 +48,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([0.0, 0.0], v0_val)
       self.assertAllClose([0.0, 0.0], v1_val)
 
@@ -56,7 +56,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       for _ in range(3):
         update.run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-2.60260963, -4.29698515]), v0_val)
       self.assertAllClose(np.array([-0.28432083, -0.56694895]), v1_val)
       opt_vars = opt.variables()
@@ -85,14 +85,14 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([1.0, 2.0], v0_val)
       self.assertAllClose([4.0, 3.0], v1_val)
 
       # Run 3 steps Proximal Adagrad.
       for _ in range(3):
         update.run()
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-1.60261, -2.296985]), v0_val)
       self.assertAllClose(np.array([3.715679, 2.433051]), v1_val)
 
@@ -129,14 +129,14 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([1.0, 2.0], v0_val)
       self.assertAllClose([4.0, 3.0], v1_val)
 
       # Run 10 steps Proximal Adagrad
       for _ in range(10):
         update.run()
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-6.663634, -9.190331]), v0_val)
       self.assertAllClose(np.array([2.959304, 1.029232]), v1_val)
 
@@ -155,7 +155,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([1.0, 2.0], v0_val)
       self.assertAllClose([4.0, 3.0], v1_val)
 
@@ -163,7 +163,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       for _ in range(10):
         update.run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-0.0495, -0.0995]), v0_val)
       self.assertAllClose(np.array([-0.0045, -0.0095]), v1_val)
 
@@ -191,7 +191,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
     variables.global_variables_initializer().run()
 
     sess = ops.get_default_session()
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     if is_sparse:
       self.assertAllClose([[1.0], [2.0]], v0_val)
       self.assertAllClose([[3.0], [4.0]], v1_val)
@@ -203,7 +203,7 @@ class ProximalAdagradOptimizerTest(test.TestCase):
     for _ in range(steps):
       update.run()
 
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     return v0_val, v1_val
 
   def testEquivAdagradwithoutRegularization(self):
diff --git a/tensorflow/python/training/proximal_gradient_descent_test.py b/tensorflow/python/training/proximal_gradient_descent_test.py
index a9355f48246..8797b308ebd 100644
--- a/tensorflow/python/training/proximal_gradient_descent_test.py
+++ b/tensorflow/python/training/proximal_gradient_descent_test.py
@@ -50,7 +50,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([0.0, 0.0], v0_val)
       self.assertAllClose([0.0, 0.0], v1_val)
 
@@ -58,7 +58,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       for _ in range(3):
         update.run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-0.9, -1.8]), v0_val)
       self.assertAllClose(np.array([-0.09, -0.18]), v1_val)
 
@@ -80,7 +80,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([1.0, 2.0], v0_val)
       self.assertAllClose([4.0, 3.0], v1_val)
 
@@ -88,7 +88,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       for _ in range(3):
         update.run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([0.1, 0.2]), v0_val)
       self.assertAllClose(np.array([3.91, 2.82]), v1_val)
 
@@ -123,7 +123,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
       variables.global_variables_initializer().run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose([1.0, 2.0], v0_val)
       self.assertAllClose([4.0, 3.0], v1_val)
 
@@ -131,7 +131,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       for _ in range(10):
         update.run()
 
-      v0_val, v1_val = sess.run([var0, var1])
+      v0_val, v1_val = self.evaluate([var0, var1])
       self.assertAllClose(np.array([-0.0495, -0.0995]), v0_val)
       self.assertAllClose(np.array([-0.0045, -0.0095]), v1_val)
 
@@ -159,7 +159,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
     variables.global_variables_initializer().run()
 
     sess = ops.get_default_session()
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     if is_sparse:
       self.assertAllClose([[1.0], [2.0]], v0_val)
       self.assertAllClose([[3.0], [4.0]], v1_val)
@@ -171,7 +171,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
     for _ in range(steps):
       update.run()
 
-    v0_val, v1_val = sess.run([var0, var1])
+    v0_val, v1_val = self.evaluate([var0, var1])
     return v0_val, v1_val
 
   def testEquivSparseGradientDescentwithoutRegularization(self):
diff --git a/tensorflow/python/training/quantize_training_test.py b/tensorflow/python/training/quantize_training_test.py
index 6edbf7665fb..07fd488563e 100644
--- a/tensorflow/python/training/quantize_training_test.py
+++ b/tensorflow/python/training/quantize_training_test.py
@@ -73,11 +73,11 @@ class PywrapQuantizeTrainingTest(test.TestCase):
       _ = importer.import_graph_def(result, name='')
 
       # Initialize the variable.
-      sess.run(g.get_operation_by_name(init_op.name))
+      self.evaluate(g.get_operation_by_name(init_op.name))
 
       # Run the graph for one step to assign values to the quantization min/max
       # variables.
-      sess.run(g.get_tensor_by_name(c.name))
+      self.evaluate(g.get_tensor_by_name(c.name))
 
       saver.save(sess, save_path)
 
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index be49e6e7157..6b2177b0bb7 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -227,7 +227,7 @@ class SaverTest(test.TestCase):
         w1 = resource_variable_ops.ResourceVariable(1.0, name="w1")
         w2 = resource_variable_ops.ResourceVariable(2.0, name="w2")
         graph_saver = saver_module.Saver([w1, w2])
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         graph_saver.save(sess, graph_ckpt_prefix)
 
     with context.eager_mode():
@@ -260,7 +260,7 @@ class SaverTest(test.TestCase):
         w3 = resource_variable_ops.ResourceVariable(0.0, name="w3")
         w4 = resource_variable_ops.ResourceVariable(0.0, name="w4")
         graph_saver = saver_module.Saver([w3, w4])
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         graph_saver.restore(sess, eager_ckpt_prefix)
         self.assertAllEqual(w3.eval(), 3.0)
         self.assertAllEqual(w4.eval(), 4.0)
@@ -326,7 +326,7 @@ class SaverTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Initialize all variables
-      sess.run(init_all_op)
+      self.evaluate(init_all_op)
 
       # Check that the parameter nodes have been initialized.
       self.assertEqual(10.0, v0.eval())
@@ -376,7 +376,7 @@ class SaverTest(test.TestCase):
     with self.cached_session() as sess:
       tensor = sess.graph.get_tensor_by_name(
           save.saver_def.filename_tensor_name)
-      self.assertEqual(sess.run(tensor), filename)
+      self.assertEqual(self.evaluate(tensor), filename)
 
   def testInvalidPath(self):
     v0 = variables.VariableV1(0, name="v0")
@@ -407,7 +407,7 @@ class SaverTest(test.TestCase):
 
       with self.assertRaisesWithPredicateMatch(
           errors_impl.OpError, lambda e: "uninitialized value v" in e.message):
-        sess.run(v)
+        self.evaluate(v)
 
       # Restore the saved values in the parameter nodes.
       save.restore(sess, save_path)
@@ -497,10 +497,10 @@ class SaverTest(test.TestCase):
 
       with self.assertRaisesWithPredicateMatch(
           errors_impl.OpError, lambda e: "uninitialized value v0" in e.message):
-        sess.run(v0)
+        self.evaluate(v0)
       with self.assertRaisesWithPredicateMatch(
           errors_impl.OpError, lambda e: "uninitialized value v1" in e.message):
-        sess.run(v1)
+        self.evaluate(v1)
       self.assertEqual(0, len(v2.keys().eval()))
       self.assertEqual(0, len(v2.values().eval()))
 
@@ -742,7 +742,7 @@ class SaverTest(test.TestCase):
       try:
         with self.cached_session() as sess:
           # Initialize all variables
-          sess.run(init_all_op)
+          self.evaluate(init_all_op)
 
           # Check that the parameter nodes have been initialized.
           self.assertEqual(10.0, v0.eval())
@@ -777,7 +777,7 @@ class SaverTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Initialize all variables
-      sess.run(init_all_op)
+      self.evaluate(init_all_op)
 
       # Check that the parameter nodes have been initialized.
       self.assertEqual(10.0, v0.eval())
@@ -824,11 +824,11 @@ class SaverTest(test.TestCase):
     save_graph = ops_lib.Graph()
     with save_graph.as_default(), self.session(graph=save_graph) as sess:
       orig_vars = _model()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       save = saver_module.Saver(max_to_keep=1)
       variables.global_variables_initializer().run()
       save.save(sess, save_dir)
-      orig_vals = sess.run(orig_vars)
+      orig_vals = self.evaluate(orig_vars)
 
     restore_graph = ops_lib.Graph()
     with restore_graph.as_default(), self.session(
@@ -836,7 +836,7 @@ class SaverTest(test.TestCase):
       restored_vars = _model()
       save = saver_module.Saver(max_to_keep=1)
       save.restore(sess, save_dir)
-      restored_vals = sess.run(restored_vars)
+      restored_vals = self.evaluate(restored_vars)
 
     for orig, restored in zip(orig_vals, restored_vals):
       self.assertAllEqual(orig, restored)
@@ -1747,7 +1747,7 @@ class MetaGraphTest(test.TestCase):
       self.assertEqual([], v1.get_shape())
       with self.assertRaisesWithPredicateMatch(
           errors_impl.OpError, lambda e: "uninitialized value v1" in e.message):
-        sess.run(v1)
+        self.evaluate(v1)
       # Retrieves saver1. Verifies that new_saver1 can restore v1.
       new_saver1 = savers[1]
       new_saver1.restore(sess, saver1_ckpt)
@@ -1927,9 +1927,9 @@ class MetaGraphTest(test.TestCase):
 
     with self.cached_session() as sess:
       # Initializes all the variables.
-      sess.run(init_all_op)
+      self.evaluate(init_all_op)
       # Runs to logit.
-      sess.run(logits)
+      self.evaluate(logits)
       # Creates a saver.
       saver0 = saver_module.Saver()
       saver0.save(sess, saver0_ckpt)
@@ -1969,7 +1969,7 @@ class MetaGraphTest(test.TestCase):
       ops_lib.add_to_collection("train_op", train_op)
 
       # Runs train_op.
-      sess.run(train_op)
+      self.evaluate(train_op)
 
       # Generates MetaGraphDef.
       saver_module.export_meta_graph(train_filename)
@@ -1983,7 +1983,7 @@ class MetaGraphTest(test.TestCase):
       # Restores from checkpoint.
       new_saver.restore(sess, saver0_ckpt)
       train_op = ops_lib.get_collection("train_op")[0]
-      sess.run(train_op)
+      self.evaluate(train_op)
 
   def testGraphExtension(self):
     test_dir = self._get_test_dir("graph_extension")
@@ -2015,8 +2015,8 @@ class MetaGraphTest(test.TestCase):
 
       # Generate a MetaGraphDef containing the while loop.
       with session.Session() as sess:
-        sess.run(init_op)
-        sess.run(output)
+        self.evaluate(init_op)
+        self.evaluate(output)
         saver = saver_module.Saver()
         saver.save(sess, saver_ckpt)
         saver.export_meta_graph(filename)
@@ -2031,8 +2031,8 @@ class MetaGraphTest(test.TestCase):
       no_constfold_config.graph_options.rewrite_options.constant_folding = (
           rewriter_config_pb2.RewriterConfig.OFF)
       with session.Session(config=no_constfold_config) as sess:
-        sess.run(init_op)
-        expected_grad_value = sess.run(grad)
+        self.evaluate(init_op)
+        expected_grad_value = self.evaluate(grad)
 
     # Restore the MetaGraphDef into a new Graph.
     with ops_lib.Graph().as_default():
@@ -2048,8 +2048,8 @@ class MetaGraphTest(test.TestCase):
       init_op = variables.global_variables_initializer()
 
       with session.Session(config=no_constfold_config) as sess:
-        sess.run(init_op)
-        actual_grad_value = sess.run(grad)
+        self.evaluate(init_op)
+        actual_grad_value = self.evaluate(grad)
         self.assertEqual(expected_grad_value, actual_grad_value)
 
   def _testWhileLoopAndGradientSerDes(self, outer_body_fn):
@@ -2187,7 +2187,7 @@ class MetaGraphTest(test.TestCase):
                                                       logits=logit, name="cost")
       adam.AdamOptimizer().minimize(cost, name="optimize")
       saver = saver_module.Saver()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       saver.save(sess, filename)
 
     graph = ops_lib.Graph()
@@ -2224,7 +2224,7 @@ class MetaGraphTest(test.TestCase):
 
       # Create a variable in graph_2 under scope "my_scope".
       variables.VariableV1(array_ops.zeros([10]), name="my_scope/my_var")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       # Restore the checkpoint into a different scope "subgraph_2".
       new_saver_2 = saver_module.import_meta_graph(
           filename + ".meta", graph=graph_2, import_scope="subgraph_2")
@@ -2257,7 +2257,7 @@ class MetaGraphTest(test.TestCase):
                                                       logits=logit, name="cost")
       adam.AdamOptimizer().minimize(cost, name="optimize")
       saver = saver_module.Saver()
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       saver.save(sess, filename)
 
     graph = ops_lib.Graph()
@@ -2294,12 +2294,12 @@ class MetaGraphTest(test.TestCase):
           meta_graph_def, clear_devices=False, import_scope="new_model")
       # Device refers to GPU, which is not available here.
       with self.assertRaises(errors_impl.InvalidArgumentError):
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
 
     with session.Session(graph=ops_lib.Graph()) as sess:
       saver_module.import_meta_graph(
           meta_graph_def, clear_devices=True, import_scope="new_model")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       sess.run(["new_model/optimize"], {
           "new_model/image:0": np.random.random([1, 784]),
           "new_model/label:0": np.random.randint(
@@ -2326,7 +2326,7 @@ class MetaGraphTest(test.TestCase):
 
     with session.Session(graph=ops_lib.Graph()) as sess:
       saver_module.import_meta_graph(meta_graph_def, import_scope="new_model")
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       sess.run(["new_model/optimize"], {
           "new_model/image:0": np.random.random([1, 784]),
           "new_model/label:0": np.random.randint(
@@ -2352,7 +2352,7 @@ class MetaGraphTest(test.TestCase):
                            meta_graph_def_from_graph_def]:
       with session.Session(graph=ops_lib.Graph()) as sess:
         saver_module.import_meta_graph(meta_graph_def, import_scope="new_model")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         for i in range(10):
           self.assertEqual(i * i, sess.run("new_model/output:0"))
         with self.assertRaises(errors.OutOfRangeError):
@@ -2378,7 +2378,7 @@ class CheckpointReaderTest(test.TestCase):
     save_path = os.path.join(self.get_temp_dir(),
                              "ckpt_for_debug_string" + str(self._WRITE_VERSION))
     with self.cached_session() as sess:
-      sess.run(init_all_op)
+      self.evaluate(init_all_op)
       # Saves a checkpoint.
       save.save(sess, save_path)
 
@@ -2524,7 +2524,7 @@ class ScopedGraphTest(test.TestCase):
       self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys()))
 
     with self.session(graph=graph) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       saver = saver_module.Saver(var_list=var_list, max_to_keep=1)
       saver.save(sess, os.path.join(test_dir, ckpt_filename), write_state=False)
 
@@ -2587,10 +2587,10 @@ class ScopedGraphTest(test.TestCase):
       saver = saver_module.Saver(var_list=var_list, max_to_keep=1)
       saver.restore(sess, os.path.join(test_dir, ckpt_filename))
       # Verify that we have restored weights1 and biases1.
-      sess.run([weights1, biases1])
+      self.evaluate([weights1, biases1])
       # Initialize the rest of the variables and run logits.
-      sess.run(init_rest_op)
-      sess.run(logits)
+      self.evaluate(init_rest_op)
+      self.evaluate(logits)
 
   # Verifies that we can save the subgraph under "hidden1" and restore it
   # into "new_hidden1" in the new graph.
@@ -2618,7 +2618,7 @@ class ScopedGraphTest(test.TestCase):
 
     # Run the graph and save scoped checkpoint.
     with self.session(graph=graph1) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       _, var_list_1 = meta_graph.export_scoped_meta_graph(
           export_scope="hidden1")
       saver = saver_module.Saver(var_list=var_list_1, max_to_keep=1)
@@ -2674,7 +2674,7 @@ class ScopedGraphTest(test.TestCase):
 
     # Run the graph and save scoped checkpoint.
     with self.session(graph=graph1) as sess:
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       _, var_list_1 = meta_graph.export_scoped_meta_graph(
           graph_def=graph1.as_graph_def(), export_scope="hidden1")
       saver = saver_module.Saver(var_list=var_list_1, max_to_keep=1)
@@ -2942,7 +2942,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
     a_saver = saver_module.Saver([a])
     b_saver = saver_module.Saver([b])
     with self.cached_session() as sess:
-      sess.run(a.initializer)
+      self.evaluate(a.initializer)
       save_path = a_saver.save(sess=sess, save_path=checkpoint_prefix)
       with self.assertRaisesRegexp(
           errors.NotFoundError, "Key b not found in checkpoint"):
@@ -2964,7 +2964,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
       a_saver = saver_module.Saver([a])
 
       with self.session(graph=g) as sess:
-        sess.run(a.initializer)
+        self.evaluate(a.initializer)
         save_path = a_saver.save(sess=sess, save_path=checkpoint_prefix)
 
     with ops_lib.Graph().as_default() as g:
diff --git a/tensorflow/python/training/server_lib_same_variables_clear_container_test.py b/tensorflow/python/training/server_lib_same_variables_clear_container_test.py
index 11e6f28ab05..3a5eb712c6f 100644
--- a/tensorflow/python/training/server_lib_same_variables_clear_container_test.py
+++ b/tensorflow/python/training/server_lib_same_variables_clear_container_test.py
@@ -60,9 +60,9 @@ class SameVariablesClearContainerTest(test.TestCase):
     session.Session.reset(server0.target, ["local0"])
     sess = session.Session(server0.target)
     with self.assertRaises(errors_impl.FailedPreconditionError):
-      sess.run(v0)
+      self.evaluate(v0)
     # Reinitializes v0 for the following test.
-    sess.run(v0.initializer)
+    self.evaluate(v0.initializer)
 
     # Verifies that v1 is still valid.
     self.assertAllEqual(2.0, sess_1.run(v1))
@@ -71,10 +71,10 @@ class SameVariablesClearContainerTest(test.TestCase):
     session.Session.reset(server1.target, ["local1"])
     sess = session.Session(server1.target)
     with self.assertRaises(errors_impl.FailedPreconditionError):
-      sess.run(v1)
+      self.evaluate(v1)
     # Verifies that v0 is still valid.
     sess = session.Session(server0.target)
-    self.assertAllEqual(1.0, sess.run(v0))
+    self.assertAllEqual(1.0, self.evaluate(v0))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/training/server_lib_sparse_job_test.py b/tensorflow/python/training/server_lib_sparse_job_test.py
index 1a6b44b90e8..8c2745b51aa 100644
--- a/tensorflow/python/training/server_lib_sparse_job_test.py
+++ b/tensorflow/python/training/server_lib_sparse_job_test.py
@@ -36,7 +36,7 @@ class SparseJobTest(test.TestCase):
       a = constant_op.constant(1.0)
 
     with session.Session(server.target) as sess:
-      self.assertEqual(1.0, sess.run(a))
+      self.assertEqual(1.0, self.evaluate(a))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/training/supervisor_test.py b/tensorflow/python/training/supervisor_test.py
index b734e9653ea..9dc88d78ccc 100644
--- a/tensorflow/python/training/supervisor_test.py
+++ b/tensorflow/python/training/supervisor_test.py
@@ -100,7 +100,7 @@ class SupervisorTest(test.TestCase):
       sv = supervisor.Supervisor(logdir=logdir)
       sess = sv.prepare_or_wait_for_session("")
       for _ in xrange(10):
-        sess.run(my_op)
+        self.evaluate(my_op)
       sess.close()
       sv.stop()
 
@@ -111,7 +111,7 @@ class SupervisorTest(test.TestCase):
       sv = supervisor.Supervisor(logdir=logdir)
       with sv.managed_session("") as sess:
         for _ in xrange(10):
-          sess.run(my_op)
+          self.evaluate(my_op)
       # Supervisor has been stopped.
       self.assertTrue(sv.should_stop())
 
@@ -128,7 +128,7 @@ class SupervisorTest(test.TestCase):
             if step == 1:
               raise RuntimeError("failing here")
             else:
-              sess.run(my_op)
+              self.evaluate(my_op)
       # Supervisor has been stopped.
       self.assertTrue(sv.should_stop())
       self.assertEqual(1, last_step)
@@ -146,7 +146,7 @@ class SupervisorTest(test.TestCase):
             raise errors_impl.OutOfRangeError(my_op.op.node_def, my_op.op,
                                               "all done")
           else:
-            sess.run(my_op)
+            self.evaluate(my_op)
       # Supervisor has been stopped.  OutOfRangeError was not thrown.
       self.assertTrue(sv.should_stop())
       self.assertEqual(3, last_step)
@@ -335,7 +335,7 @@ class SupervisorTest(test.TestCase):
       sess = sv.prepare_or_wait_for_session(
           "", config=config_pb2.ConfigProto(device_count={"CPU": 2}))
       for _ in xrange(10):
-        sess.run(my_op)
+        self.evaluate(my_op)
       sess.close()
       sv.stop()
 
diff --git a/tensorflow/python/training/warm_starting_util_test.py b/tensorflow/python/training/warm_starting_util_test.py
index b575b8d364c..fa1f370f41e 100644
--- a/tensorflow/python/training/warm_starting_util_test.py
+++ b/tensorflow/python/training/warm_starting_util_test.py
@@ -49,7 +49,7 @@ class WarmStartingUtilTest(test.TestCase):
     return vocab_file
 
   def _write_checkpoint(self, sess):
-    sess.run(variables.global_variables_initializer())
+    self.evaluate(variables.global_variables_initializer())
     saver = saver_lib.Saver()
     ckpt_prefix = os.path.join(self.get_temp_dir(), "model")
     saver.save(sess, ckpt_prefix, global_step=0)
@@ -70,7 +70,7 @@ class WarmStartingUtilTest(test.TestCase):
         if partitioner:
           self.assertTrue(isinstance(var, variables.PartitionedVariable))
           var = var._get_variable_list()
-        return var, sess.run(var)
+        return var, self.evaluate(var)
 
   def _create_prev_run_vars(self,
                             var_names,
@@ -86,7 +86,7 @@ class WarmStartingUtilTest(test.TestCase):
               shape=shape,
               initializer=initializer))
         self._write_checkpoint(sess)
-        return [sess.run(var) for var in all_vars]
+        return [self.evaluate(var) for var in all_vars]
 
   def _create_dummy_inputs(self):
     return {
@@ -125,7 +125,7 @@ class WarmStartingUtilTest(test.TestCase):
         prev_tensor_name, var = ws_util._get_var_info(fruit_weights)
         checkpoint_utils.init_from_checkpoint(self.get_temp_dir(),
                                               {prev_tensor_name: var})
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose(prev_val, fruit_weights.eval(sess))
 
   def testWarmStartVarPrevVarPartitioned(self):
@@ -143,7 +143,7 @@ class WarmStartingUtilTest(test.TestCase):
         prev_tensor_name, var = ws_util._get_var_info(fruit_weights)
         checkpoint_utils.init_from_checkpoint(self.get_temp_dir(),
                                               {prev_tensor_name: var})
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose(prev_val, fruit_weights.eval(sess))
 
   def testWarmStartVarCurrentVarPartitioned(self):
@@ -162,7 +162,7 @@ class WarmStartingUtilTest(test.TestCase):
         prev_tensor_name, var = ws_util._get_var_info(fruit_weights)
         checkpoint_utils.init_from_checkpoint(self.get_temp_dir(),
                                               {prev_tensor_name: var})
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         fruit_weights = fruit_weights._get_variable_list()
         new_val = np.concatenate(
             [fruit_weights[0].eval(sess), fruit_weights[1].eval(sess)], axis=0)
@@ -189,7 +189,7 @@ class WarmStartingUtilTest(test.TestCase):
             fruit_weights, prev_tensor_name="old_scope/fruit_weights")
         checkpoint_utils.init_from_checkpoint(self.get_temp_dir(),
                                               {prev_tensor_name: var})
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         fruit_weights = fruit_weights._get_variable_list()
         new_val = np.concatenate(
             [fruit_weights[0].eval(sess), fruit_weights[1].eval(sess)], axis=0)
@@ -211,7 +211,7 @@ class WarmStartingUtilTest(test.TestCase):
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.], [0.]])
         ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 5,
                                            self.get_temp_dir(), prev_vocab_path)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose([[2.], [1.5], [1.], [0.5], [0.]],
                             fruit_weights.eval(sess))
 
@@ -236,7 +236,7 @@ class WarmStartingUtilTest(test.TestCase):
                                            prev_ckpt=self.get_temp_dir(),
                                            prev_vocab_path=prev_vocab_path,
                                            axis=1)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose([[0.3, 0.5, 0.], [0.8, 1.0, 0.], [1.2, 1.5, 0.],
                              [2.3, 2., 0.]], fruit_output_layer.eval(sess))
 
@@ -261,7 +261,7 @@ class WarmStartingUtilTest(test.TestCase):
             self.get_temp_dir(),
             prev_vocab_path,
             previous_vocab_size=2)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Old vocabulary limited to ['apple', 'banana'].
         self.assertAllClose([[0.], [0.], [1.], [0.5], [0.]],
                             fruit_weights.eval(sess))
@@ -285,7 +285,7 @@ class WarmStartingUtilTest(test.TestCase):
             "fruit_weights", initializer=[[0.], [0.], [0.], [0.], [0.]])
         ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 5,
                                            self.get_temp_dir(), prev_vocab_path)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose([[2.], [1.5], [1.], [0.5], [0.]],
                             fruit_weights.eval(sess))
 
@@ -312,7 +312,7 @@ class WarmStartingUtilTest(test.TestCase):
                                            prev_ckpt=self.get_temp_dir(),
                                            prev_vocab_path=prev_vocab_path,
                                            axis=1)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose([[0.3, 0.5, 0.], [0.8, 1.0, 0.], [1.2, 1.5, 0.],
                              [2.3, 2., 0.]], fruit_output_layer.eval(sess))
 
@@ -340,7 +340,7 @@ class WarmStartingUtilTest(test.TestCase):
             self.get_temp_dir(),
             prev_vocab_path,
             current_oov_buckets=1)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertTrue(
             isinstance(fruit_weights, variables.PartitionedVariable))
         fruit_weights_vars = fruit_weights._get_variable_list()
@@ -372,7 +372,7 @@ class WarmStartingUtilTest(test.TestCase):
                                            prev_ckpt=self.get_temp_dir(),
                                            prev_vocab_path=prev_vocab_path,
                                            axis=1)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertTrue(
             isinstance(fruit_output_layer, variables.PartitionedVariable))
         fruit_output_layer_vars = fruit_output_layer._get_variable_list()
@@ -404,7 +404,7 @@ class WarmStartingUtilTest(test.TestCase):
             partitioner=lambda shape, dtype: [2, 1])
         ws_util._warm_start_var_with_vocab(fruit_weights, new_vocab_path, 6,
                                            self.get_temp_dir(), prev_vocab_path)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertTrue(
             isinstance(fruit_weights, variables.PartitionedVariable))
         fruit_weights_vars = fruit_weights._get_variable_list()
@@ -438,7 +438,7 @@ class WarmStartingUtilTest(test.TestCase):
                                            prev_ckpt=self.get_temp_dir(),
                                            prev_vocab_path=prev_vocab_path,
                                            axis=1)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         self.assertTrue(
             isinstance(fruit_output_layer, variables.PartitionedVariable))
         fruit_output_layer_vars = fruit_output_layer._get_variable_list()
@@ -463,7 +463,7 @@ class WarmStartingUtilTest(test.TestCase):
             shape=[10, 1],
             initializer=zeros())
         ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=[var])
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started (init overridden to ones).
         self.assertAllEqual(var.eval(), prev_int_val)
 
@@ -483,7 +483,7 @@ class WarmStartingUtilTest(test.TestCase):
             shape=[10, 1],
             initializer=zeros())
         ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=["v1"])
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started (init overridden to ones).
         self.assertAllEqual(var.eval(), prev_int_val)
 
@@ -519,7 +519,7 @@ class WarmStartingUtilTest(test.TestCase):
                            # This warm-starts both v1 and v1/Momentum, but only
                            # v2 (and not v2/Momentum).
                            vars_to_warm_start=["v1", "v2[^/]"])
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify the selection of weights were correctly warm-started (init
         # overridden to ones).
         self.assertAllEqual(v1.eval(), prev_v1_val)
@@ -542,7 +542,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_int], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_int: [np.zeros([10, 1])]},
@@ -553,7 +553,7 @@ class WarmStartingUtilTest(test.TestCase):
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_int], partitioner)
         ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=".*sc_int.*")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_int: [prev_int_val]}, sess)
 
@@ -571,7 +571,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_hash], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_hash: [np.zeros([15, 1])]},
@@ -583,7 +583,7 @@ class WarmStartingUtilTest(test.TestCase):
         cols_to_vars = self._create_linear_model([sc_hash], partitioner)
         ws_util.warm_start(
             self.get_temp_dir(), vars_to_warm_start=".*sc_hash.*")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_hash: [prev_hash_val]},
                                   sess)
@@ -605,7 +605,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]},
@@ -619,7 +619,7 @@ class WarmStartingUtilTest(test.TestCase):
         # vocab is assumed to be same as new vocab.
         ws_util.warm_start(
             self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]},
                                   sess)
@@ -641,7 +641,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([4, 1])]},
@@ -657,7 +657,7 @@ class WarmStartingUtilTest(test.TestCase):
             # Explicitly provide the file prefix instead of just the dir.
             os.path.join(self.get_temp_dir(), "model-0"),
             vars_to_warm_start=".*sc_vocab.*")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]},
                                   sess)
@@ -686,7 +686,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([sc_vocab], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [np.zeros([2, 1])]},
@@ -708,7 +708,7 @@ class WarmStartingUtilTest(test.TestCase):
             var_name_to_vocab_info={
                 "linear_model/sc_vocab/weights": vocab_info
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.  'banana' isn't in the
         # first two entries of the old vocabulary, so it's newly initialized.
         self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [[[1], [0]]]}, sess)
@@ -729,7 +729,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model([real_bucket], partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, the weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars,
@@ -741,7 +741,7 @@ class WarmStartingUtilTest(test.TestCase):
         cols_to_vars = self._create_linear_model([real_bucket], partitioner)
         ws_util.warm_start(
             self.get_temp_dir(), vars_to_warm_start=".*real_bucketized.*")
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars,
                                   {real_bucket: [prev_bucket_val]}, sess)
@@ -800,7 +800,7 @@ class WarmStartingUtilTest(test.TestCase):
     with ops.Graph().as_default() as g:
       with self.session(graph=g) as sess:
         cols_to_vars = self._create_linear_model(all_linear_cols, partitioner)
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Without warm-starting, all weights should be initialized using default
         # initializer (which is init_ops.zeros_initializer).
         self._assert_cols_to_vars(cols_to_vars, {
@@ -826,7 +826,7 @@ class WarmStartingUtilTest(test.TestCase):
             var_name_to_vocab_info={
                 "linear_model/sc_vocab/weights": vocab_info
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.
         self._assert_cols_to_vars(cols_to_vars, {
             sc_int: [prev_int_val],
@@ -865,7 +865,7 @@ class WarmStartingUtilTest(test.TestCase):
             "linear_model/sc_vocab/weights",
             initializer=[[0.5], [1.], [2.], [3.]])
         self._write_checkpoint(sess)
-        prev_keys_val = sess.run(sc_keys_weights)
+        prev_keys_val = self.evaluate(sc_keys_weights)
 
     def _partitioner(shape, dtype):  # pylint:disable=unused-argument
       # Partition each var into 2 equal slices.
@@ -892,7 +892,7 @@ class WarmStartingUtilTest(test.TestCase):
                 ws_util._infer_var_name(cols_to_vars[sc_keys]):
                     "some_other_name"
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.  Var corresponding to
         # sc_hash should not be warm-started.  Var corresponding to sc_vocab
         # should be correctly warm-started after vocab remapping.
@@ -933,7 +933,7 @@ class WarmStartingUtilTest(test.TestCase):
             "linear_model/sc_vocab/weights",
             initializer=[[0.5], [1.], [2.], [3.]])
         self._write_checkpoint(sess)
-        prev_keys_val = sess.run(sc_keys_weights)
+        prev_keys_val = self.evaluate(sc_keys_weights)
 
     # New graph, new session with warm-starting.
     with ops.Graph().as_default() as g:
@@ -955,7 +955,7 @@ class WarmStartingUtilTest(test.TestCase):
                 ws_util._infer_var_name(cols_to_vars[sc_keys]):
                     "some_other_name"
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.  Var corresponding to
         # sc_hash should not be warm-started.  Var corresponding to sc_vocab
         # should be correctly warm-started after vocab remapping.
@@ -1024,7 +1024,7 @@ class WarmStartingUtilTest(test.TestCase):
                 ws_util._infer_var_name(cols_to_vars[sc_keys]):
                     "some_other_name"
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started.  Var corresponding to
         # sc_vocab should be correctly warm-started after vocab remapping,
         # and neither of the other two should be warm-started..
@@ -1091,7 +1091,7 @@ class WarmStartingUtilTest(test.TestCase):
                 ws_util._infer_var_name(cols_to_vars[emb_vocab_column]):
                     vocab_info
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started. Var corresponding to
         # emb_vocab_column should be correctly warm-started after vocab
         # remapping. Missing values are filled in with the EmbeddingColumn's
@@ -1163,7 +1163,7 @@ class WarmStartingUtilTest(test.TestCase):
             var_name_to_vocab_info={
                 "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
             })
-        sess.run(variables.global_variables_initializer())
+        self.evaluate(variables.global_variables_initializer())
         # Verify weights were correctly warm-started. Var corresponding to
         # emb_vocab should be correctly warm-started after vocab remapping.
         # Missing values are filled in with the EmbeddingColumn's initializer.