Use registers where we can to avoid extra memory accesses.

PiperOrigin-RevId: 271405064
Authored by Zhenyu Tan, 2019-09-26 12:22:28 -07:00; committed by TensorFlower Gardener
parent 616e9d41e3
commit e8f581bf5b
2 changed files with 8 additions and 8 deletions


@@ -43,7 +43,7 @@ __global__ void ApplyAdamKernel(int32 data_dim, T* var, T* m, T* v,
(static_cast<T>(1.0) - (*beta1_power_));
const T epsilon = (*epsilon_);
const T beta1 = (*beta1_);
- const T one_minus_beta1 = static_cast<T>(1.0) - (*beta1_);
+ const T one_minus_beta1 = static_cast<T>(1.0) - (beta1);
const T one_minus_beta2 = static_cast<T>(1.0) - (*beta2_);
const int32 stripe = gridDim.x * blockDim.x;
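
The change above reuses the local beta1 (already loaded from *beta1_) when computing one_minus_beta1, so the value comes from a register instead of a second dereference of the device pointer. A minimal standalone sketch of the same pattern, with a hypothetical kernel and parameter names rather than TensorFlow code:

__global__ void ScaleShiftKernel(int n, float* out, const float* in,
                                 const float* scale_ptr, const float* shift_ptr) {
  // Scalar hyperparameters arrive as device pointers; dereference them once
  // so the values sit in registers for every loop iteration.
  const float scale = *scale_ptr;
  const float shift = *shift_ptr;
  const int stride = gridDim.x * blockDim.x;
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
    // Writing *scale_ptr here instead would issue a redundant global-memory
    // load (or at best a cache hit) on each pass through the loop.
    out[i] = in[i] * scale + shift;
  }
}

Each thread would otherwise repeat the same scalar loads on every iteration of the grid-stride loop; hoisting them once before the loop removes that redundant traffic at no cost.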


@@ -113,7 +113,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testSparse(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -202,7 +202,7 @@ class AdamOptimizerTest(test.TestCase):
def doTestBasic(self, use_callable_params=False):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -262,7 +262,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_in_graph_and_eager_modes(reset_test=True)
def testBasicWithAmsgrad(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -356,7 +356,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testBasicWithLearningRateDecay(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -403,7 +403,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testBasicWithLearningRateInverseTimeDecay(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -452,7 +452,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testTensorLearningRate(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -491,7 +491,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testSharing(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)