From e8f581bf5b8312a3b0f131bb3294c0ce1cd70047 Mon Sep 17 00:00:00 2001
From: Zhenyu Tan
Date: Thu, 26 Sep 2019 12:22:28 -0700
Subject: [PATCH] Use registers where possible to avoid extra memory accesses.

PiperOrigin-RevId: 271405064
---
 tensorflow/core/kernels/training_ops_gpu.cu.cc    |  2 +-
 tensorflow/python/keras/optimizer_v2/adam_test.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc
index 4a9ddda7c71..17ab7c59d12 100644
--- a/tensorflow/core/kernels/training_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc
@@ -43,7 +43,7 @@ __global__ void ApplyAdamKernel(int32 data_dim, T* var, T* m, T* v,
                        (static_cast<T>(1.0) - (*beta1_power_));
   const T epsilon = (*epsilon_);
   const T beta1 = (*beta1_);
-  const T one_minus_beta1 = static_cast<T>(1.0) - (*beta1_);
+  const T one_minus_beta1 = static_cast<T>(1.0) - (beta1);
   const T one_minus_beta2 = static_cast<T>(1.0) - (*beta2_);
 
   const int32 stripe = gridDim.x * blockDim.x;
diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py
index 1c982a16ee8..47a0cf67247 100644
--- a/tensorflow/python/keras/optimizer_v2/adam_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adam_test.py
@@ -113,7 +113,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_deprecated_v1
   def testSparse(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.cached_session():
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -202,7 +202,7 @@ class AdamOptimizerTest(test.TestCase):
 
   def doTestBasic(self, use_callable_params=False):
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
-      with self.session(graph=ops.Graph()):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -262,7 +262,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testBasicWithAmsgrad(self):
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
-      with self.session(graph=ops.Graph()):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -356,7 +356,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_deprecated_v1
   def testBasicWithLearningRateDecay(self):
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
-      with self.session(graph=ops.Graph()):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -403,7 +403,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_deprecated_v1
   def testBasicWithLearningRateInverseTimeDecay(self):
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
-      with self.session(graph=ops.Graph()):
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -452,7 +452,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_deprecated_v1
   def testTensorLearningRate(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.cached_session():
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -491,7 +491,7 @@ class AdamOptimizerTest(test.TestCase):
   @test_util.run_deprecated_v1
   def testSharing(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.cached_session():
+      with self.cached_session(use_gpu=True):
         # Initialize variables for numpy implementation.
         m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
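
Note on the kernel change: ApplyAdamKernel receives its scalar hyperparameters
as device pointers, so each dereference such as (*beta1_) is a global-memory
load unless the compiler can prove it is safe to cache; since the kernel also
writes through var, m, and v, which could in principle alias those pointers,
it generally cannot. Reading the value once into a local const lets it live in
a register for the rest of the kernel. Below is a minimal standalone sketch of
that pattern, not TensorFlow code; ScaleKernel and its parameters are
hypothetical names invented for illustration.

    // scale_register.cu -- sketch of caching a pointer-passed scalar in a
    // register, under the same aliasing assumptions as the patched kernel.
    #include <cstdio>

    __global__ void ScaleKernel(int n, float* out, const float* in,
                                const float* scale_) {
      // One global load; afterwards `scale` lives in a register, instead of
      // (*scale_) being re-read from global memory on every use.
      const float scale = *scale_;
      const int stride = gridDim.x * blockDim.x;
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        out[i] = scale * in[i];
      }
    }

    int main() {
      const int n = 1024;
      float *in, *out, *scale;
      cudaMallocManaged(&in, n * sizeof(float));
      cudaMallocManaged(&out, n * sizeof(float));
      cudaMallocManaged(&scale, sizeof(float));
      for (int i = 0; i < n; ++i) in[i] = 1.0f;
      *scale = 0.5f;
      ScaleKernel<<<4, 256>>>(n, out, in, scale);
      cudaDeviceSynchronize();
      printf("out[0] = %f\n", out[0]);  // expect 0.5
      cudaFree(in);
      cudaFree(out);
      cudaFree(scale);
      return 0;
    }

The test-side changes are independent of this: switching the sessions to
cached_session(use_gpu=True) makes the Adam tests exercise this GPU kernel
when a GPU is available, rather than always running on CPU.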