Use registers where we can to avoid extra memory accesses.

PiperOrigin-RevId: 271405064
Authored by Zhenyu Tan, 2019-09-26 12:22:28 -07:00; committed by TensorFlower Gardener
parent 616e9d41e3
commit e8f581bf5b
2 changed files with 8 additions and 8 deletions


@@ -43,7 +43,7 @@ __global__ void ApplyAdamKernel(int32 data_dim, T* var, T* m, T* v,
(static_cast<T>(1.0) - (*beta1_power_));
const T epsilon = (*epsilon_);
const T beta1 = (*beta1_);
- const T one_minus_beta1 = static_cast<T>(1.0) - (*beta1_);
+ const T one_minus_beta1 = static_cast<T>(1.0) - (beta1);
const T one_minus_beta2 = static_cast<T>(1.0) - (*beta2_);
const int32 stripe = gridDim.x * blockDim.x;
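
The change above reuses the local beta1 (already loaded from *beta1_) when computing one_minus_beta1, so the value comes from a register instead of a second dereference of the device pointer. A minimal standalone sketch of the same pattern, with a hypothetical kernel and parameter names rather than TensorFlow code:

__global__ void ScaleShiftKernel(int n, float* out, const float* in,
                                 const float* scale_ptr, const float* shift_ptr) {
  // Scalar hyperparameters arrive as device pointers; dereference them once
  // so the values sit in registers for every loop iteration.
  const float scale = *scale_ptr;
  const float shift = *shift_ptr;
  const int stride = gridDim.x * blockDim.x;
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
    // Writing *scale_ptr here instead would issue a redundant global-memory
    // load (or at best a cache hit) on each pass through the loop.
    out[i] = in[i] * scale + shift;
  }
}

Each thread would otherwise repeat the same scalar loads on every iteration of the grid-stride loop; hoisting them once before the loop removes that redundant traffic at no cost.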


@@ -113,7 +113,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testSparse(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -202,7 +202,7 @@ class AdamOptimizerTest(test.TestCase):
def doTestBasic(self, use_callable_params=False):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -262,7 +262,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_in_graph_and_eager_modes(reset_test=True)
def testBasicWithAmsgrad(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, v0hat, m1, v1, v1hat = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -356,7 +356,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testBasicWithLearningRateDecay(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -403,7 +403,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testBasicWithLearningRateInverseTimeDecay(self):
for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
- with self.session(graph=ops.Graph()):
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -452,7 +452,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testTensorLearningRate(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
@@ -491,7 +491,7 @@ class AdamOptimizerTest(test.TestCase):
@test_util.run_deprecated_v1
def testSharing(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
# Initialize variables for numpy implementation.
m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)