Merge pull request #41522 from redwrasse:momentum-test-loss-var
PiperOrigin-RevId: 328140122
Change-Id: Iacbb71fb3288057de3db6e0ab06e1cea20961223
commit d88e5532a1
@@ -47,10 +47,12 @@ class MomentumOptimizerTest(test.TestCase):
   def doTestBasic(self, use_resource=False, use_callable_params=False):
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
       if use_resource:
-        var0 = resource_variable_ops.ResourceVariable(
-            [1.0, 2.0], dtype=dtype, name="var0_%d" % i)
-        var1 = resource_variable_ops.ResourceVariable(
-            [3.0, 4.0], dtype=dtype, name="var1_%d" % i)
+        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
+                                                      dtype=dtype,
+                                                      name="var0_%d" % i)
+        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
+                                                      dtype=dtype,
+                                                      name="var1_%d" % i)
       else:
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
@@ -63,8 +65,7 @@ class MomentumOptimizerTest(test.TestCase):
         momentum = momentum()
       mom_opt = momentum_lib.MomentumOptimizer(
           learning_rate=learning_rate, momentum=momentum)
-      mom_update = mom_opt.apply_gradients(
-          zip([grads0, grads1], [var0, var1]))
+      mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
 
       if not context.executing_eagerly():
         self.evaluate(variables.global_variables_initializer())
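
For context, grads0/grads1 and the hyperparameters consumed by apply_gradients here are defined earlier in doTestBasic and are not part of this diff. A minimal sketch of an equivalent setup against the public tf.compat.v1 API; the gradient values (0.1 / 0.01) and hyperparameters (learning_rate=2.0, momentum=0.9) are assumptions inferred from the assertions in the next hunk:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

# Hypothetical stand-in for the test's setup; the concrete values are assumptions.
var0 = tf.Variable([1.0, 2.0])
var1 = tf.Variable([3.0, 4.0])
grads0 = tf.constant([0.1, 0.1])
grads1 = tf.constant([0.01, 0.01])
opt = tf.train.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
mom_update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(mom_update)           # one momentum step on both variables
  print(sess.run([var0, var1]))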
@@ -87,14 +88,13 @@ class MomentumOptimizerTest(test.TestCase):
       if not context.executing_eagerly():
         self.evaluate(mom_update)
       # Check that the momentum accumulators have been updated.
-      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
-                                         self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
-                                         self.evaluate(slot1))
+      self.assertAllCloseAccordingToType(
+          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(
+          np.array([0.01, 0.01]), self.evaluate(slot1))
       # Check that the parameters have been updated.
       self.assertAllCloseAccordingToType(
-          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
-          self.evaluate(var0))
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0))
       self.assertAllCloseAccordingToType(
           np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
           self.evaluate(var1))
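
The expected values asserted above follow the plain (non-Nesterov) momentum update accum = momentum * accum + grad; var -= lr * accum. A small NumPy check of step 1, assuming gradients of 0.1 / 0.01, learning_rate 2.0, momentum 0.9 and zero-initialized accumulators (inferred from the assertion expressions, not shown in this hunk):

import numpy as np

lr, momentum = 2.0, 0.9
grad0, grad1 = np.array([0.1, 0.1]), np.array([0.01, 0.01])

accum0 = momentum * np.zeros(2) + grad0    # slot0 after step 1 -> [0.1, 0.1]
accum1 = momentum * np.zeros(2) + grad1    # slot1 after step 1 -> [0.01, 0.01]
var0 = np.array([1.0, 2.0]) - lr * accum0  # -> [1.0 - 0.1 * 2.0, 2.0 - 0.1 * 2.0]
var1 = np.array([3.0, 4.0]) - lr * accum1  # -> [3.0 - 0.01 * 2.0, 4.0 - 0.01 * 2.0]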
@@ -118,8 +118,8 @@ class MomentumOptimizerTest(test.TestCase):
       ]), self.evaluate(var0))
       self.assertAllCloseAccordingToType(
           np.array([
-              2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - (
-                  (0.9 * 0.01 + 0.01) * 2.0)
+              2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
+              3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
           ]), self.evaluate(var1))
 
   def testBasic(self):
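
The 2.98 / 3.98 terms above are just var1 after step 1 (3.0 - 0.01 * 2.0 and 4.0 - 0.01 * 2.0); step 2 then reuses the accumulated momentum. Continuing the sketch under the same assumed values:

import numpy as np

lr, momentum = 2.0, 0.9
grad1 = np.array([0.01, 0.01])
accum1 = momentum * np.array([0.01, 0.01]) + grad1   # 0.9 * 0.01 + 0.01
var1 = np.array([2.98, 3.98]) - lr * accum1
# -> [2.98 - (0.9 * 0.01 + 0.01) * 2.0, 3.98 - (0.9 * 0.01 + 0.01) * 2.0]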
@@ -137,10 +137,12 @@ class MomentumOptimizerTest(test.TestCase):
   def testVariablesAcrossGraphs(self):
     optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5)
     with ops.Graph().as_default():
-      var0 = resource_variable_ops.ResourceVariable(
-          [1.0, 2.0], dtype=dtypes.float32, name="var0")
-      var1 = resource_variable_ops.ResourceVariable(
-          [3.0, 4.0], dtype=dtypes.float32, name="var1")
+      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
+                                                    dtype=dtypes.float32,
+                                                    name="var0")
+      var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
+                                                    dtype=dtypes.float32,
+                                                    name="var1")
       loss = math_ops.reduce_sum(var0 + var1)
       optimizer.minimize(loss)
       optimizer_variables = optimizer.variables()
@@ -149,10 +151,12 @@ class MomentumOptimizerTest(test.TestCase):
       self.assertEqual(2, len(optimizer_variables))
 
     with ops.Graph().as_default():
-      var2 = resource_variable_ops.ResourceVariable(
-          [1.0, 2.0], dtype=dtypes.float32, name="var2")
-      var3 = resource_variable_ops.ResourceVariable(
-          [3.0, 4.0], dtype=dtypes.float32, name="var3")
+      var2 = resource_variable_ops.ResourceVariable([1.0, 2.0],
+                                                    dtype=dtypes.float32,
+                                                    name="var2")
+      var3 = resource_variable_ops.ResourceVariable([3.0, 4.0],
+                                                    dtype=dtypes.float32,
+                                                    name="var3")
       loss = math_ops.reduce_sum(var2 + var3)
       optimizer.minimize(loss)
       optimizer_variables = optimizer.variables()
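
The assertEqual(2, ...) checks encode that MomentumOptimizer keeps one "momentum" slot per optimized variable and that optimizer.variables() only reports the slots of the current graph. A rough sketch of the same idea against the public tf.compat.v1 API (a simplification, not the test's exact code):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

opt = tf.train.MomentumOptimizer(0.01, 0.5)
with tf.Graph().as_default():
  v0 = tf.Variable([1.0, 2.0], name="var0")
  v1 = tf.Variable([3.0, 4.0], name="var1")
  opt.minimize(tf.reduce_sum(v0 + v1))
  assert len(opt.variables()) == 2        # one momentum slot per variable
  slot = opt.get_slot(v0, "momentum")     # the accumulator created for v0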
@@ -181,9 +185,8 @@ class MomentumOptimizerTest(test.TestCase):
           opt_op.run()
           var0_np, accum0_np = self._update_nesterov_momentum_numpy(
               var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
-          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
-                                                                    accum1_np,
-                                                                    3, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
+              var1_np, accum1_np, 3, 2.0, 0.9)
           self.assertAllClose(var0_np, self.evaluate(var0))
           self.assertAllClose(var1_np, self.evaluate(var1))
 
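
The helper _update_nesterov_momentum_numpy is defined elsewhere in the file and is not part of this diff. As a reference, a NumPy sketch of one common Nesterov momentum formulation it is assumed to mirror (accumulate first, then apply the gradient plus a look-ahead term):

import numpy as np

def nesterov_momentum_step(var, accum, grad, lr, momentum):
  # Update the momentum accumulator, then take the look-ahead step.
  accum = momentum * accum + grad
  var = var - lr * (grad + momentum * accum)
  return var, accum

# e.g. one step for var1 above: constant gradient 3, lr=2.0, momentum=0.9.
var1, accum1 = nesterov_momentum_step(
    np.array([3.0, 4.0]), np.zeros(2), 3.0, 2.0, 0.9)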
@@ -200,32 +203,29 @@ class MomentumOptimizerTest(test.TestCase):
           grads.append(var0_np * 10)
           var0_np, accum0_np = self._update_nesterov_momentum_numpy(
               var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
-          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
-                                                                    accum1_np,
-                                                                    3, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
+              var1_np, accum1_np, 3, 2.0, 0.9)
         var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
         var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
         accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
         accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
         var0 = variables.Variable(var0_np)
         var1 = variables.Variable(var1_np)
-        loss = 5 * var0 * var0 + 3 * var1
         mom_op = momentum_lib.MomentumOptimizer(
             learning_rate=2.0, momentum=0.9, use_nesterov=True)
         x_feed = array_ops.placeholder(dtype)
-        y_feed = ops.IndexedSlices(
-            x_feed, constant_op.constant([0, 1]), constant_op.constant([2]))
-        grads_and_vars = [(y_feed, var0), (constant_op.constant(
-            [3.0, 3.0], dtype=dtype), var1)]
+        y_feed = ops.IndexedSlices(x_feed, constant_op.constant([0, 1]),
+                                   constant_op.constant([2]))
+        grads_and_vars = [(y_feed, var0),
+                          (constant_op.constant([3.0, 3.0], dtype=dtype), var1)]
         opt_update = mom_op.apply_gradients(grads_and_vars)
         self.evaluate(variables.global_variables_initializer())
         for t in range(1, 5):
           opt_update.run(feed_dict={x_feed: grads[t - 1]})
           var0_np, accum0_np = self._update_nesterov_momentum_numpy(
               var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
-          var1_np, accum1_np = self._update_nesterov_momentum_numpy(var1_np,
-                                                                    accum1_np,
-                                                                    3, 2.0, 0.9)
+          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
+              var1_np, accum1_np, 3, 2.0, 0.9)
           self.assertAllClose(var0_np, self.evaluate(var0))
           self.assertAllClose(var1_np, self.evaluate(var1))
 
@@ -249,6 +249,7 @@ class MomentumOptimizerTest(test.TestCase):
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
         return pred * pred
 
       # pylint: enable=cell-var-from-loop
+
       opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
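
In the loss closure above, embedding_ops.embedding_lookup([var0], [0]) gathers row 0 of var0, so the loss is the squared matmul of that row with x. A NumPy equivalent, assuming var0 is a 1x2 matrix (its actual value is set outside this hunk):

import numpy as np

var0 = np.array([[1.0, 2.0]])   # assumed value; defined elsewhere in the test
x = np.array([[4.0], [5.0]])
pred = np.matmul(var0[[0]], x)  # embedding_lookup([var0], [0]) -> row 0
loss = pred * pred              # shape (1, 1)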
@@ -464,15 +465,11 @@ class MomentumOptimizerTest(test.TestCase):
         var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype))
         var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2]))
         grads0 = ops.IndexedSlices(
-            constant_op.constant(
-                [[.1, .1]], dtype=dtype),
-            constant_op.constant([1]),
-            constant_op.constant([4, 2]))
+            constant_op.constant([[.1, .1]], dtype=dtype),
+            constant_op.constant([1]), constant_op.constant([4, 2]))
         grads1 = ops.IndexedSlices(
-            constant_op.constant(
-                [[.01, .01], [.01, .01]], dtype=dtype),
-            constant_op.constant([2, 3]),
-            constant_op.constant([4, 2]))
+            constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype),
+            constant_op.constant([2, 3]), constant_op.constant([4, 2]))
         mom_opt = momentum_lib.MomentumOptimizer(
             learning_rate=2.0, momentum=0.9)
         mom_update = mom_opt.apply_gradients(
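
The IndexedSlices gradients above are (values, indices, dense_shape) triples: grads0 touches only row 1 of the [4, 2] variable, grads1 rows 2 and 3. A short sketch of the dense gradients they stand for:

import numpy as np

dense0 = np.zeros((4, 2))
dense0[[1]] = [[0.1, 0.1]]                     # indices=[1], dense_shape=[4, 2]

dense1 = np.zeros((4, 2))
dense1[[2, 3]] = [[0.01, 0.01], [0.01, 0.01]]  # indices=[2, 3]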