This is a bug fix for multi-replica training. When there are parameter servers and more than one replica, replicas other than the chief would complain about an uninitialized stale_counter variable, since that variable did not live on a parameter server.

PiperOrigin-RevId: 179421368
A. Unique TensorFlower 2017-12-18 07:42:47 -08:00 committed by TensorFlower Gardener
parent 119f5d477b
commit 13a8558846

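For context, here is a minimal sketch of the colocation pattern the change below introduces, written against the public TF 1.x API rather than the internal modules used in the diff (the standalone setup and API names are assumptions, not part of this commit). Colocating the counter with global_step places it on the same device as global_step, which with a replica device setter is the parameter server, so non-chief replicas see a variable the chief has already initialized.

    import tensorflow as tf  # assumes TF 1.x

    # global_step is normally placed on a parameter server by a
    # replica device setter in between-graph replication (assumed here).
    global_step = tf.train.get_or_create_global_step()

    # Colocate the counter with global_step so it lives on the same
    # device as global_step instead of on each worker's local device.
    with tf.colocate_with(global_step):
        stale_counter = tf.get_variable(
            "stale_counter", [],
            initializer=tf.zeros_initializer(),
            trainable=False)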

@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)

     def _AcceptGradientOp():
       with ops.control_dependencies(