This is a bug fix for multi-replica training. When there are one or more parameter servers and more than one replica, every replica except the chief would fail with an uninitialized stale_counter variable, since the variable does not live on a parameter server.
PiperOrigin-RevId: 179421368
parent 119f5d477b
commit 13a8558846
@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)
 
     def _AcceptGradientOp():
       with ops.control_dependencies(
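For context, here is a minimal sketch (not part of the commit; it uses hypothetical cluster addresses and the public TF 1.x API rather than the internal modules in the diff) of why colocating the counter with global_step fixes the error: global_step is placed on a parameter server, so pinning stale_counter to the same device means every replica reads one shared, chief-initialized variable instead of a local copy that non-chief replicas never initialize.

import tensorflow as tf

# Hypothetical two-worker, one-PS cluster, for illustration only.
cluster = tf.train.ClusterSpec({
    "ps": ["ps0.example.com:2222"],
    "worker": ["worker0.example.com:2222", "worker1.example.com:2222"],
})

# replica_device_setter places variables on the PS and ops on this worker.
with tf.device(tf.train.replica_device_setter(
    cluster=cluster, worker_device="/job:worker/task:1")):
  global_step = tf.train.get_or_create_global_step()  # placed on the PS

  # Without the colocation below, the counter's placement would depend on
  # whatever device scope happens to surround apply_gradients; the fix forces
  # it onto global_step's device (the PS), so the chief's init op covers it
  # and non-chief replicas find it already initialized.
  with tf.colocate_with(global_step):
    stale_counter = tf.get_variable(
        "stale_counter", [],
        initializer=tf.zeros_initializer(),
        trainable=False)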