This is a bug fix for multi-replica training. When there are parameter servers and more than one replica, every replica except the chief would fail with an uninitialized stale_counter variable, because the variable did not live on a parameter server.
PiperOrigin-RevId: 179421368
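To make the placement issue concrete, here is a minimal sketch (not part of this commit; the /job:ps device string is illustrative, and the TF 1.x graph-mode API is assumed) of how ops.colocate_with ties the counter to global_step, which between-graph replication normally places on a parameter server:

import tensorflow as tf

# In a between-graph replicated job, global_step is normally placed on a
# parameter server (for example via tf.train.replica_device_setter).
with tf.device("/job:ps/task:0"):
  global_step = tf.train.get_or_create_global_step()

# Before this fix, the counter was created without any device constraint, so
# it could end up on a worker rather than a parameter server, and non-chief
# replicas then saw it as uninitialized. Colocating it with global_step pins
# it to the same parameter-server device.
with tf.colocate_with(global_step):
  stale_counter = tf.get_variable(
      "stale_counter", [],
      initializer=tf.zeros_initializer(),
      trainable=False)

print(stale_counter.device)  # Expected: /job:ps/task:0, same as global_step.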
commit 13a8558846
parent 119f5d477b
@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)
 
     def _AcceptGradientOp():
       with ops.control_dependencies(
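For context, a hedged usage sketch of the setting this fix targets (the cluster addresses, learning rate, and staleness bound below are illustrative, not taken from this commit): between-graph replication with a parameter server and several workers, wrapping a base optimizer in tf.contrib.opt.DropStaleGradientOptimizer.

import tensorflow as tf

# Illustrative cluster: one parameter server, two workers.
cluster = tf.train.ClusterSpec({
    "ps": ["ps0.example.com:2222"],
    "worker": ["worker0.example.com:2222", "worker1.example.com:2222"],
})

# Each worker builds its graph with variables assigned to the ps job.
with tf.device(tf.train.replica_device_setter(cluster=cluster)):
  global_step = tf.train.get_or_create_global_step()
  x = tf.get_variable("x", [], initializer=tf.constant_initializer(5.0))
  loss = tf.square(x)

  # Gradients computed more than `staleness` steps ago are dropped rather
  # than applied.
  opt = tf.contrib.opt.DropStaleGradientOptimizer(
      tf.train.GradientDescentOptimizer(0.1), staleness=2)
  train_op = opt.minimize(loss, global_step=global_step)

With the colocation in place, the stale_counter created inside apply_gradients is hosted with global_step on the ps job, so the chief's variable initialization covers it and non-chief workers no longer report it as uninitialized.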