From 13a8558846a1f0a821f3ee1f147fae833b00f088 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 18 Dec 2017 07:42:47 -0800
Subject: [PATCH] Fix a bug in multi-replica training.

When there are parameter servers and more than one replica, every replica
other than the chief complains about an uninitialized stale_counter
variable, because the variable does not live on a parameter server.

PiperOrigin-RevId: 179421368
---
 .../opt/python/training/drop_stale_gradient_optimizer.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
index f20c172ee37..4a905b1b2a0 100644
--- a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer.py
@@ -78,10 +78,11 @@ class DropStaleGradientOptimizer(optimizer.Optimizer):
   def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     gradients = []
     # Number of stale gradients.
-    stale_counter = variable_scope.get_variable(
-        "stale_counter", [],
-        initializer=init_ops.zeros_initializer(),
-        trainable=False)
+    with ops.colocate_with(global_step):
+      stale_counter = variable_scope.get_variable(
+          "stale_counter", [],
+          initializer=init_ops.zeros_initializer(),
+          trainable=False)
 
     def _AcceptGradientOp():
       with ops.control_dependencies(
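
For context, the sketch below (not part of the patch) illustrates the placement behavior the fix relies on: creating a variable under ops.colocate_with(global_step) makes it inherit global_step's device, which in a between-graph replicated setup is a parameter server, so non-chief replicas read the same, already-initialized variable instead of a local one. The explicit "/job:ps/task:0" device string and the session-free graph construction are illustrative assumptions for TensorFlow 1.x graph mode; a real job would let tf.train.replica_device_setter choose devices.

# Illustrative sketch only, assuming TensorFlow 1.x graph mode.
import tensorflow as tf
from tensorflow.python.framework import ops

with tf.Graph().as_default():
  # In a real multi-replica job, replica_device_setter would place
  # global_step on a parameter server; the device here is hypothetical.
  with tf.device("/job:ps/task:0"):
    global_step = tf.train.get_or_create_global_step()

  # Same pattern as the patch: the counter inherits global_step's device
  # instead of being created locally on each worker.
  with ops.colocate_with(global_step):
    stale_counter = tf.get_variable(
        "stale_counter", [],
        initializer=tf.zeros_initializer(),
        trainable=False)

  print(global_step.device)    # /job:ps/task:0
  print(stale_counter.device)  # colocated with global_step, i.e. /job:ps/task:0

Without the colocation scope, stale_counter would be placed like any other locally created variable on the worker that builds it, which is why only the chief (the replica that runs the init op) saw it as initialized.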