Merge pull request #22968 from facaiy:CLN/remove_noisy_log_for_stepHook

PiperOrigin-RevId: 239025726
This commit is contained in:
TensorFlower Gardener 2019-03-18 11:58:26 -07:00
commit 972c3280bc
2 changed files with 13 additions and 18 deletions

View File

@ -653,7 +653,6 @@ class StepCounterHook(session_run_hook.SessionRunHook):
self._summary_writer = summary_writer self._summary_writer = summary_writer
self._output_dir = output_dir self._output_dir = output_dir
self._last_global_step = None self._last_global_step = None
self._global_step_check_count = 0
self._steps_per_run = 1 self._steps_per_run = 1
def _set_steps_per_run(self, steps_per_run): def _set_steps_per_run(self, steps_per_run):
@ -698,22 +697,18 @@ class StepCounterHook(session_run_hook.SessionRunHook):
# step value such that the comparison could be unreliable. For simplicity, # step value such that the comparison could be unreliable. For simplicity,
# we just compare the stale_global_step with previously recorded version. # we just compare the stale_global_step with previously recorded version.
if stale_global_step == self._last_global_step: if stale_global_step == self._last_global_step:
# Here, we use a counter to count how many times we have observed that the # Here, we give a warning in the first 5 times if we have observed that
# global step has not been increased. For some Optimizers, the global step # the global step has not been increased. For some Optimizers, the global
# is not increased each time by design. For example, SyncReplicaOptimizer # step is not increased each time by design. For example,
# doesn't increase the global step in worker's main train step. # SyncReplicaOptimizer doesn't increase the global step in worker's main
self._global_step_check_count += 1 # train step.
if self._global_step_check_count % 20 == 0: logging.log_first_n(
self._global_step_check_count = 0 logging.WARN,
logging.warning( "It seems that global step (tf.train.get_global_step) has not "
"It seems that global step (tf.train.get_global_step) has not " "been increased. Current value (could be stable): %s vs previous "
"been increased. Current value (could be stable): %s vs previous " "value: %s. You could increase the global step by passing "
"value: %s. You could increase the global step by passing " "tf.train.get_global_step() to Optimizer.apply_gradients or "
"tf.train.get_global_step() to Optimizer.apply_gradients or " "Optimizer.minimize.", 5, stale_global_step, self._last_global_step)
"Optimizer.minimize.", stale_global_step, self._last_global_step)
else:
# Whenever we observe the increment, reset the counter.
self._global_step_check_count = 0
self._last_global_step = stale_global_step self._last_global_step = stale_global_step

View File

@ -1026,7 +1026,7 @@ class StepCounterHookTest(test.TestCase):
hook.begin() hook.begin()
mon_sess = monitored_session._HookedSession(sess, [hook]) mon_sess = monitored_session._HookedSession(sess, [hook])
mon_sess.run(train_op) # Run one step to record global step. mon_sess.run(train_op) # Run one step to record global step.
with test.mock.patch.object(tf_logging, 'warning') as mock_log: with test.mock.patch.object(tf_logging, 'log_first_n') as mock_log:
for _ in range(30): for _ in range(30):
mon_sess.run(train_op) mon_sess.run(train_op)
self.assertRegexpMatches( self.assertRegexpMatches(