CLN: remove noisy warning in StepCounterHook

This commit is contained in:
Yan Facai (颜发才) 2018-10-14 16:35:12 +08:00
parent b47a10d0a2
commit f1757f5a58
No known key found for this signature in database
GPG Key ID: 6F1973CF5C31A4A6
2 changed files with 11 additions and 15 deletions

View File

@ -653,7 +653,6 @@ class StepCounterHook(session_run_hook.SessionRunHook):
self._summary_writer = summary_writer self._summary_writer = summary_writer
self._output_dir = output_dir self._output_dir = output_dir
self._last_global_step = None self._last_global_step = None
self._global_step_check_count = 0
self._steps_per_run = 1 self._steps_per_run = 1
def _set_steps_per_run(self, steps_per_run): def _set_steps_per_run(self, steps_per_run):
@ -698,22 +697,19 @@ class StepCounterHook(session_run_hook.SessionRunHook):
# step value such that the comparison could be unreliable. For simplicity, # step value such that the comparison could be unreliable. For simplicity,
# we just compare the stale_global_step with previously recorded version. # we just compare the stale_global_step with previously recorded version.
if stale_global_step == self._last_global_step: if stale_global_step == self._last_global_step:
# Here, we use a counter to count how many times we have observed that the # Here, we log a warning only the first 5 times we observe that the
# global step has not been increased. For some Optimizers, the global step # global step has not been increased. For some Optimizers, the global step
# is not increased each time by design. For example, SyncReplicaOptimizer # is not increased each time by design. For example, SyncReplicaOptimizer
# doesn't increase the global step in worker's main train step. # doesn't increase the global step in worker's main train step.
self._global_step_check_count += 1 logging.log_first_n(
if self._global_step_check_count % 20 == 0: logging.WARN,
self._global_step_check_count = 0
logging.warning(
"It seems that global step (tf.train.get_global_step) has not " "It seems that global step (tf.train.get_global_step) has not "
"been increased. Current value (could be stable): %s vs previous " "been increased. Current value (could be stable): %s vs previous "
"value: %s. You could increase the global step by passing " "value: %s. You could increase the global step by passing "
"tf.train.get_global_step() to Optimizer.apply_gradients or " "tf.train.get_global_step() to Optimizer.apply_gradients or "
"Optimizer.minimize.", stale_global_step, self._last_global_step) "Optimizer.minimize.",
else: 5,
# Whenever we observe the increment, reset the counter. stale_global_step, self._last_global_step)
self._global_step_check_count = 0
self._last_global_step = stale_global_step self._last_global_step = stale_global_step

View File

@ -1013,7 +1013,7 @@ class StepCounterHookTest(test.TestCase):
hook.begin() hook.begin()
mon_sess = monitored_session._HookedSession(sess, [hook]) mon_sess = monitored_session._HookedSession(sess, [hook])
mon_sess.run(train_op) # Run one step to record global step. mon_sess.run(train_op) # Run one step to record global step.
with test.mock.patch.object(tf_logging, 'warning') as mock_log: with test.mock.patch.object(tf_logging, 'log_first_n') as mock_log:
for _ in range(30): for _ in range(30):
mon_sess.run(train_op) mon_sess.run(train_op)
self.assertRegexpMatches( self.assertRegexpMatches(