From 2147192998edc8e211d594af51709fbb628ed09d Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Fri, 5 Apr 2019 17:31:57 -0700 Subject: [PATCH] Fix the hanging in TensorBoard callback by adding step=0 to summary_ops_v2.graph. The hang is caused by the TensorBoard callback using summary_ops_v2 to write a graph. summary_ops_v2.graph() doesn't actually need a global step variable but creates one anyway (due to the structure of event log files), which causes a problem since the chief worker needs to send the initial value through collective_ops to other workers, but the other workers don't have a corresponding receiving call because they lack the TensorBoard callback with ModelCheckpoint._chief_worker_only=True. PiperOrigin-RevId: 242224121 --- tensorflow/python/keras/callbacks.py | 2 +- tensorflow/python/keras/callbacks_v1.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 3418b8892c6..bbb93deec99 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -1304,7 +1304,7 @@ class TensorBoard(Callback): with self._get_writer(self._train_run_name).as_default(): with summary_ops_v2.always_record_summaries(): if not model.run_eagerly: - summary_ops_v2.graph(K.get_graph()) + summary_ops_v2.graph(K.get_graph(), step=0) summary_writable = ( self.model._is_graph_network or # pylint: disable=protected-access diff --git a/tensorflow/python/keras/callbacks_v1.py b/tensorflow/python/keras/callbacks_v1.py index e403887181c..db0d2b9f4b5 100644 --- a/tensorflow/python/keras/callbacks_v1.py +++ b/tensorflow/python/keras/callbacks_v1.py @@ -168,7 +168,7 @@ class TensorBoard(callbacks.Callback): self.writer = summary_ops_v2.create_file_writer(self.log_dir) if not model.run_eagerly and self.write_graph: with self.writer.as_default(): - summary_ops_v2.graph(K.get_graph()) + summary_ops_v2.graph(K.get_graph(), step=0) elif self.write_graph: self.writer = 
tf_summary.FileWriter(self.log_dir, K.get_graph()) else: