Catch and suppress the NotFoundError that get_checkpoint_mtimes may raise in rare cases (for example, when another thread or process deletes an older checkpoint concurrently) in the recover_last_checkpoints method of saver.py.

PiperOrigin-RevId: 339175562
Change-Id: I9c698b9b42ae28ca1e60173e6000739f56dd79fc
This commit is contained in:
A. Unique TensorFlower 2020-10-26 20:59:00 -07:00 committed by TensorFlower Gardener
parent c701bc5780
commit 3315eb97c0

View File

@@ -1073,7 +1073,12 @@ class Saver(object):
"""
checkpoints_with_mtimes = []
for checkpoint_path in checkpoint_paths:
mtime = checkpoint_management.get_checkpoint_mtimes([checkpoint_path])
try:
mtime = checkpoint_management.get_checkpoint_mtimes([checkpoint_path])
except errors.NotFoundError:
# It's fine if some other thread/process is deleting some older
# checkpoint concurrently.
continue
if mtime:
checkpoints_with_mtimes.append((checkpoint_path, mtime[0]))
self.set_last_checkpoints_with_time(checkpoints_with_mtimes)