Stop treating InternalError from failed tensor copies as a worker failure.

PiperOrigin-RevId: 337976482
Change-Id: Ic385376d39cd6928b39dea985a300a0be14bcc74
This commit is contained in:
Haoyu Zhang 2020-10-19 18:35:15 -07:00 committed by TensorFlower Gardener
parent ab9b5f5b05
commit 80999cc508

View File

@@ -1180,14 +1180,8 @@ def _is_worker_failure(error):
# remote_handle" part.
return True
# TODO(b/162541228): The following 3 types of errors are very rare and only
# TODO(b/162541228): The following 2 types of errors are very rare and only
# observed in large-scale testing. The types of errors should be reduced.
# This error could show up when copying function inputs from remote tasks.
if isinstance(error, errors.InternalError):
if ("Failed copying input tensor" in str(error) or
"Unable to find a context_id" in str(error)):
return True
# This could happen when the function registration fails. In the observed
# cases this only happens to the dataset related functions.
if isinstance(error, errors.NotFoundError):