From 80999cc5086c635ad8ca564c648eaed45c99ecbc Mon Sep 17 00:00:00 2001
From: Haoyu Zhang <haoyuzhang@google.com>
Date: Mon, 19 Oct 2020 18:35:15 -0700
Subject: [PATCH] Remove catching InternalError for failed tensor copies as
 worker failures.

PiperOrigin-RevId: 337976482
Change-Id: Ic385376d39cd6928b39dea985a300a0be14bcc74
---
 tensorflow/python/distribute/client/client.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tensorflow/python/distribute/client/client.py b/tensorflow/python/distribute/client/client.py
index be7157c1fea..a5eb9ff5e21 100644
--- a/tensorflow/python/distribute/client/client.py
+++ b/tensorflow/python/distribute/client/client.py
@@ -1180,14 +1180,8 @@ def _is_worker_failure(error):
       # remote_handle" part.
       return True
 
-  # TODO(b/162541228): The following 3 types of errors are very rare and only
+  # TODO(b/162541228): The following 2 types of errors are very rare and only
   # observed in large-scale testing. The types of errors should be reduced.
-  # This error could show up when copying function inputs from remote tasks.
-  if isinstance(error, errors.InternalError):
-    if ("Failed copying input tensor" in str(error) or
-        "Unable to find a context_id" in str(error)):
-      return True
-
   # This could happen when the function registration fails. In the observed
   # cases this only happens to the dataset related functions.
   if isinstance(error, errors.NotFoundError):