Set 'GRPC_FAIL_FAST' to 'use_caller' in MultiProcessCluster.

PiperOrigin-RevId: 338147299
Change-Id: I3b8bdcbfe5d57fcd630dd69a138d07756bef0a82
This commit is contained in:
Chenkai Kuang 2020-10-20 15:11:58 -07:00 committed by TensorFlower Gardener
parent 6ea5a4b73f
commit ce8d708785

View File

@@ -234,6 +234,11 @@ class MultiProcessCluster(object):
server_config = config_pb2.ConfigProto()
server_config.device_count['GPU'] = 0
# Set the environment variable to prevent hanging upon job failure and
# restart. Note that it defaults to 'use_caller' at Google, but defaults
# to False in OSS.
os.environ['GRPC_FAIL_FAST'] = 'use_caller'
server_lib.Server(
cluster_spec,
job_name=task_type,