Set 'GRPC_FAIL_FAST' to 'use_caller' in MultiProcessCluster.
PiperOrigin-RevId: 338147299 Change-Id: I3b8bdcbfe5d57fcd630dd69a138d07756bef0a82
This commit is contained in:
parent
6ea5a4b73f
commit
ce8d708785
@ -234,6 +234,11 @@ class MultiProcessCluster(object):
|
||||
server_config = config_pb2.ConfigProto()
|
||||
server_config.device_count['GPU'] = 0
|
||||
|
||||
# Set the environment variable to prevent hanging upon job failure and
|
||||
# restart. Note that it defaults to 'use_caller' at Google, but defaults
|
||||
# to False in OSS.
|
||||
os.environ['GRPC_FAIL_FAST'] = 'use_caller'
|
||||
|
||||
server_lib.Server(
|
||||
cluster_spec,
|
||||
job_name=task_type,
|
||||
|
Loading…
Reference in New Issue
Block a user