Use caller fail_fast argument when GRPC_FAIL_FAST is not set.

PiperOrigin-RevId: 337308186
Change-Id: I2e80be55d00eb76f773ec8c8078f975e80a2aa88
This commit is contained in:
Haoyu Zhang 2020-10-15 08:01:21 -07:00 committed by TensorFlower Gardener
parent 0e2fb37e32
commit 44c1dfd80a

View File

@ -49,19 +49,40 @@ class RPCState : public GrpcClientCQTag {
: RPCState(
stub, cq, method, request, response, std::move(done), call_opts,
threadpool,
// 1) If GRPC_FAIL_FAST is specified, fail_fast=$GRPC_FAIL_FAST.
// See b/141948186.
// 2) Otherwise, if the platform is Google, use the fail_fast from
// the caller. See b/140260119.
// 3) Otherwise, use fail_fast=false.
[fail_fast]() -> bool {
bool x;
// 1) If GRPC_FAIL_FAST is set to 'true' or 'false',
// fail_fast=$GRPC_FAIL_FAST. See b/141948186.
// 2) Otherwise, if GRPC_FAIL_FAST is set to 'use_caller', use the
// fail_fast from the caller. See b/140260119.
//
// Current default for PLATFORM_GOOGLE: use caller fail_fast;
// Current default for open source: fail_fast=false.
//
// NOTE: Callers mostly set fail_fast=true to prevent jobs from hanging
// on worker task failures, except in a few cases such as GetStatus
// in cluster initialization and collective param resolution.
[fail_fast, &done]() -> bool {
string fail_fast_env;
#if defined(PLATFORM_GOOGLE)
TF_CHECK_OK(ReadBoolFromEnvVar("GRPC_FAIL_FAST", fail_fast, &x));
TF_CHECK_OK(ReadStringFromEnvVar("GRPC_FAIL_FAST", "use_caller",
&fail_fast_env));
#else
TF_CHECK_OK(ReadBoolFromEnvVar("GRPC_FAIL_FAST", false, &x));
TF_CHECK_OK(ReadStringFromEnvVar("GRPC_FAIL_FAST", "false",
&fail_fast_env));
#endif // PLATFORM_GOOGLE
return x;
string fail_fast_env_lower = absl::AsciiStrToLower(fail_fast_env);
if (fail_fast_env_lower == "true") {
return true;
} else if (fail_fast_env_lower == "use_caller") {
return fail_fast;
} else if (fail_fast_env_lower == "false") {
return false;
} else {
string error_message = strings::StrCat(
"Invalid GRPC_FAIL_FAST config: ", fail_fast_env);
LOG(WARNING) << error_message;
done(errors::InvalidArgument(error_message));
return false;
}
}(),
/*timeout_in_ms=*/0, max_retries, target) {
}