Make GRPC retries configurable.
PiperOrigin-RevId: 328965782 Change-Id: I275e085eba595cb7e49a6837b8ed92e4608e72eb
This commit is contained in:
parent
3c0a8a4a8f
commit
c413149b9b
@ -35,11 +35,10 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/tracing.h"
|
||||
#include "tensorflow/core/protobuf/transport_options.pb.h"
|
||||
#include "tensorflow/core/protobuf/worker.pb.h"
|
||||
#include "tensorflow/core/util/env_var.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
const int kMaxWorkerRpcRetries = 10;
|
||||
|
||||
class GrpcRemoteWorker : public WorkerInterface {
|
||||
public:
|
||||
explicit GrpcRemoteWorker(SharedGrpcChannelPtr channel,
|
||||
@ -274,7 +273,7 @@ class GrpcRemoteWorker : public WorkerInterface {
|
||||
bool fail_fast = true) {
|
||||
new RPCState<protobuf::Message>(
|
||||
&stub_, cq_, method, *request, response, std::move(done), call_opts,
|
||||
callback_threadpool_, /*max_retries=*/0, fail_fast, &target_);
|
||||
callback_threadpool_, MaxRetries(), fail_fast, &target_);
|
||||
}
|
||||
|
||||
void IssueRequest(const protobuf::Message* request, TensorResponse* response,
|
||||
@ -282,7 +281,7 @@ class GrpcRemoteWorker : public WorkerInterface {
|
||||
CallOptions* call_opts = nullptr) {
|
||||
new RPCState<TensorResponse>(&stub_, cq_, method, *request, response,
|
||||
std::move(done), call_opts,
|
||||
callback_threadpool_, /*max_retries=*/0,
|
||||
callback_threadpool_, MaxRetries(),
|
||||
/*fail_fast=*/true, &target_);
|
||||
}
|
||||
|
||||
@ -299,6 +298,14 @@ class GrpcRemoteWorker : public WorkerInterface {
|
||||
// Helper function for initializing the RpcMethod objects below.
|
||||
const char* Method(GrpcWorkerMethod id) { return GrpcWorkerMethodName(id); }
|
||||
|
||||
// Helper function for configuring max GRPC retries. Defaults to 0 (no
|
||||
// retries).
|
||||
const int64 MaxRetries() {
|
||||
int64 max_retries = -1;
|
||||
TF_CHECK_OK(ReadInt64FromEnvVar("GRPC_MAX_RETRIES", 0, &max_retries));
|
||||
return max_retries;
|
||||
}
|
||||
|
||||
SharedGrpcChannelPtr channel_;
|
||||
::grpc::GenericStub stub_;
|
||||
::grpc::CompletionQueue* cq_;
|
||||
|
Loading…
x
Reference in New Issue
Block a user