From 135cdcee4b3ef76e874a51c7af6035741e884f40 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 19 Oct 2020 14:21:17 -0700 Subject: [PATCH] [tf.data service] Improve error message when non-fault-tolerant dispatcher restarts. PiperOrigin-RevId: 337932381 Change-Id: I4442113a892e07f85a630fccae9f09bff3e862c1 --- tensorflow/core/data/service/dispatcher_impl.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index c8eddaefeea..4e9d4bcba49 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -654,7 +654,14 @@ Status DataServiceDispatcherImpl::GetTasks(const GetTasksRequest* request, mutex_lock l(mu_); VLOG(3) << "Looking up tasks for job client id " << request->job_client_id(); std::shared_ptr job; - TF_RETURN_IF_ERROR(state_.JobForJobClientId(request->job_client_id(), job)); + Status s = state_.JobForJobClientId(request->job_client_id(), job); + if (errors::IsNotFound(s) && !config_.fault_tolerant_mode()) { + return errors::NotFound( + "Unknown job client id ", request->job_client_id(), + ". The dispatcher is not configured to be fault tolerant, so this " + "could be caused by a dispatcher restart."); + } + TF_RETURN_IF_ERROR(s); std::vector> tasks; TF_RETURN_IF_ERROR(state_.TasksForJob(job->job_id, tasks)); for (const auto& task : tasks) {