[tf.data service] Improve error message when non-fault-tolerant dispatcher restarts.

PiperOrigin-RevId: 337932381
Change-Id: I4442113a892e07f85a630fccae9f09bff3e862c1
This commit is contained in:
Andrew Audibert 2020-10-19 14:21:17 -07:00 committed by TensorFlower Gardener
parent 646dbdc2f5
commit 135cdcee4b

View File

@@ -654,7 +654,14 @@ Status DataServiceDispatcherImpl::GetTasks(const GetTasksRequest* request,
mutex_lock l(mu_); mutex_lock l(mu_);
VLOG(3) << "Looking up tasks for job client id " << request->job_client_id(); VLOG(3) << "Looking up tasks for job client id " << request->job_client_id();
std::shared_ptr<const Job> job; std::shared_ptr<const Job> job;
TF_RETURN_IF_ERROR(state_.JobForJobClientId(request->job_client_id(), job)); Status s = state_.JobForJobClientId(request->job_client_id(), job);
if (errors::IsNotFound(s) && !config_.fault_tolerant_mode()) {
return errors::NotFound(
"Unknown job client id ", request->job_client_id(),
". The dispatcher is not configured to be fault tolerant, so this "
"could be caused by a dispatcher restart.");
}
TF_RETURN_IF_ERROR(s);
std::vector<std::shared_ptr<const Task>> tasks; std::vector<std::shared_ptr<const Task>> tasks;
TF_RETURN_IF_ERROR(state_.TasksForJob(job->job_id, tasks)); TF_RETURN_IF_ERROR(state_.TasksForJob(job->job_id, tasks));
for (const auto& task : tasks) { for (const auto& task : tasks) {