[tf.data service] Relax locking during round-robin deep copy.

Copying data out of the round-robin buffer only reads the buffer, so it requires only a shared lock, but we were using an exclusive lock. This caused unnecessary contention among consumers, especially when dataset elements are large.

PiperOrigin-RevId: 351879643
Change-Id: I407fd1d5b296dcf514bc055fa5477c91430e8ccd
This commit is contained in:
Andrew Audibert 2021-01-14 14:39:43 -08:00 committed by TensorFlower Gardener
parent e97de5bfd7
commit 631fcba18c

View File

@ -101,13 +101,15 @@ Status RoundRobinTaskRunner::GetNext(const Request& request,
}
VLOG(2) << "Received request from consumer index " << request.consumer_index
<< " for round " << request.round_index;
{
mutex_lock l(mu_);
absl::flat_hash_set<int64>& round = requests_[request.round_index];
first_round_ = std::min(first_round_, request.round_index);
round.insert(request.consumer_index);
if (current_round_ < request.round_index && round.size() == num_consumers_) {
VLOG(1) << "Starting normal round with round index " << request.round_index;
if (current_round_ < request.round_index &&
round.size() == num_consumers_) {
VLOG(1) << "Starting normal round with round index "
<< request.round_index;
// This was the last request to arrive, time to start a new round.
TF_RETURN_IF_ERROR(FillBuffer());
VLOG(1) << "Finished preparing data for round " << request.round_index;
@ -134,8 +136,10 @@ Status RoundRobinTaskRunner::GetNext(const Request& request,
}
}
end_of_task = end_of_task_;
}
if (!end_of_task) {
element.clear();
tf_shared_lock l(mu_);
for (auto& component : buffer_[request.consumer_index]) {
element.push_back(tensor::DeepCopy(component));
}