[RunHandler] Fix wait-for-handler code when timeout is not set.

Previously, when `call_timeout == 0`, we were setting a deadline of "now" (which expires almost immediately), whereas a zero timeout should be treated as "no deadline": wait indefinitely for a free handler.

PiperOrigin-RevId: 292241523
Change-Id: I659886f0f1642b6683c4c2ff44d74ae7bec29620
This commit is contained in:
Derek Murray 2020-01-29 16:22:37 -08:00 committed by TensorFlower Gardener
parent 39fd4e7c7b
commit f3117e8ec1
2 changed files with 40 additions and 4 deletions
tensorflow/core/framework

View File

@ -36,7 +36,9 @@ limitations under the License.
namespace tensorflow {
namespace {
// LINT.IfChange
// NOTE: run_handler_test.cc keeps its own hand-copied duplicate of this value
// (see the ThenChange target below); update both places together.
static constexpr int32 kMaxConcurrentHandlers = 128;
// LINT.ThenChange(//tensorflow/core/framework/run_handler_test.cc)
// TODO(azaks): Refactor with thread:ThreadPool
class RunHandlerEnvironment {
@ -948,16 +950,18 @@ class RunHandlerPool::Impl {
RunHandler::Impl* handler_impl;
{
mutex_lock l(mu_);
if (free_handlers_.empty()) {
if (!has_free_handler()) {
profiler::TraceMe activity(
[&] {
return strings::StrCat("WaitingForHandler#step_id=", step_id,
"#");
},
profiler::TraceMeLevel::kInfo);
if (!mu_.AwaitWithDeadline(
Condition(this, &Impl::has_free_handler),
EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) {
if (timeout_in_ms == 0) {
mu_.Await(Condition(this, &Impl::has_free_handler));
} else if (!mu_.AwaitWithDeadline(
Condition(this, &Impl::has_free_handler),
EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) {
return nullptr;
}
}

View File

@ -205,5 +205,37 @@ TEST_F(RunHandlerTest, TestConcurrencyUseRunHandlerPool) {
delete tp;
}
// Checks how RunHandlerPool::Get() behaves when no handler is free:
//   * with a non-zero timeout it gives up and returns nullptr;
//   * with a timeout of 0 it blocks until a handler is handed back.
TEST_F(RunHandlerTest, TestWaitTimeout) {
  std::unique_ptr<RunHandlerPool> pool(new RunHandlerPool(1, 1));

  // Acquire kMaxConcurrentHandlers handlers (not just one) and keep them
  // alive, so that the requests below find no free handler.
  std::vector<std::unique_ptr<RunHandler>> blocking_handles;
  const int32 kMaxConcurrentHandlers = 128;  // Copied from run_handler.cc.
  blocking_handles.reserve(kMaxConcurrentHandlers);
  for (int i = 0; i < kMaxConcurrentHandlers; ++i) {
    blocking_handles.push_back(pool->Get(i));
  }

  // A subsequent request with a non-zero timeout will fail by returning
  // nullptr.
  auto null_handle = pool->Get(kMaxConcurrentHandlers, 1);
  EXPECT_EQ(null_handle.get(), nullptr);

  // A subsequent request with no timeout will succeed once the blocking handle
  // is returned.
  auto tp = std::make_unique<thread::ThreadPool>(Env::Default(), "test", 4);
  // Explicitly zero-initialised: a default-constructed std::atomic holds an
  // indeterminate value before C++20, and 0 doubles as the "handle not
  // released yet" sentinel checked below.
  std::atomic<int64> release_time(0);
  tp->Schedule([&blocking_handles, &release_time]() {
    Env::Default()->SleepForMicroseconds(5000);
    // Record the timestamp *before* releasing, so the waiter can only wake up
    // after it has been stored.
    release_time = EnvTime::NowNanos();
    blocking_handles[0].reset();
  });

  auto next_handle = pool->Get(kMaxConcurrentHandlers + 1, 0);
  const int64 released_at = release_time.load();
  // Get() must not have returned before the background thread released a
  // handle; a still-zero timestamp means it did not actually wait.
  EXPECT_NE(released_at, 0);
  EXPECT_GT(EnvTime::NowNanos(), released_at);
  EXPECT_NE(next_handle.get(), nullptr);
}
} // namespace
} // namespace tensorflow