Make CUDA host allocator fetch one of the available stream executors

in the process, not the 0th one, which may not be visible to the process.

Fixes  (for real this time?)
Change: 132128469
Vijay Vasudevan 2016-09-02 19:32:03 -08:00 committed by TensorFlower Gardener
parent f504644d15
commit 5a96e4e672
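
The shape of the fix, as a minimal standalone sketch (FindFirstValidDeviceIndex and the void* allocator table are illustrative stand-ins, not the TensorFlow API): scan the per-GPU allocator table for the first entry that was actually initialized and use that device's index for the StreamExecutor lookup, instead of hard-coding device 0. In the real change below, the chosen executor is then used to allocate CUDA host memory, which works the same no matter which visible GPU provided it.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy stand-in for the per-GPU allocator table; a null entry means this
// process never initialized that GPU (for example, it is not visible to it).
int FindFirstValidDeviceIndex(const std::vector<void*>& gpu_allocators) {
  for (std::size_t i = 0; i < gpu_allocators.size(); ++i) {
    if (gpu_allocators[i] != nullptr) {
      return static_cast<int>(i);  // First device this process can actually use.
    }
  }
  return -1;  // No usable device found.
}

int main() {
  // Device 0 is unavailable, device 1 was initialized: the old code would
  // have asked for executor 0; the search above picks index 1 instead.
  int dummy_allocator = 0;
  std::vector<void*> allocators = {nullptr, &dummy_allocator};
  std::cout << FindFirstValidDeviceIndex(allocators) << "\n";  // prints 1
  return 0;
}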

@@ -181,12 +181,25 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
   // different numa_nodes. For now, just one.
   numa_node = 0;
   mutex_lock lock(mu_);
+  // Find the first valid StreamExecutor to request CUDA host memory
+  // through, since any will work.
+  //
+  // This search isn't super clean, and it would be nice to use a
+  // better source of information about which executor to use. For
+  // example, process_state could maybe save the first stream executor
+  // it knows is valid.
+  gpu::StreamExecutor* se = nullptr;
+  for (size_t i = 0; i < gpu_allocators_.size(); ++i) {
+    if (gpu_allocators_[i] != nullptr) {
+      se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie();
+      break;
+    }
+  }
+  CHECK_NE(nullptr, se);
   while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
-    // CUDAHost alloc the same across all gpus, so just get the
-    // executor for the first device.
-    gpu::Platform* gpu_platform = GPUMachineManager();
-    gpu::StreamExecutor* se = gpu_platform->ExecutorForDevice(0).ValueOrDie();
-    CHECK(se);
     Allocator* allocator = nullptr;
     static constexpr bool kCudaHostMemoryUseBFC = true;
     if (kCudaHostMemoryUseBFC) {