From d1022145203c1edb81a39010da3f61207533091d Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne
Date: Mon, 13 May 2019 17:02:18 -0700
Subject: [PATCH] Factor out GPUMemAllocator into its own include.

This allows using GPUMemAllocator without pulling in TF protobufs as a
dependency.

PiperOrigin-RevId: 248040160
---
 tensorflow/core/BUILD                      | 19 ++++-
 .../common_runtime/gpu/gpu_bfc_allocator.h | 54 +------------
 .../common_runtime/gpu/gpu_mem_allocator.h | 78 +++++++++++++++++++
 3 files changed, 96 insertions(+), 55 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 553d1901a9e..4472b4a72dd 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -3447,16 +3447,17 @@ cc_library(
 )
 
 GPU_RUNTIME_HEADERS = [
-    "common_runtime/gpu/gpu_host_allocator.h",
     "common_runtime/gpu/gpu_bfc_allocator.h",
     "common_runtime/gpu/gpu_cudamalloc_allocator.h",
     "common_runtime/gpu/gpu_debug_allocator.h",
     "common_runtime/gpu/gpu_device.h",
+    "common_runtime/gpu/gpu_host_allocator.h",
     "common_runtime/gpu/gpu_id.h",
     "common_runtime/gpu/gpu_id_manager.h",
     "common_runtime/gpu/gpu_id_utils.h",
     "common_runtime/gpu/gpu_init.h",
     "common_runtime/gpu/gpu_managed_allocator.h",
+    "common_runtime/gpu/gpu_mem_allocator.h",
     "common_runtime/gpu/gpu_process_state.h",
     "common_runtime/gpu/gpu_stream_util.h",
     "common_runtime/gpu/gpu_util.h",
@@ -3520,16 +3521,30 @@ tf_cuda_library(
     name = "gpu_bfc_allocator",
     srcs = [
         "common_runtime/gpu/gpu_bfc_allocator.cc",
-        "common_runtime/gpu/gpu_id.h",
     ],
     hdrs = ["common_runtime/gpu/gpu_bfc_allocator.h"],
     features = ["parse_headers"],
     visibility = ["//visibility:public"],
     deps = [
         ":bfc_allocator",
+        ":gpu_mem_allocator",
         ":lib",
         ":lib_internal",
         ":protos_all_cc",
+    ],
+)
+
+tf_cuda_library(
+    name = "gpu_mem_allocator",
+    srcs = [
+        "common_runtime/gpu/gpu_id.h",
+    ],
+    hdrs = ["common_runtime/gpu/gpu_mem_allocator.h"],
+    features = ["parse_headers"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":allocator",
+        ":lib_internal",
         ":stream_executor",
     ],
 )
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index fb4ca3ffdb7..5cae743115f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -22,65 +22,13 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/common_runtime/bfc_allocator.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
-#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
 namespace tensorflow {
 
-// Suballocator for GPU memory.
-class GPUMemAllocator : public SubAllocator {
- public:
-  // 'platform_gpu_id' refers to the ID of the GPU device within
-  // the process and must reference a valid ID in the process.
-  // Note: stream_exec cannot be null.
-  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
-                           PlatformGpuId gpu_id, bool use_unified_memory,
-                           const std::vector<Visitor>& alloc_visitors,
-                           const std::vector<Visitor>& free_visitors)
-      : SubAllocator(alloc_visitors, free_visitors),
-        stream_exec_(stream_exec),
-        gpu_id_(gpu_id),
-        use_unified_memory_(use_unified_memory) {
-    CHECK(stream_exec_ != nullptr);
-  }
-  ~GPUMemAllocator() override {}
-
-  void* Alloc(size_t alignment, size_t num_bytes) override {
-    void* ptr = nullptr;
-    if (num_bytes > 0) {
-      if (use_unified_memory_) {
-        ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
-      } else {
-        ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
-      }
-      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
-    }
-    return ptr;
-  }
-
-  void Free(void* ptr, size_t num_bytes) override {
-    if (ptr != nullptr) {
-      VisitFree(ptr, gpu_id_.value(), num_bytes);
-      if (use_unified_memory_) {
-        stream_exec_->UnifiedMemoryDeallocate(ptr);
-      } else {
-        se::DeviceMemoryBase gpu_ptr(ptr);
-        stream_exec_->Deallocate(&gpu_ptr);
-      }
-    }
-  }
-
- private:
-  se::StreamExecutor* stream_exec_;  // not owned, non-null
-  const PlatformGpuId gpu_id_;
-  const bool use_unified_memory_ = false;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
-};
-
 // A GPU memory allocator that implements a 'best-fit with coalescing'
 // algorithm.
 class GPUBFCAllocator : public BFCAllocator {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
new file mode 100644
index 00000000000..e14f2d9377a
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
@@ -0,0 +1,78 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace tensorflow {
+
+// Suballocator that obtains GPU memory directly from a StreamExecutor.
+class GPUMemAllocator : public SubAllocator {
+ public:
+  // 'gpu_id' refers to the ID of the GPU device within the process and must
+  // reference a valid ID in the process; it is passed to VisitAlloc/VisitFree.
+  // Note: stream_exec cannot be null (enforced with CHECK).
+  explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
+                           PlatformGpuId gpu_id, bool use_unified_memory,
+                           const std::vector<Visitor>& alloc_visitors,
+                           const std::vector<Visitor>& free_visitors)
+      : SubAllocator(alloc_visitors, free_visitors),
+        stream_exec_(stream_exec),
+        gpu_id_(gpu_id),
+        use_unified_memory_(use_unified_memory) {
+    CHECK(stream_exec_ != nullptr);
+  }
+  ~GPUMemAllocator() override {}
+
+  void* Alloc(size_t alignment, size_t num_bytes) override {
+    void* ptr = nullptr;
+    if (num_bytes > 0) {
+      if (use_unified_memory_) {
+        ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
+      } else {
+        ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
+      }
+      VisitAlloc(ptr, gpu_id_.value(), num_bytes);
+    }
+    return ptr;
+  }
+
+  void Free(void* ptr, size_t num_bytes) override {
+    if (ptr != nullptr) {
+      VisitFree(ptr, gpu_id_.value(), num_bytes);
+      if (use_unified_memory_) {
+        stream_exec_->UnifiedMemoryDeallocate(ptr);
+      } else {
+        se::DeviceMemoryBase gpu_ptr(ptr);
+        stream_exec_->Deallocate(&gpu_ptr);
+      }
+    }
+  }
+
+ private:
+  se::StreamExecutor* stream_exec_;  // not owned, non-null
+  const PlatformGpuId gpu_id_;
+  const bool use_unified_memory_ = false;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_