Factor out GPUMemAllocator into its own include.
This allows using GPUMemAllocator without pulling in TF protobufs as a dependency. PiperOrigin-RevId: 248040160
This commit is contained in:
parent
20f681b89a
commit
d102214520
@ -3447,16 +3447,17 @@ cc_library(
|
|||||||
)
|
)
|
||||||
|
|
||||||
GPU_RUNTIME_HEADERS = [
|
GPU_RUNTIME_HEADERS = [
|
||||||
"common_runtime/gpu/gpu_host_allocator.h",
|
|
||||||
"common_runtime/gpu/gpu_bfc_allocator.h",
|
"common_runtime/gpu/gpu_bfc_allocator.h",
|
||||||
"common_runtime/gpu/gpu_cudamalloc_allocator.h",
|
"common_runtime/gpu/gpu_cudamalloc_allocator.h",
|
||||||
"common_runtime/gpu/gpu_debug_allocator.h",
|
"common_runtime/gpu/gpu_debug_allocator.h",
|
||||||
"common_runtime/gpu/gpu_device.h",
|
"common_runtime/gpu/gpu_device.h",
|
||||||
|
"common_runtime/gpu/gpu_host_allocator.h",
|
||||||
"common_runtime/gpu/gpu_id.h",
|
"common_runtime/gpu/gpu_id.h",
|
||||||
"common_runtime/gpu/gpu_id_manager.h",
|
"common_runtime/gpu/gpu_id_manager.h",
|
||||||
"common_runtime/gpu/gpu_id_utils.h",
|
"common_runtime/gpu/gpu_id_utils.h",
|
||||||
"common_runtime/gpu/gpu_init.h",
|
"common_runtime/gpu/gpu_init.h",
|
||||||
"common_runtime/gpu/gpu_managed_allocator.h",
|
"common_runtime/gpu/gpu_managed_allocator.h",
|
||||||
|
"common_runtime/gpu/gpu_mem_allocator.h",
|
||||||
"common_runtime/gpu/gpu_process_state.h",
|
"common_runtime/gpu/gpu_process_state.h",
|
||||||
"common_runtime/gpu/gpu_stream_util.h",
|
"common_runtime/gpu/gpu_stream_util.h",
|
||||||
"common_runtime/gpu/gpu_util.h",
|
"common_runtime/gpu/gpu_util.h",
|
||||||
@ -3520,16 +3521,30 @@ tf_cuda_library(
|
|||||||
name = "gpu_bfc_allocator",
|
name = "gpu_bfc_allocator",
|
||||||
srcs = [
|
srcs = [
|
||||||
"common_runtime/gpu/gpu_bfc_allocator.cc",
|
"common_runtime/gpu/gpu_bfc_allocator.cc",
|
||||||
"common_runtime/gpu/gpu_id.h",
|
|
||||||
],
|
],
|
||||||
hdrs = ["common_runtime/gpu/gpu_bfc_allocator.h"],
|
hdrs = ["common_runtime/gpu/gpu_bfc_allocator.h"],
|
||||||
features = ["parse_headers"],
|
features = ["parse_headers"],
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
":bfc_allocator",
|
":bfc_allocator",
|
||||||
|
":gpu_mem_allocator",
|
||||||
":lib",
|
":lib",
|
||||||
":lib_internal",
|
":lib_internal",
|
||||||
":protos_all_cc",
|
":protos_all_cc",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
tf_cuda_library(
|
||||||
|
name = "gpu_mem_allocator",
|
||||||
|
srcs = [
|
||||||
|
"common_runtime/gpu/gpu_id.h",
|
||||||
|
],
|
||||||
|
hdrs = ["common_runtime/gpu/gpu_mem_allocator.h"],
|
||||||
|
features = ["parse_headers"],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = [
|
||||||
|
":allocator",
|
||||||
|
":lib_internal",
|
||||||
":stream_executor",
|
":stream_executor",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
@ -22,65 +22,13 @@ limitations under the License.
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/core/common_runtime/bfc_allocator.h"
|
#include "tensorflow/core/common_runtime/bfc_allocator.h"
|
||||||
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
|
#include "tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h"
|
||||||
#include "tensorflow/core/platform/stream_executor.h"
|
|
||||||
#include "tensorflow/core/platform/thread_annotations.h"
|
#include "tensorflow/core/platform/thread_annotations.h"
|
||||||
#include "tensorflow/core/platform/types.h"
|
#include "tensorflow/core/platform/types.h"
|
||||||
#include "tensorflow/core/protobuf/config.pb.h"
|
#include "tensorflow/core/protobuf/config.pb.h"
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
|
|
||||||
// Suballocator for GPU memory.
|
|
||||||
class GPUMemAllocator : public SubAllocator {
|
|
||||||
public:
|
|
||||||
// 'platform_gpu_id' refers to the ID of the GPU device within
|
|
||||||
// the process and must reference a valid ID in the process.
|
|
||||||
// Note: stream_exec cannot be null.
|
|
||||||
explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
|
|
||||||
PlatformGpuId gpu_id, bool use_unified_memory,
|
|
||||||
const std::vector<Visitor>& alloc_visitors,
|
|
||||||
const std::vector<Visitor>& free_visitors)
|
|
||||||
: SubAllocator(alloc_visitors, free_visitors),
|
|
||||||
stream_exec_(stream_exec),
|
|
||||||
gpu_id_(gpu_id),
|
|
||||||
use_unified_memory_(use_unified_memory) {
|
|
||||||
CHECK(stream_exec_ != nullptr);
|
|
||||||
}
|
|
||||||
~GPUMemAllocator() override {}
|
|
||||||
|
|
||||||
void* Alloc(size_t alignment, size_t num_bytes) override {
|
|
||||||
void* ptr = nullptr;
|
|
||||||
if (num_bytes > 0) {
|
|
||||||
if (use_unified_memory_) {
|
|
||||||
ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
|
|
||||||
} else {
|
|
||||||
ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
|
|
||||||
}
|
|
||||||
VisitAlloc(ptr, gpu_id_.value(), num_bytes);
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Free(void* ptr, size_t num_bytes) override {
|
|
||||||
if (ptr != nullptr) {
|
|
||||||
VisitFree(ptr, gpu_id_.value(), num_bytes);
|
|
||||||
if (use_unified_memory_) {
|
|
||||||
stream_exec_->UnifiedMemoryDeallocate(ptr);
|
|
||||||
} else {
|
|
||||||
se::DeviceMemoryBase gpu_ptr(ptr);
|
|
||||||
stream_exec_->Deallocate(&gpu_ptr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
se::StreamExecutor* stream_exec_; // not owned, non-null
|
|
||||||
const PlatformGpuId gpu_id_;
|
|
||||||
const bool use_unified_memory_ = false;
|
|
||||||
|
|
||||||
TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
|
|
||||||
};
|
|
||||||
|
|
||||||
// A GPU memory allocator that implements a 'best-fit with coalescing'
|
// A GPU memory allocator that implements a 'best-fit with coalescing'
|
||||||
// algorithm.
|
// algorithm.
|
||||||
class GPUBFCAllocator : public BFCAllocator {
|
class GPUBFCAllocator : public BFCAllocator {
|
||||||
|
78
tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
Normal file
78
tensorflow/core/common_runtime/gpu/gpu_mem_allocator.h
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
|
||||||
|
#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
|
||||||
|
|
||||||
|
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
|
||||||
|
#include "tensorflow/core/framework/allocator.h"
|
||||||
|
#include "tensorflow/core/platform/stream_executor.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
|
||||||
|
// Suballocator for GPU memory.
|
||||||
|
class GPUMemAllocator : public SubAllocator {
|
||||||
|
public:
|
||||||
|
// 'platform_gpu_id' refers to the ID of the GPU device within
|
||||||
|
// the process and must reference a valid ID in the process.
|
||||||
|
// Note: stream_exec cannot be null.
|
||||||
|
explicit GPUMemAllocator(se::StreamExecutor* stream_exec,
|
||||||
|
PlatformGpuId gpu_id, bool use_unified_memory,
|
||||||
|
const std::vector<Visitor>& alloc_visitors,
|
||||||
|
const std::vector<Visitor>& free_visitors)
|
||||||
|
: SubAllocator(alloc_visitors, free_visitors),
|
||||||
|
stream_exec_(stream_exec),
|
||||||
|
gpu_id_(gpu_id),
|
||||||
|
use_unified_memory_(use_unified_memory) {
|
||||||
|
CHECK(stream_exec_ != nullptr);
|
||||||
|
}
|
||||||
|
~GPUMemAllocator() override {}
|
||||||
|
|
||||||
|
void* Alloc(size_t alignment, size_t num_bytes) override {
|
||||||
|
void* ptr = nullptr;
|
||||||
|
if (num_bytes > 0) {
|
||||||
|
if (use_unified_memory_) {
|
||||||
|
ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);
|
||||||
|
} else {
|
||||||
|
ptr = stream_exec_->AllocateArray<char>(num_bytes).opaque();
|
||||||
|
}
|
||||||
|
VisitAlloc(ptr, gpu_id_.value(), num_bytes);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Free(void* ptr, size_t num_bytes) override {
|
||||||
|
if (ptr != nullptr) {
|
||||||
|
VisitFree(ptr, gpu_id_.value(), num_bytes);
|
||||||
|
if (use_unified_memory_) {
|
||||||
|
stream_exec_->UnifiedMemoryDeallocate(ptr);
|
||||||
|
} else {
|
||||||
|
se::DeviceMemoryBase gpu_ptr(ptr);
|
||||||
|
stream_exec_->Deallocate(&gpu_ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
se::StreamExecutor* stream_exec_; // not owned, non-null
|
||||||
|
const PlatformGpuId gpu_id_;
|
||||||
|
const bool use_unified_memory_ = false;
|
||||||
|
|
||||||
|
TF_DISALLOW_COPY_AND_ASSIGN(GPUMemAllocator);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_MEM_ALLOCATOR_H_
|
Loading…
Reference in New Issue
Block a user