Enable SubAllocator to inform the main allocator of an over-allocation.

If a suballocator has alignment constraints that require a minimum
allocation size, it returns the true number of bytes allocated in the
bytes_received argument.

PiperOrigin-RevId: 343598132
Change-Id: I0bfe01e5a952ba0a924da23449336fa9fe94d50c
This commit is contained in:
A. Unique TensorFlower 2020-11-20 17:52:47 -08:00 committed by TensorFlower Gardener
parent bd60d491c2
commit e984937a4b
7 changed files with 27 additions and 11 deletions

View File

@ -125,7 +125,8 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
// Try allocating. // Try allocating.
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes); size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
void* mem_addr = sub_allocator_->Alloc(alignment, bytes); size_t bytes_received;
void* mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
if (mem_addr == nullptr && !started_backpedal_) { if (mem_addr == nullptr && !started_backpedal_) {
// Only backpedal once. // Only backpedal once.
started_backpedal_ = true; started_backpedal_ = true;
@ -136,7 +137,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
while (mem_addr == nullptr) { while (mem_addr == nullptr) {
bytes = RoundedBytes(bytes * kBackpedalFactor); bytes = RoundedBytes(bytes * kBackpedalFactor);
if (bytes < rounded_bytes) break; if (bytes < rounded_bytes) break;
mem_addr = sub_allocator_->Alloc(alignment, bytes); mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
} }
} }
@ -158,7 +159,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
VLOG(1) << "Allocated memory at " << mem_addr << " to " VLOG(1) << "Allocated memory at " << mem_addr << " to "
<< static_cast<void*>(static_cast<char*>(mem_addr) + bytes); << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
region_manager_.AddAllocationRegion(mem_addr, bytes); region_manager_.AddAllocationRegion(mem_addr, bytes_received);
// Create one large chunk for the whole memory space that will // Create one large chunk for the whole memory space that will
// be chunked later. // be chunked later.

View File

@ -36,8 +36,10 @@ class DeviceHostAllocator : public SubAllocator {
} }
~DeviceHostAllocator() override {} ~DeviceHostAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override { void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
void* ptr = nullptr; void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) { if (num_bytes > 0) {
ptr = stream_exec_->HostMemoryAllocate(num_bytes); ptr = stream_exec_->HostMemoryAllocate(num_bytes);
if (ptr == nullptr) { if (ptr == nullptr) {

View File

@ -41,8 +41,10 @@ class DeviceMemAllocator : public SubAllocator {
} }
~DeviceMemAllocator() override {} ~DeviceMemAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override { void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
void* ptr = nullptr; void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) { if (num_bytes > 0) {
if (use_unified_memory_) { if (use_unified_memory_) {
ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes); ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);

View File

@ -127,8 +127,9 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
delete pr; delete pr;
return PrepareChunk(r, alignment, num_bytes); return PrepareChunk(r, alignment, num_bytes);
} else { } else {
void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes); size_t bytes_received;
return PrepareChunk(ptr, alignment, num_bytes); void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes, &bytes_received);
return PrepareChunk(ptr, alignment, bytes_received);
} }
} }
@ -256,8 +257,10 @@ void PoolAllocator::EvictOne() {
} }
} }
void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) { void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) {
void* ptr = nullptr; void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) { if (num_bytes > 0) {
if (numa_node_ == port::kNUMANoAffinity) { if (numa_node_ == port::kNUMANoAffinity) {
ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment)); ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));

View File

@ -22,6 +22,7 @@ limitations under the License.
#include <map> #include <map>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/logging.h"
@ -154,7 +155,8 @@ class BasicCPUAllocator : public SubAllocator {
~BasicCPUAllocator() override {} ~BasicCPUAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override; void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override;
void Free(void* ptr, size_t num_bytes) override; void Free(void* ptr, size_t num_bytes) override;

View File

@ -439,7 +439,11 @@ class SubAllocator {
const std::vector<Visitor>& free_visitors); const std::vector<Visitor>& free_visitors);
virtual ~SubAllocator() {} virtual ~SubAllocator() {}
virtual void* Alloc(size_t alignment, size_t num_bytes) = 0; // Allocates at least num_bytes. Returns actual number of bytes allocated in
// bytes_received. The caller can safely use the full bytes_received sized
// buffer following the returned pointer.
virtual void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) = 0;
virtual void Free(void* ptr, size_t num_bytes) = 0; virtual void Free(void* ptr, size_t num_bytes) = 0;
protected: protected:

View File

@ -156,7 +156,9 @@ class CPUAllocatorFactory : public AllocatorFactory {
explicit CPUSubAllocator(CPUAllocator* cpu_allocator) explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
: SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {} : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
void* Alloc(size_t alignment, size_t num_bytes) override { void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
*bytes_received = num_bytes;
return cpu_allocator_->AllocateRaw(alignment, num_bytes); return cpu_allocator_->AllocateRaw(alignment, num_bytes);
} }