Enable SubAllocator to inform the main allocator of an over-allocation.

If a suballocator has alignment constraints that require a minimum
allocation size, it returns the true number of bytes allocated in the
bytes_received argument.

PiperOrigin-RevId: 343598132
Change-Id: I0bfe01e5a952ba0a924da23449336fa9fe94d50c
This commit is contained in:
A. Unique TensorFlower 2020-11-20 17:52:47 -08:00 committed by TensorFlower Gardener
parent bd60d491c2
commit e984937a4b
7 changed files with 27 additions and 11 deletions

View File

@ -125,7 +125,8 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
// Try allocating.
size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
size_t bytes_received;
void* mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
if (mem_addr == nullptr && !started_backpedal_) {
// Only backpedal once.
started_backpedal_ = true;
@ -136,7 +137,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
while (mem_addr == nullptr) {
bytes = RoundedBytes(bytes * kBackpedalFactor);
if (bytes < rounded_bytes) break;
mem_addr = sub_allocator_->Alloc(alignment, bytes);
mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
}
}
@ -158,7 +159,7 @@ bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
VLOG(1) << "Allocated memory at " << mem_addr << " to "
<< static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
region_manager_.AddAllocationRegion(mem_addr, bytes);
region_manager_.AddAllocationRegion(mem_addr, bytes_received);
// Create one large chunk for the whole memory space that will
// be chunked later.

View File

@ -36,8 +36,10 @@ class DeviceHostAllocator : public SubAllocator {
}
~DeviceHostAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override {
void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) {
ptr = stream_exec_->HostMemoryAllocate(num_bytes);
if (ptr == nullptr) {

View File

@ -41,8 +41,10 @@ class DeviceMemAllocator : public SubAllocator {
}
~DeviceMemAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override {
void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) {
if (use_unified_memory_) {
ptr = stream_exec_->UnifiedMemoryAllocate(num_bytes);

View File

@ -127,8 +127,9 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
delete pr;
return PrepareChunk(r, alignment, num_bytes);
} else {
void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
return PrepareChunk(ptr, alignment, num_bytes);
size_t bytes_received;
void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes, &bytes_received);
return PrepareChunk(ptr, alignment, bytes_received);
}
}
@ -256,8 +257,10 @@ void PoolAllocator::EvictOne() {
}
}
void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes) {
void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) {
void* ptr = nullptr;
*bytes_received = num_bytes;
if (num_bytes > 0) {
if (numa_node_ == port::kNUMANoAffinity) {
ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));

View File

@ -22,6 +22,7 @@ limitations under the License.
#include <map>
#include <memory>
#include <vector>
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/platform/logging.h"
@ -154,7 +155,8 @@ class BasicCPUAllocator : public SubAllocator {
~BasicCPUAllocator() override {}
void* Alloc(size_t alignment, size_t num_bytes) override;
void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override;
void Free(void* ptr, size_t num_bytes) override;

View File

@ -439,7 +439,11 @@ class SubAllocator {
const std::vector<Visitor>& free_visitors);
virtual ~SubAllocator() {}
virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
// Allocates at least num_bytes. Returns the actual number of bytes allocated
// in bytes_received. The caller can safely use the full bytes_received-sized
// buffer starting at the returned pointer.
virtual void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) = 0;
virtual void Free(void* ptr, size_t num_bytes) = 0;
protected:

View File

@ -156,7 +156,9 @@ class CPUAllocatorFactory : public AllocatorFactory {
explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
: SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
void* Alloc(size_t alignment, size_t num_bytes) override {
void* Alloc(size_t alignment, size_t num_bytes,
size_t* bytes_received) override {
*bytes_received = num_bytes;
return cpu_allocator_->AllocateRaw(alignment, num_bytes);
}