Add memory fragmentation instrumentation for BFCAllocator.

PiperOrigin-RevId: 307956353
Change-Id: Ia3a02a3161e1d8d3177ca22ffada6b3520953763
Authored by A. Unique TensorFlower on 2020-04-22 19:35:46 -07:00; committed by TensorFlower Gardener.
parent 074b77f519
commit 7bd0df753f
2 changed files with 45 additions and 35 deletions

View File

@ -441,8 +441,30 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
return nullptr;
}
// Returns the size in bytes of the largest free chunk across all bins, or 0
// if no free chunks exist.  Bins are scanned from largest to smallest; since
// chunks within a bin are ordered by size (then address), the last chunk of
// the first non-empty bin is the global maximum.
int64 BFCAllocator::LargestFreeChunk() {
  for (int bin_index = kNumBins - 1; bin_index >= 0; --bin_index) {
    Bin* bin = BinFromIndex(bin_index);
    if (!bin->free_chunks.empty()) {
      const ChunkHandle largest = *bin->free_chunks.rbegin();
      return ChunkFromHandle(largest)->size;
    }
  }
  return 0;  // Every bin was empty.
}
// Returns the fragmentation metric for currently free memory: the fraction
// of free bytes that lie OUTSIDE the single largest free chunk.  0.0 means
// all free memory is contiguous; values near 1.0 mean free memory is
// scattered across many small chunks.
double BFCAllocator::GetFragmentation() {
  int64 bytes_available = total_region_allocated_bytes_ - stats_.bytes_in_use;
  DCHECK_GT(bytes_available, 0);
  // DCHECK is compiled out in opt builds, so guard explicitly: if every
  // allocated byte is in use, bytes_available is 0 and the division below
  // would produce NaN/Inf in the emitted profiler stats.  Report zero
  // fragmentation instead.
  if (bytes_available <= 0) return 0.0;
  return static_cast<double>(bytes_available - LargestFreeChunk()) /
         bytes_available;
}
// Convenience overload: resolves `ptr` to its owning chunk and forwards the
// chunk's base address, requested size, and allocated size to the full
// AddTraceMe overload.
void BFCAllocator::AddTraceMe(absl::string_view traceme_name, const void* ptr) {
  const ChunkHandle handle = region_manager_.get_handle(ptr);
  BFCAllocator::Chunk* chunk = ChunkFromHandle(handle);
  AddTraceMe(traceme_name, chunk->ptr, chunk->requested_size, chunk->size);
}
void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
const void* chunk_ptr) {
const void* chunk_ptr, int64 req_bytes,
int64 alloc_bytes) {
// Internal users will see the memory profile with default trace level.
auto traceme_level = profiler::TraceMeLevel::kVerbose;
#ifdef PLATFORM_GOOGLE
@ -454,21 +476,19 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
AllocatorStats stats = stats_;
int64 bytes_available =
memory_limit_ - stats.bytes_reserved - stats.bytes_in_use;
BFCAllocator::Chunk* chunk =
ChunkFromHandle(region_manager_.get_handle(chunk_ptr));
const auto& annotation =
ScopedMemoryDebugAnnotation::CurrentAnnotation();
std::string tensor_shape = annotation.pending_shape
? annotation.pending_shape->DebugString()
: "";
return absl::StrCat(traceme_name, "#allocator_name=", name_,
",bytes_reserved=", stats.bytes_reserved,
",bytes_allocated=", stats.bytes_in_use,
",bytes_available=", bytes_available,
",fragmentation=", GetFragmentation(),
",peak_bytes_in_use=", stats.peak_bytes_in_use,
",requested_bytes=", chunk->requested_size,
",allocation_bytes=", chunk->size,
",requested_bytes=", req_bytes,
",allocation_bytes=", alloc_bytes,
",addr=", reinterpret_cast<uint64>(chunk_ptr),
",tf_op=", annotation.pending_op_name,
",id=", annotation.pending_step_id,
@ -613,11 +633,13 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
// Find the chunk from the ptr.
BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
CHECK(h != kInvalidChunkHandle);
// Record chunk information before it's freed.
Chunk* chunk = ChunkFromHandle(h);
void* chunk_ptr = chunk->ptr;
int64 req_bytes = chunk->requested_size;
int64 alloc_bytes = chunk->size;
MarkFree(h);
// TraceMe needs to be added after MarkFree and before InsertFreeChunkIntoBin
// for correct memory stats.
AddTraceMe("MemoryDeallocation", ptr);
// Consider coalescing it.
if (timing_counter_) {
@ -627,6 +649,10 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
InsertFreeChunkIntoBin(TryToCoalesce(h, false));
}
// TraceMe needs to be added after MarkFree and InsertFreeChunkIntoBin for
// correct aggregation stats (bytes_in_use, fragmentation).
AddTraceMe("MemoryDeallocation", chunk_ptr, req_bytes, alloc_bytes);
if (VLOG_IS_ON(4)) {
LOG(INFO) << "F: " << RenderOccupancy();
}
@ -1115,31 +1141,6 @@ MemoryDump BFCAllocator::RecordMemoryMapInternal() {
return md;
}
double BFCAllocator::GetFragmentation() {
int64 largest_free_chunk = 0;
int64 free_bytes = 0;
for (const auto& region : region_manager_.regions()) {
ChunkHandle chunk_handle = region_manager_.get_handle(region.ptr());
while (chunk_handle != kInvalidChunkHandle) {
const Chunk* chunk = ChunkFromHandle(chunk_handle);
if (!chunk->in_use()) {
free_bytes += chunk->size;
if (chunk->size > largest_free_chunk) {
largest_free_chunk = chunk->size;
}
}
chunk_handle = chunk->next;
}
}
double frag_metric = 0.0;
if (free_bytes > 0) {
frag_metric =
(free_bytes - largest_free_chunk) / static_cast<double>(free_bytes);
}
return frag_metric;
}
absl::optional<AllocatorStats> BFCAllocator::GetStats() {
mutex_lock l(lock_);
return stats_;

View File

@ -115,10 +115,19 @@ class BFCAllocator : public Allocator {
bool MergeTimestampedChunks(size_t required_bytes)
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Return the largest free chunk bytes from the largest bin in constant time.
// The free chunks are sorted by size (and then address) in a bin.
int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Add TraceMe (in memory allocation and deallocation) for memory stats
// profiling. The chunk_ptr is passed to get information such as address,
// chunk size and requested_size.
void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr)
void AddTraceMe(absl::string_view traceme_name, const void* ptr)
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Overloaded AddTraceMe function with chunk information.
void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
int64 req_bytes, int64 alloc_bytes)
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator