Add memory fragmentation instrumentation for BFCAllocator.
PiperOrigin-RevId: 307956353 Change-Id: Ia3a02a3161e1d8d3177ca22ffada6b3520953763
This commit is contained in:
parent
074b77f519
commit
7bd0df753f
@ -441,8 +441,30 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int64 BFCAllocator::LargestFreeChunk() {
  // Walk bins from the largest downward: the first non-empty bin holds the
  // biggest free chunks, and each bin's free_chunks set is ordered by size
  // (then address), so rbegin() names the largest free chunk overall.
  int bin_index = kNumBins - 1;
  while (bin_index >= 0) {
    auto* bin = BinFromIndex(bin_index);
    if (!bin->free_chunks.empty()) {
      const auto largest_handle = *bin->free_chunks.rbegin();
      return ChunkFromHandle(largest_handle)->size;
    }
    --bin_index;
  }
  // No free chunks exist anywhere.
  return 0;
}
|
||||
|
||||
double BFCAllocator::GetFragmentation() {
|
||||
int64 bytes_available = total_region_allocated_bytes_ - stats_.bytes_in_use;
|
||||
DCHECK_GT(bytes_available, 0);
|
||||
return static_cast<double>(bytes_available - LargestFreeChunk()) /
|
||||
bytes_available;
|
||||
}
|
||||
|
||||
void BFCAllocator::AddTraceMe(absl::string_view traceme_name, const void* ptr) {
  // Resolve the chunk backing `ptr`, then delegate to the overload that
  // takes the request/allocation byte counts explicitly.
  auto handle = region_manager_.get_handle(ptr);
  auto* chunk = ChunkFromHandle(handle);
  AddTraceMe(traceme_name, chunk->ptr, chunk->requested_size, chunk->size);
}
|
||||
|
||||
void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
|
||||
const void* chunk_ptr) {
|
||||
const void* chunk_ptr, int64 req_bytes,
|
||||
int64 alloc_bytes) {
|
||||
// Internal users will see the memory profile with default trace level.
|
||||
auto traceme_level = profiler::TraceMeLevel::kVerbose;
|
||||
#ifdef PLATFORM_GOOGLE
|
||||
@ -454,21 +476,19 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
|
||||
AllocatorStats stats = stats_;
|
||||
int64 bytes_available =
|
||||
memory_limit_ - stats.bytes_reserved - stats.bytes_in_use;
|
||||
BFCAllocator::Chunk* chunk =
|
||||
ChunkFromHandle(region_manager_.get_handle(chunk_ptr));
|
||||
const auto& annotation =
|
||||
ScopedMemoryDebugAnnotation::CurrentAnnotation();
|
||||
std::string tensor_shape = annotation.pending_shape
|
||||
? annotation.pending_shape->DebugString()
|
||||
: "";
|
||||
|
||||
return absl::StrCat(traceme_name, "#allocator_name=", name_,
|
||||
",bytes_reserved=", stats.bytes_reserved,
|
||||
",bytes_allocated=", stats.bytes_in_use,
|
||||
",bytes_available=", bytes_available,
|
||||
",fragmentation=", GetFragmentation(),
|
||||
",peak_bytes_in_use=", stats.peak_bytes_in_use,
|
||||
",requested_bytes=", chunk->requested_size,
|
||||
",allocation_bytes=", chunk->size,
|
||||
",requested_bytes=", req_bytes,
|
||||
",allocation_bytes=", alloc_bytes,
|
||||
",addr=", reinterpret_cast<uint64>(chunk_ptr),
|
||||
",tf_op=", annotation.pending_op_name,
|
||||
",id=", annotation.pending_step_id,
|
||||
@ -613,11 +633,13 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
||||
// Find the chunk from the ptr.
|
||||
BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
|
||||
CHECK(h != kInvalidChunkHandle);
|
||||
// Record chunk information before it's freed.
|
||||
Chunk* chunk = ChunkFromHandle(h);
|
||||
void* chunk_ptr = chunk->ptr;
|
||||
int64 req_bytes = chunk->requested_size;
|
||||
int64 alloc_bytes = chunk->size;
|
||||
|
||||
MarkFree(h);
|
||||
// TraceMe needs to be added after MarkFree and before InsertFreeChunkIntoBin
|
||||
// for correct memory stats.
|
||||
AddTraceMe("MemoryDeallocation", ptr);
|
||||
|
||||
// Consider coalescing it.
|
||||
if (timing_counter_) {
|
||||
@ -627,6 +649,10 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
||||
InsertFreeChunkIntoBin(TryToCoalesce(h, false));
|
||||
}
|
||||
|
||||
// TraceMe needs to be added after MarkFree and InsertFreeChunkIntoBin for
|
||||
// correct aggregation stats (bytes_in_use, fragmentation).
|
||||
AddTraceMe("MemoryDeallocation", chunk_ptr, req_bytes, alloc_bytes);
|
||||
|
||||
if (VLOG_IS_ON(4)) {
|
||||
LOG(INFO) << "F: " << RenderOccupancy();
|
||||
}
|
||||
@ -1115,31 +1141,6 @@ MemoryDump BFCAllocator::RecordMemoryMapInternal() {
|
||||
return md;
|
||||
}
|
||||
|
||||
double BFCAllocator::GetFragmentation() {
|
||||
int64 largest_free_chunk = 0;
|
||||
int64 free_bytes = 0;
|
||||
for (const auto& region : region_manager_.regions()) {
|
||||
ChunkHandle chunk_handle = region_manager_.get_handle(region.ptr());
|
||||
while (chunk_handle != kInvalidChunkHandle) {
|
||||
const Chunk* chunk = ChunkFromHandle(chunk_handle);
|
||||
if (!chunk->in_use()) {
|
||||
free_bytes += chunk->size;
|
||||
if (chunk->size > largest_free_chunk) {
|
||||
largest_free_chunk = chunk->size;
|
||||
}
|
||||
}
|
||||
chunk_handle = chunk->next;
|
||||
}
|
||||
}
|
||||
double frag_metric = 0.0;
|
||||
if (free_bytes > 0) {
|
||||
frag_metric =
|
||||
(free_bytes - largest_free_chunk) / static_cast<double>(free_bytes);
|
||||
}
|
||||
|
||||
return frag_metric;
|
||||
}
|
||||
|
||||
absl::optional<AllocatorStats> BFCAllocator::GetStats() {
|
||||
mutex_lock l(lock_);
|
||||
return stats_;
|
||||
|
@ -115,10 +115,19 @@ class BFCAllocator : public Allocator {
|
||||
bool MergeTimestampedChunks(size_t required_bytes)
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// Return the largest free chunk bytes from the largest bin in constant time.
|
||||
// The free chunks are sorted by size (and then address) in a bin.
|
||||
int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// Add TraceMe (in memory allocation and deallocation) for memory stats
|
||||
// profiling. The chunk_ptr is passed to get information such as address,
|
||||
// chunk size and requested_size.
|
||||
void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr)
|
||||
void AddTraceMe(absl::string_view traceme_name, const void* ptr)
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// Overloaded AddTraceMe function with chunk information.
|
||||
void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
|
||||
int64 req_bytes, int64 alloc_bytes)
|
||||
TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
|
||||
|
Loading…
Reference in New Issue
Block a user