Additional memory profile annotations in BFCAllocator.
PiperOrigin-RevId: 295243569 Change-Id: Iac2a4beb8654fa74ecaabbc9ce82fa2dd75b049e
This commit is contained in:
parent
1e74a754af
commit
d6741e0994
@ -382,7 +382,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
}
|
||||
void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||
if (ptr != nullptr) {
|
||||
AddTraceMe("MemoryAllocation");
|
||||
AddTraceMe("MemoryAllocation", num_bytes);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@ -390,7 +390,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
if (Extend(unused_alignment, rounded_bytes)) {
|
||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||
if (ptr != nullptr) {
|
||||
AddTraceMe("MemoryAllocation");
|
||||
AddTraceMe("MemoryAllocation", num_bytes);
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
@ -403,7 +403,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
if (MergeTimestampedChunks(rounded_bytes)) {
|
||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||
if (ptr != nullptr) {
|
||||
AddTraceMe("MemoryAllocation");
|
||||
AddTraceMe("MemoryAllocation", num_bytes);
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
@ -417,7 +417,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
Extend(unused_alignment, rounded_bytes)) {
|
||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||
if (ptr != nullptr) {
|
||||
AddTraceMe("MemoryAllocation");
|
||||
AddTraceMe("MemoryAllocation", num_bytes);
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
@ -441,7 +441,8 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void BFCAllocator::AddTraceMe(absl::string_view traceme_name) {
|
||||
void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
|
||||
int64 requested_bytes) {
|
||||
tensorflow::profiler::TraceMe trace_me(
|
||||
[&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||
AllocatorStats stats = stats_;
|
||||
@ -452,6 +453,10 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name) {
|
||||
",bytes_allocated=", stats.bytes_in_use,
|
||||
",bytes_available=", bytes_available,
|
||||
",peak_bytes_in_use=", stats.peak_bytes_in_use,
|
||||
",requested_bytes=", requested_bytes,
|
||||
#ifdef TENSORFLOW_MEM_DEBUG
|
||||
",tf_op=", pending_op_name, ",id=", pending_step_id,
|
||||
#endif
|
||||
"#");
|
||||
},
|
||||
/*level=*/profiler::TraceMeLevel::kInfo);
|
||||
@ -589,6 +594,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
||||
BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
|
||||
CHECK(h != kInvalidChunkHandle);
|
||||
|
||||
int64 requested_bytes = ChunkFromHandle(h)->requested_size;
|
||||
MarkFree(h);
|
||||
|
||||
// Consider coalescing it.
|
||||
@ -603,7 +609,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
||||
LOG(INFO) << "F: " << RenderOccupancy();
|
||||
}
|
||||
|
||||
AddTraceMe("MemoryDeallocation");
|
||||
AddTraceMe("MemoryDeallocation", -requested_bytes);
|
||||
}
|
||||
|
||||
// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
|
||||
|
@ -116,8 +116,8 @@ class BFCAllocator : public Allocator {
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// Add TraceMe (in memory allocation and deallocation) for memory stats
|
||||
// profiling.
|
||||
void AddTraceMe(absl::string_view traceme_name)
|
||||
// profiling. The requested_bytes can be negative if it's a deallocation.
|
||||
void AddTraceMe(absl::string_view traceme_name, int64 requested_bytes)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||
|
||||
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
|
||||
|
@ -114,6 +114,7 @@ const StatTypeMap& GetStatTypeMap() {
|
||||
{"bytes_available", kBytesAvailable},
|
||||
{"fragmentation", kFragmentation},
|
||||
{"peak_bytes_in_use", kPeakBytesInUse},
|
||||
{"requested_bytes", kRequestedBytes},
|
||||
{"shape", kTensorShapes},
|
||||
// Device trace arguments.
|
||||
{"device_id", kDeviceId},
|
||||
|
@ -106,6 +106,7 @@ enum StatType {
|
||||
kBytesAvailable,
|
||||
kFragmentation,
|
||||
kPeakBytesInUse,
|
||||
kRequestedBytes,
|
||||
kTensorShapes,
|
||||
// Device trace arguments.
|
||||
kDeviceId,
|
||||
|
Loading…
x
Reference in New Issue
Block a user