Additional memory profile annotations in BFCAllocator.
PiperOrigin-RevId: 295243569 Change-Id: Iac2a4beb8654fa74ecaabbc9ce82fa2dd75b049e
This commit is contained in:
parent
1e74a754af
commit
d6741e0994
@ -382,7 +382,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
|||||||
}
|
}
|
||||||
void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||||
if (ptr != nullptr) {
|
if (ptr != nullptr) {
|
||||||
AddTraceMe("MemoryAllocation");
|
AddTraceMe("MemoryAllocation", num_bytes);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -390,7 +390,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
|||||||
if (Extend(unused_alignment, rounded_bytes)) {
|
if (Extend(unused_alignment, rounded_bytes)) {
|
||||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||||
if (ptr != nullptr) {
|
if (ptr != nullptr) {
|
||||||
AddTraceMe("MemoryAllocation");
|
AddTraceMe("MemoryAllocation", num_bytes);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -403,7 +403,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
|||||||
if (MergeTimestampedChunks(rounded_bytes)) {
|
if (MergeTimestampedChunks(rounded_bytes)) {
|
||||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||||
if (ptr != nullptr) {
|
if (ptr != nullptr) {
|
||||||
AddTraceMe("MemoryAllocation");
|
AddTraceMe("MemoryAllocation", num_bytes);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -417,7 +417,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
|||||||
Extend(unused_alignment, rounded_bytes)) {
|
Extend(unused_alignment, rounded_bytes)) {
|
||||||
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
|
||||||
if (ptr != nullptr) {
|
if (ptr != nullptr) {
|
||||||
AddTraceMe("MemoryAllocation");
|
AddTraceMe("MemoryAllocation", num_bytes);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -441,7 +441,8 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BFCAllocator::AddTraceMe(absl::string_view traceme_name) {
|
void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
|
||||||
|
int64 requested_bytes) {
|
||||||
tensorflow::profiler::TraceMe trace_me(
|
tensorflow::profiler::TraceMe trace_me(
|
||||||
[&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
[&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
|
||||||
AllocatorStats stats = stats_;
|
AllocatorStats stats = stats_;
|
||||||
@ -452,6 +453,10 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name) {
|
|||||||
",bytes_allocated=", stats.bytes_in_use,
|
",bytes_allocated=", stats.bytes_in_use,
|
||||||
",bytes_available=", bytes_available,
|
",bytes_available=", bytes_available,
|
||||||
",peak_bytes_in_use=", stats.peak_bytes_in_use,
|
",peak_bytes_in_use=", stats.peak_bytes_in_use,
|
||||||
|
",requested_bytes=", requested_bytes,
|
||||||
|
#ifdef TENSORFLOW_MEM_DEBUG
|
||||||
|
",tf_op=", pending_op_name, ",id=", pending_step_id,
|
||||||
|
#endif
|
||||||
"#");
|
"#");
|
||||||
},
|
},
|
||||||
/*level=*/profiler::TraceMeLevel::kInfo);
|
/*level=*/profiler::TraceMeLevel::kInfo);
|
||||||
@ -589,6 +594,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
|||||||
BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
|
BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
|
||||||
CHECK(h != kInvalidChunkHandle);
|
CHECK(h != kInvalidChunkHandle);
|
||||||
|
|
||||||
|
int64 requested_bytes = ChunkFromHandle(h)->requested_size;
|
||||||
MarkFree(h);
|
MarkFree(h);
|
||||||
|
|
||||||
// Consider coalescing it.
|
// Consider coalescing it.
|
||||||
@ -603,7 +609,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
|
|||||||
LOG(INFO) << "F: " << RenderOccupancy();
|
LOG(INFO) << "F: " << RenderOccupancy();
|
||||||
}
|
}
|
||||||
|
|
||||||
AddTraceMe("MemoryDeallocation");
|
AddTraceMe("MemoryDeallocation", -requested_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
|
// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
|
||||||
|
@ -116,8 +116,8 @@ class BFCAllocator : public Allocator {
|
|||||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||||
|
|
||||||
// Add TraceMe (in memory allocation and deallocation) for memory stats
|
// Add TraceMe (in memory allocation and deallocation) for memory stats
|
||||||
// profiling.
|
// profiling. The requested_bytes can be negative if it's a deallocation.
|
||||||
void AddTraceMe(absl::string_view traceme_name)
|
void AddTraceMe(absl::string_view traceme_name, int64 requested_bytes)
|
||||||
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
EXCLUSIVE_LOCKS_REQUIRED(lock_);
|
||||||
|
|
||||||
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
|
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
|
||||||
|
@ -114,6 +114,7 @@ const StatTypeMap& GetStatTypeMap() {
|
|||||||
{"bytes_available", kBytesAvailable},
|
{"bytes_available", kBytesAvailable},
|
||||||
{"fragmentation", kFragmentation},
|
{"fragmentation", kFragmentation},
|
||||||
{"peak_bytes_in_use", kPeakBytesInUse},
|
{"peak_bytes_in_use", kPeakBytesInUse},
|
||||||
|
{"requested_bytes", kRequestedBytes},
|
||||||
{"shape", kTensorShapes},
|
{"shape", kTensorShapes},
|
||||||
// Device trace arguments.
|
// Device trace arguments.
|
||||||
{"device_id", kDeviceId},
|
{"device_id", kDeviceId},
|
||||||
|
@ -106,6 +106,7 @@ enum StatType {
|
|||||||
kBytesAvailable,
|
kBytesAvailable,
|
||||||
kFragmentation,
|
kFragmentation,
|
||||||
kPeakBytesInUse,
|
kPeakBytesInUse,
|
||||||
|
kRequestedBytes,
|
||||||
kTensorShapes,
|
kTensorShapes,
|
||||||
// Device trace arguments.
|
// Device trace arguments.
|
||||||
kDeviceId,
|
kDeviceId,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user