Add memory stats profiling in BFCAllocator.

PiperOrigin-RevId: 289694152
Change-Id: I67792021ec90e67f5ccd47d391bf11ef36ff23ed
This commit is contained in:
A. Unique TensorFlower 2020-01-14 11:30:02 -08:00 committed by TensorFlower Gardener
parent 9b58a04025
commit cb2ca1e812
5 changed files with 74 additions and 14 deletions

View File

@ -2779,7 +2779,9 @@ cc_library(
":protos_all_cc",
":shared_counter",
"//tensorflow/core/framework:allocator",
"//tensorflow/core/profiler/lib:traceme",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/strings",
],
)

View File

@ -17,6 +17,7 @@ limitations under the License.
#include <atomic>
#include "absl/strings/string_view.h"
#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/strings/numbers.h"
@ -29,6 +30,7 @@ limitations under the License.
#include "tensorflow/core/platform/stacktrace.h"
#endif
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/protobuf/bfc_memory_map.pb.h"
namespace tensorflow {
@ -380,6 +382,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
}
void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
if (ptr != nullptr) {
AddTraceMe("MemoryAllocation");
return ptr;
}
@ -387,6 +390,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
if (Extend(unused_alignment, rounded_bytes)) {
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
if (ptr != nullptr) {
AddTraceMe("MemoryAllocation");
return ptr;
}
}
@ -399,6 +403,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
if (MergeTimestampedChunks(rounded_bytes)) {
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
if (ptr != nullptr) {
AddTraceMe("MemoryAllocation");
return ptr;
}
}
@ -412,6 +417,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
Extend(unused_alignment, rounded_bytes)) {
ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
if (ptr != nullptr) {
AddTraceMe("MemoryAllocation");
return ptr;
}
}
@ -435,6 +441,24 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
return nullptr;
}
// Emits a profiler TraceMe event (level 2) named `traceme_name`, annotated
// with this allocator's current memory stats, for memory-stats profiling of
// allocations and deallocations. Per the header declaration, the caller must
// hold lock_ (EXCLUSIVE_LOCKS_REQUIRED).
void BFCAllocator::AddTraceMe(absl::string_view traceme_name) {
tensorflow::profiler::TraceMe trace_me(
// Metadata string is produced by this lambda — presumably TraceMe only
// invokes it when profiling is active, deferring the StrCat cost.
[&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) {
AllocatorStats stats = stats_;
double fragmentation = GetFragmentation();
// Headroom left before hitting memory_limit_: reserved and in-use
// bytes both count against the limit.
int64 bytes_available =
memory_limit_ - stats.bytes_reserved - stats.bytes_in_use;
// "name#key1=val1,key2=val2#" is the TraceMe metadata encoding; the
// keys match the stat-type tables added elsewhere in this change.
return absl::StrCat(traceme_name, "#allocator_name=", name_,
",bytes_reserved=", stats.bytes_reserved,
",bytes_allocated=", stats.bytes_in_use,
",bytes_available=", bytes_available,
",fragmentation=", fragmentation,
",peak_bytes_in_use=", stats.peak_bytes_in_use,
"#");
},
/*level=*/2);
}
void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
size_t num_bytes, uint64 freed_before) {
// First identify the first bin that could satisfy rounded_bytes.
@ -580,6 +604,8 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) {
if (VLOG_IS_ON(4)) {
LOG(INFO) << "F: " << RenderOccupancy();
}
AddTraceMe("MemoryDeallocation");
}
// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
@ -1009,8 +1035,6 @@ MemoryDump BFCAllocator::RecordMemoryMapInternal() {
mas->set_bytes_in_use(stats_.bytes_in_use);
mas->set_peak_bytes_in_use(stats_.peak_bytes_in_use);
mas->set_largest_alloc_size(stats_.largest_alloc_size);
int64 largest_free_chunk = 0;
int64 free_bytes = 0;
// Record summary data for every bin.
const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
@ -1046,21 +1070,11 @@ MemoryDump BFCAllocator::RecordMemoryMapInternal() {
if (timing_counter_) {
mc->set_freed_at_count(c->in_use() ? 0 : c->freed_at_count);
}
if (!c->in_use()) {
free_bytes += c->size;
if (c->size > largest_free_chunk) {
largest_free_chunk = c->size;
}
}
h = c->next;
}
}
double frag_metric = 0.0;
if (free_bytes > 0) {
frag_metric =
(free_bytes - largest_free_chunk) / static_cast<double>(free_bytes);
}
mas->set_fragmentation_metric(frag_metric);
mas->set_fragmentation_metric(GetFragmentation());
#ifdef TENSORFLOW_MEM_DEBUG
// Record the recent size history
@ -1077,6 +1091,31 @@ MemoryDump BFCAllocator::RecordMemoryMapInternal() {
return md;
}
double BFCAllocator::GetFragmentation() {
int64 largest_free_chunk = 0;
int64 free_bytes = 0;
for (const auto& region : region_manager_.regions()) {
ChunkHandle chunk_handle = region_manager_.get_handle(region.ptr());
while (chunk_handle != kInvalidChunkHandle) {
const Chunk* chunk = ChunkFromHandle(chunk_handle);
if (!chunk->in_use()) {
free_bytes += chunk->size;
if (chunk->size > largest_free_chunk) {
largest_free_chunk = chunk->size;
}
}
chunk_handle = chunk->next;
}
}
double frag_metric = 0.0;
if (free_bytes > 0) {
frag_metric =
(free_bytes - largest_free_chunk) / static_cast<double>(free_bytes);
}
return frag_metric;
}
absl::optional<AllocatorStats> BFCAllocator::GetStats() {
mutex_lock l(lock_);
return stats_;

View File

@ -115,6 +115,11 @@ class BFCAllocator : public Allocator {
bool MergeTimestampedChunks(size_t required_bytes)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Add TraceMe (in memory allocation and deallocation) for memory stats
// profiling.
void AddTraceMe(absl::string_view traceme_name)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
// A ChunkHandle is an index into the chunks_ vector in BFCAllocator
// kInvalidChunkHandle means an invalid chunk
typedef size_t ChunkHandle;
@ -438,6 +443,10 @@ class BFCAllocator : public Allocator {
ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Fragmentation is computed as one minus the ratio of the largest free
// chunk's size to total free memory, and returns a value within [0, 1].
double GetFragmentation() EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Information about a Bin that is useful for debugging.
struct BinDebugInfo {
size_t total_bytes_in_use = 0;

View File

@ -40,6 +40,8 @@ static const absl::string_view kHostEventTypeMetadataMap[] = {
"EagerKernelExecute",
"ExecutorState::Process",
"ExecutorDoneCallback",
"MemoryAllocation",
"MemoryDeallocation",
// tf data captured function events.
"InstantiatedCapturedFunction::Run",
"InstantiatedCapturedFunction::RunWithBorrowedArgs",
@ -81,10 +83,12 @@ static const absl::string_view kStatTypeStrMap[] = {
"step_num",
"iter_num",
"index_on_host",
"allocator_name",
"bytes_reserved",
"bytes_allocated",
"bytes_available",
"fragmentation",
"peak_bytes_in_use",
"device_id",
"context_id",
"correlation_id",
@ -136,10 +140,12 @@ const absl::flat_hash_map<absl::string_view, StatType>& GetStatTypeMap() {
{"step_num", kStepNum},
{"iter_num", kIterNum},
{"index_on_host", kIndexOnHost},
{"allocator_name", kAllocatorName},
{"bytes_reserved", kBytesReserved},
{"bytes_allocated", kBytesAllocated},
{"bytes_available", kBytesAvailable},
{"fragmentation", kFragmentation},
{"peak_bytes_in_use", kPeakBytesInUse},
// Device trace arguments.
{"device_id", kDeviceId},
{"context_id", kContextId},

View File

@ -40,6 +40,8 @@ enum HostEventType {
kEagerKernelExecute,
kExecutorStateProcess,
kExecutorDoneCallback,
kMemoryAllocation,
kMemoryDeallocation,
// tf.data captured function events.
kTfDataCapturedFunctionRun,
kTfDataCapturedFunctionRunWithBorrowedArgs,
@ -80,10 +82,12 @@ enum StatType {
kStepNum,
kIterNum,
kIndexOnHost,
kAllocatorName,
kBytesReserved,
kBytesAllocated,
kBytesAvailable,
kFragmentation,
kPeakBytesInUse,
// Device trace arguments.
kDeviceId,
kContextId,