From d6741e0994161599926854663dc29c4e14c6880c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 14 Feb 2020 15:44:36 -0800 Subject: [PATCH] Additional memory profile annotations in BFCAllocator. PiperOrigin-RevId: 295243569 Change-Id: Iac2a4beb8654fa74ecaabbc9ce82fa2dd75b049e --- .../core/common_runtime/bfc_allocator.cc | 18 ++++++++++++------ tensorflow/core/common_runtime/bfc_allocator.h | 4 ++-- .../core/profiler/utils/xplane_schema.cc | 1 + tensorflow/core/profiler/utils/xplane_schema.h | 1 + 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 985b882d886..31e8fc6fee8 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -382,7 +382,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, } void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before); if (ptr != nullptr) { - AddTraceMe("MemoryAllocation"); + AddTraceMe("MemoryAllocation", num_bytes); return ptr; } @@ -390,7 +390,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, if (Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before); if (ptr != nullptr) { - AddTraceMe("MemoryAllocation"); + AddTraceMe("MemoryAllocation", num_bytes); return ptr; } } @@ -403,7 +403,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, if (MergeTimestampedChunks(rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before); if (ptr != nullptr) { - AddTraceMe("MemoryAllocation"); + AddTraceMe("MemoryAllocation", num_bytes); return ptr; } } @@ -417,7 +417,7 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, Extend(unused_alignment, rounded_bytes)) { ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before); if (ptr != nullptr) { - AddTraceMe("MemoryAllocation"); + AddTraceMe("MemoryAllocation", num_bytes); return ptr; } } @@ -441,7 +441,8 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment, return nullptr; } -void BFCAllocator::AddTraceMe(absl::string_view traceme_name) { +void BFCAllocator::AddTraceMe(absl::string_view traceme_name, + int64 requested_bytes) { tensorflow::profiler::TraceMe trace_me( [&]() EXCLUSIVE_LOCKS_REQUIRED(lock_) { AllocatorStats stats = stats_; @@ -452,6 +453,10 @@ void BFCAllocator::AddTraceMe(absl::string_view traceme_name) { ",bytes_allocated=", stats.bytes_in_use, ",bytes_available=", bytes_available, ",peak_bytes_in_use=", stats.peak_bytes_in_use, + ",requested_bytes=", requested_bytes, +#ifdef TENSORFLOW_MEM_DEBUG + ",tf_op=", pending_op_name, ",id=", pending_step_id, +#endif "#"); }, /*level=*/profiler::TraceMeLevel::kInfo); @@ -589,6 +594,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) { BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr); CHECK(h != kInvalidChunkHandle); + int64 requested_bytes = ChunkFromHandle(h)->requested_size; MarkFree(h); // Consider coalescing it. @@ -603,7 +609,7 @@ void BFCAllocator::DeallocateRawInternal(void* ptr) { LOG(INFO) << "F: " << RenderOccupancy(); } - AddTraceMe("MemoryDeallocation"); + AddTraceMe("MemoryDeallocation", -requested_bytes); } // Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is c1. diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 2dd7125f5c6..cb02127550d 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -116,8 +116,8 @@ class BFCAllocator : public Allocator { EXCLUSIVE_LOCKS_REQUIRED(lock_); // Add TraceMe (in memory allocation and deallocation) for memory stats - // profiling. - void AddTraceMe(absl::string_view traceme_name) + // profiling. The requested_bytes can be negative if it's a deallocation. + void AddTraceMe(absl::string_view traceme_name, int64 requested_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_); // A ChunkHandle is an index into the chunks_ vector in BFCAllocator diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index cd64e1a6aab..3b9531ea6e0 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -114,6 +114,7 @@ const StatTypeMap& GetStatTypeMap() { {"bytes_available", kBytesAvailable}, {"fragmentation", kFragmentation}, {"peak_bytes_in_use", kPeakBytesInUse}, + {"requested_bytes", kRequestedBytes}, {"shape", kTensorShapes}, // Device trace arguments. {"device_id", kDeviceId}, diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index 9aea67b6e12..5e059154afd 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -106,6 +106,7 @@ enum StatType { kBytesAvailable, kFragmentation, kPeakBytesInUse, + kRequestedBytes, kTensorShapes, // Device trace arguments. kDeviceId,