From c4d9a3a647df5598924c663845638abe37adf725 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Mar 2020 11:32:10 -0800 Subject: [PATCH] Enable op name tracking for host memory in BFCAllocator. PiperOrigin-RevId: 299398741 Change-Id: I836f38e844c07e2cc28d8ad4a518438b4075ee88 --- tensorflow/core/common_runtime/bfc_allocator.h | 2 +- tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h | 4 ---- .../core/common_runtime/hierarchical_tree_broadcaster.cc | 1 + .../core/distributed_runtime/collective_rma_distributed.cc | 3 +++ .../core/distributed_runtime/rpc/grpc_worker_service.cc | 1 + 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index c39652692b7..a41ca5a1066 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -81,7 +81,7 @@ class BFCAllocator : public Allocator { void SetSafeFrontier(uint64 count) override; - virtual bool ShouldRecordOpName() const { return false; } + bool ShouldRecordOpName() const { return true; } MemoryDump RecordMemoryMap(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index 02b1a7418d8..0f65abd6e9f 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -41,10 +41,6 @@ class GPUBFCAllocator : public BFCAllocator { TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator); -#ifdef TENSORFLOW_MEM_DEBUG - bool ShouldRecordOpName() const override { return true; } -#endif - private: static bool GetAllowGrowthValue(const GPUOptions& gpu_options); static bool GetGarbageCollectionValue(); diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc index 344ea0ac692..66f77bd403e 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc @@ -409,6 +409,7 @@ void HierarchicalTreeBroadcaster::DispatchSend(int subdiv, int dst_rank, int src_rank, const Tensor* src_tensor, const StatusCallback& done) { + MEMDEBUG_CACHE_OP(col_ctx_->op_ctx->op_kernel().name().c_str()); string send_buf_key = BroadcastBufKey(col_ctx_->exec_key, subdiv, src_rank, dst_rank); int dst_idx = diff --git a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc index b2af3c218a8..c7d218258e8 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc @@ -129,6 +129,9 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( } AllocatorAttributes cpu_attr; cpu_attr.set_gpu_compatible(true); + MEMDEBUG_CACHE_OP( + "CollectiveRemoteAccessDistributed::RecvFromPeer" + "::recv_buf_callback"); Tensor* cpu_tensor = new Tensor(cpu_dev->GetAllocator(cpu_attr), to_tensor->dtype(), to_tensor->shape()); PopulateTensorFromExtra(extra, cpu_tensor); diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index 9316cb79c33..a12b392f83a 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -669,6 +669,7 @@ void GrpcWorker::RecvBufAsync(CallOptions* opts, const RecvBufRequest* request, AllocatorAttributes cpu_attr; cpu_attr.set_gpu_compatible(true); cpu_attr.set_nic_compatible(true); + MEMDEBUG_CACHE_OP("GrpcWorker::RecvBufAsync::consumer_callback"); Tensor* cpu_tensor = new Tensor(cpu_dev->GetAllocator(cpu_attr), hook->prod_value->dtype(), hook->prod_value->shape());