Enable op name tracking for host memory in BFCAllocator.
PiperOrigin-RevId: 299398741
Change-Id: I836f38e844c07e2cc28d8ad4a518438b4075ee88
This commit is contained in:
parent
423c2cae26
commit
c4d9a3a647
@ -81,7 +81,7 @@ class BFCAllocator : public Allocator {
|
||||
|
||||
void SetSafeFrontier(uint64 count) override;
|
||||
|
||||
virtual bool ShouldRecordOpName() const { return false; }
|
||||
bool ShouldRecordOpName() const { return true; }
|
||||
|
||||
MemoryDump RecordMemoryMap();
|
||||
|
||||
|
@ -41,10 +41,6 @@ class GPUBFCAllocator : public BFCAllocator {
|
||||
|
||||
TF_DISALLOW_COPY_AND_ASSIGN(GPUBFCAllocator);
|
||||
|
||||
#ifdef TENSORFLOW_MEM_DEBUG
|
||||
bool ShouldRecordOpName() const override { return true; }
|
||||
#endif
|
||||
|
||||
private:
|
||||
static bool GetAllowGrowthValue(const GPUOptions& gpu_options);
|
||||
static bool GetGarbageCollectionValue();
|
||||
|
@ -409,6 +409,7 @@ void HierarchicalTreeBroadcaster::DispatchSend(int subdiv, int dst_rank,
|
||||
int src_rank,
|
||||
const Tensor* src_tensor,
|
||||
const StatusCallback& done) {
|
||||
MEMDEBUG_CACHE_OP(col_ctx_->op_ctx->op_kernel().name().c_str());
|
||||
string send_buf_key =
|
||||
BroadcastBufKey(col_ctx_->exec_key, subdiv, src_rank, dst_rank);
|
||||
int dst_idx =
|
||||
|
@ -129,6 +129,9 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
|
||||
}
|
||||
AllocatorAttributes cpu_attr;
|
||||
cpu_attr.set_gpu_compatible(true);
|
||||
MEMDEBUG_CACHE_OP(
|
||||
"CollectiveRemoteAccessDistributed::RecvFromPeer"
|
||||
"::recv_buf_callback");
|
||||
Tensor* cpu_tensor = new Tensor(cpu_dev->GetAllocator(cpu_attr),
|
||||
to_tensor->dtype(), to_tensor->shape());
|
||||
PopulateTensorFromExtra(extra, cpu_tensor);
|
||||
|
@ -669,6 +669,7 @@ void GrpcWorker::RecvBufAsync(CallOptions* opts, const RecvBufRequest* request,
|
||||
AllocatorAttributes cpu_attr;
|
||||
cpu_attr.set_gpu_compatible(true);
|
||||
cpu_attr.set_nic_compatible(true);
|
||||
MEMDEBUG_CACHE_OP("GrpcWorker::RecvBufAsync::consumer_callback");
|
||||
Tensor* cpu_tensor =
|
||||
new Tensor(cpu_dev->GetAllocator(cpu_attr),
|
||||
hook->prod_value->dtype(), hook->prod_value->shape());
|
||||
|
Loading…
Reference in New Issue
Block a user