Trace generic cudaMemcpy / cudaMemcpyAsync

These copies are "generic" in the sense that the memory spaces of the src and
dest are not known a priori.

Previously we traced them as "UNSUPPORTED" ops and didn't fill in the op's
name, so consumers of the trace couldn't tell that they were copies.  Now we
trace them as MemcpyOther, which seems close enough.

Also tweak some logging inside cupti_tracer itself.

PiperOrigin-RevId: 346106575
Change-Id: I0d87aea73d6ca6fa7b0ce0ec54f56c0b5d286e54
This commit is contained in:
Justin Lebar 2020-12-07 09:11:51 -08:00 committed by TensorFlower Gardener
parent 91a4d31a1e
commit 03d7d7667f

View File

@ -188,12 +188,12 @@ DecodeDriverMemcpy(CUpti_CallbackId cbid, const void *params) {
}
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy: {
const auto *p = reinterpret_cast<const cuMemcpy_params *>(params);
return std::make_tuple(p->ByteCount, CuptiTracerEventType::Unsupported,
return std::make_tuple(p->ByteCount, CuptiTracerEventType::MemcpyOther,
false);
}
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync: {
const auto *p = reinterpret_cast<const cuMemcpyAsync_params *>(params);
return std::make_tuple(p->ByteCount, CuptiTracerEventType::Unsupported,
return std::make_tuple(p->ByteCount, CuptiTracerEventType::MemcpyOther,
true);
}
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2D_v2: {
@ -384,7 +384,7 @@ void AddCudaMallocEventUponApiExit(CuptiTraceCollector *collector,
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;
event.memalloc_info.num_bytes = params->bytesize;
VLOG(3) << "Cuda Malloc/Free observed: " << params->bytesize;
VLOG(3) << "cudaMalloc observed: " << params->bytesize;
collector->AddEvent(std::move(event));
}
@ -402,6 +402,7 @@ void AddGenericEventUponApiExit(CuptiTraceCollector *collector,
event.device_id = device_id;
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;
VLOG(3) << "Observed generic event " << cbdata->functionName;
collector->AddEvent(std::move(event));
}