launching events is not handled correctly for event-pair based activity tracing, because correlatedData is the index of event record, not start timestamp.
this will fix this problem. PiperOrigin-RevId: 272280857
This commit is contained in:
parent
2b7d3c9a90
commit
d369d43a79
@ -636,6 +636,7 @@ struct KernelRecord {
|
||||
CUevent start_event;
|
||||
CUevent stop_event;
|
||||
KernelDetails details;
|
||||
uint64 start_timestamp;
|
||||
};
|
||||
|
||||
struct MemcpyRecord {
|
||||
@ -646,6 +647,7 @@ struct MemcpyRecord {
|
||||
uint32 correlation_id;
|
||||
CUevent start_event;
|
||||
CUevent stop_event;
|
||||
uint64 start_timestamp;
|
||||
};
|
||||
|
||||
Status CreateAndRecordEvent(CUevent *event, CUstream stream) {
|
||||
@ -688,17 +690,19 @@ class CudaEventRecorder {
|
||||
record.details.grid_x = params->gridDimX;
|
||||
record.details.grid_y = params->gridDimY;
|
||||
record.details.grid_z = params->gridDimZ;
|
||||
record.start_timestamp = CuptiTracer::GetTimestamp();
|
||||
LogIfError(CreateAndRecordEvent(&record.start_event, stream));
|
||||
absl::MutexLock lock(&mutex_);
|
||||
if (stopped_) return -1;
|
||||
kernel_records_.push_back(record);
|
||||
return kernel_records_.size() - 1;
|
||||
}
|
||||
void StopKernel(size_t index) {
|
||||
uint64 StopKernel(size_t index) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
if (index >= kernel_records_.size()) return;
|
||||
if (index >= kernel_records_.size()) return 0;
|
||||
auto &record = kernel_records_[index];
|
||||
LogIfError(CreateAndRecordEvent(&record.stop_event, record.stream));
|
||||
return record.start_timestamp;
|
||||
}
|
||||
|
||||
// Registers the start of a copy operation. The returned index should be
|
||||
@ -707,17 +711,19 @@ class CudaEventRecorder {
|
||||
CUcontext context, CUstream stream,
|
||||
uint32 correlation_id) {
|
||||
MemcpyRecord record = {type, size_bytes, context, stream, correlation_id};
|
||||
record.start_timestamp = CuptiTracer::GetTimestamp();
|
||||
LogIfError(CreateAndRecordEvent(&record.start_event, stream));
|
||||
absl::MutexLock lock(&mutex_);
|
||||
if (stopped_) return -1;
|
||||
memcpy_records_.push_back(record);
|
||||
return memcpy_records_.size() - 1;
|
||||
}
|
||||
void StopMemcpy(size_t index) {
|
||||
uint64 StopMemcpy(size_t index) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
if (index >= memcpy_records_.size()) return;
|
||||
if (index >= memcpy_records_.size()) return 0;
|
||||
auto &record = memcpy_records_[index];
|
||||
LogIfError(CreateAndRecordEvent(&record.stop_event, record.stream));
|
||||
return record.start_timestamp;
|
||||
}
|
||||
|
||||
Status Stop() {
|
||||
@ -1008,9 +1014,10 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
|
||||
auto *recorder = cuda_event_recorders_[device_id].get();
|
||||
if (*cbdata->correlationData == static_cast<size_t>(-1))
|
||||
return Status::OK();
|
||||
uint64 start_tsc = 0;
|
||||
switch (cbid) {
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
|
||||
recorder->StopKernel(*cbdata->correlationData);
|
||||
start_tsc = recorder->StopKernel(*cbdata->correlationData);
|
||||
break;
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy:
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
|
||||
@ -1020,11 +1027,12 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2:
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
|
||||
recorder->StopMemcpy(*cbdata->correlationData);
|
||||
start_tsc = recorder->StopMemcpy(*cbdata->correlationData);
|
||||
break;
|
||||
default:
|
||||
VLOG(1) << "Unexpected callback id: " << cbid;
|
||||
break;
|
||||
// TODO: figure out how to get start timestamp in this case.
|
||||
return Status::OK();
|
||||
}
|
||||
// If we are not collecting CPU events from Callback API, we can return now.
|
||||
if (!option_.required_callback_api_events) {
|
||||
@ -1033,11 +1041,8 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
|
||||
|
||||
// Grab timestamp for API exit. API entry timestamp saved in cbdata.
|
||||
uint64 end_tsc = CuptiTracer::GetTimestamp();
|
||||
uint64 start_tsc = *cbdata->correlationData;
|
||||
return AddDriverApiCallbackEvent(collector_, cupti_interface_, device_id,
|
||||
start_tsc, end_tsc, domain, cbid, cbdata);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
Status Flush() override {
|
||||
for (auto &recorder : cuda_event_recorders_) {
|
||||
|
Loading…
Reference in New Issue
Block a user