launching events is not handled correctly for event-pair based activity tracing, because correlatedData is the index of event record, not start timestamp.

this will fix this problem.

PiperOrigin-RevId: 272280857
This commit is contained in:
A. Unique TensorFlower 2019-10-01 13:26:01 -07:00 committed by TensorFlower Gardener
parent 2b7d3c9a90
commit d369d43a79

View File

@ -636,6 +636,7 @@ struct KernelRecord {
CUevent start_event;
CUevent stop_event;
KernelDetails details;
uint64 start_timestamp;
};
struct MemcpyRecord {
@ -646,6 +647,7 @@ struct MemcpyRecord {
uint32 correlation_id;
CUevent start_event;
CUevent stop_event;
uint64 start_timestamp;
};
Status CreateAndRecordEvent(CUevent *event, CUstream stream) {
@ -688,17 +690,19 @@ class CudaEventRecorder {
record.details.grid_x = params->gridDimX;
record.details.grid_y = params->gridDimY;
record.details.grid_z = params->gridDimZ;
record.start_timestamp = CuptiTracer::GetTimestamp();
LogIfError(CreateAndRecordEvent(&record.start_event, stream));
absl::MutexLock lock(&mutex_);
if (stopped_) return -1;
kernel_records_.push_back(record);
return kernel_records_.size() - 1;
}
void StopKernel(size_t index) {
uint64 StopKernel(size_t index) {
absl::MutexLock lock(&mutex_);
if (index >= kernel_records_.size()) return;
if (index >= kernel_records_.size()) return 0;
auto &record = kernel_records_[index];
LogIfError(CreateAndRecordEvent(&record.stop_event, record.stream));
return record.start_timestamp;
}
// Registers the start of a copy operation. The returned index should be
@ -707,17 +711,19 @@ class CudaEventRecorder {
CUcontext context, CUstream stream,
uint32 correlation_id) {
MemcpyRecord record = {type, size_bytes, context, stream, correlation_id};
record.start_timestamp = CuptiTracer::GetTimestamp();
LogIfError(CreateAndRecordEvent(&record.start_event, stream));
absl::MutexLock lock(&mutex_);
if (stopped_) return -1;
memcpy_records_.push_back(record);
return memcpy_records_.size() - 1;
}
void StopMemcpy(size_t index) {
uint64 StopMemcpy(size_t index) {
absl::MutexLock lock(&mutex_);
if (index >= memcpy_records_.size()) return;
if (index >= memcpy_records_.size()) return 0;
auto &record = memcpy_records_[index];
LogIfError(CreateAndRecordEvent(&record.stop_event, record.stream));
return record.start_timestamp;
}
Status Stop() {
@ -1008,9 +1014,10 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
auto *recorder = cuda_event_recorders_[device_id].get();
if (*cbdata->correlationData == static_cast<size_t>(-1))
return Status::OK();
uint64 start_tsc = 0;
switch (cbid) {
case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
recorder->StopKernel(*cbdata->correlationData);
start_tsc = recorder->StopKernel(*cbdata->correlationData);
break;
case CUPTI_DRIVER_TRACE_CBID_cuMemcpy:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
@ -1020,11 +1027,12 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoD_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
recorder->StopMemcpy(*cbdata->correlationData);
start_tsc = recorder->StopMemcpy(*cbdata->correlationData);
break;
default:
VLOG(1) << "Unexpected callback id: " << cbid;
break;
// TODO: figure out how to get start timestamp in this case.
return Status::OK();
}
// If we are not collecting CPU events from Callback API, we can return now.
if (!option_.required_callback_api_events) {
@ -1033,11 +1041,8 @@ class CuptiDriverApiHookWithCudaEvent : public CuptiDriverApiHook {
// Grab timestamp for API exit. API entry timestamp saved in cbdata.
uint64 end_tsc = CuptiTracer::GetTimestamp();
uint64 start_tsc = *cbdata->correlationData;
return AddDriverApiCallbackEvent(collector_, cupti_interface_, device_id,
start_tsc, end_tsc, domain, cbid, cbdata);
return Status::OK();
}
Status Flush() override {
for (auto &recorder : cuda_event_recorders_) {