Try to deduplicate thread IDs. pthread_t is 8 bytes; on some platforms, casting it to 32 bits might cause duplication.
1. When gettid is available, using it gives us a unique 32-bit value. 2. When gettid is not available, hashing rather than truncating gives us a better chance of having a unique thread ID. PiperOrigin-RevId: 316155614 Change-Id: I4fdbd43d22ef4420ca1b54efa8732d35ee35ffe1
This commit is contained in:
parent
798f7515f9
commit
0511d9fd95
@ -135,15 +135,8 @@ class PosixEnv : public Env {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int32 GetCurrentThreadId() override {
|
int32 GetCurrentThreadId() override {
|
||||||
#ifdef __APPLE__
|
static thread_local int32 current_thread_id = GetCurrentThreadIdInternal();
|
||||||
uint64_t tid64;
|
return current_thread_id;
|
||||||
pthread_threadid_np(nullptr, &tid64);
|
|
||||||
return static_cast<int32>(tid64);
|
|
||||||
#elif defined(__FreeBSD__)
|
|
||||||
return pthread_getthreadid_np();
|
|
||||||
#else
|
|
||||||
return static_cast<int32>(pthread_self());
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GetCurrentThreadName(string* name) override {
|
bool GetCurrentThreadName(string* name) override {
|
||||||
@ -232,6 +225,20 @@ class PosixEnv : public Env {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void GetLocalTempDirectories(std::vector<string>* list) override;
|
void GetLocalTempDirectories(std::vector<string>* list) override;
|
||||||
|
|
||||||
|
int32 GetCurrentThreadIdInternal() {
|
||||||
|
#ifdef __APPLE__
|
||||||
|
uint64_t tid64;
|
||||||
|
pthread_threadid_np(nullptr, &tid64);
|
||||||
|
return static_cast<int32>(tid64);
|
||||||
|
#elif defined(__FreeBSD__)
|
||||||
|
return pthread_getthreadid_np();
|
||||||
|
#elif defined(__NR_gettid)
|
||||||
|
return static_cast<int32>(syscall(__NR_gettid));
|
||||||
|
#else
|
||||||
|
return std::hash<std::thread::id>()(std::this_thread::get_id());
|
||||||
|
#endif
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -126,15 +126,6 @@ const char *getActivityUnifiedMemoryKindString(
|
|||||||
} \
|
} \
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
// GetCachedTID() caches the thread ID in thread-local storage (which is a
|
|
||||||
// userspace construct) to avoid unnecessary system calls. Without this caching,
|
|
||||||
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
|
|
||||||
int32 GetCachedTID() {
|
|
||||||
static thread_local int32 current_thread_id =
|
|
||||||
Env::Default()->GetCurrentThreadId();
|
|
||||||
return current_thread_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t Bytes2D(const CUDA_MEMCPY2D *p) { return p->Height * p->WidthInBytes; }
|
size_t Bytes2D(const CUDA_MEMCPY2D *p) { return p->Height * p->WidthInBytes; }
|
||||||
|
|
||||||
size_t Bytes3D(const CUDA_MEMCPY3D *p) {
|
size_t Bytes3D(const CUDA_MEMCPY3D *p) {
|
||||||
@ -305,7 +296,7 @@ void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id,
|
|||||||
event.name = cbdata->symbolName ? cbdata->symbolName : cbdata->functionName;
|
event.name = cbdata->symbolName ? cbdata->symbolName : cbdata->functionName;
|
||||||
event.start_time_ns = start_time;
|
event.start_time_ns = start_time;
|
||||||
event.end_time_ns = end_time;
|
event.end_time_ns = end_time;
|
||||||
event.thread_id = GetCachedTID();
|
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||||
event.device_id = device_id;
|
event.device_id = device_id;
|
||||||
event.context_id = cbdata->contextUid;
|
event.context_id = cbdata->contextUid;
|
||||||
event.correlation_id = cbdata->correlationId;
|
event.correlation_id = cbdata->correlationId;
|
||||||
@ -323,7 +314,7 @@ CuptiTracerEvent PopulateMemcpyCallbackEvent(
|
|||||||
event.source = CuptiTracerEventSource::DriverCallback;
|
event.source = CuptiTracerEventSource::DriverCallback;
|
||||||
event.start_time_ns = start_time;
|
event.start_time_ns = start_time;
|
||||||
event.end_time_ns = end_time;
|
event.end_time_ns = end_time;
|
||||||
event.thread_id = GetCachedTID();
|
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||||
event.device_id = src_device;
|
event.device_id = src_device;
|
||||||
event.context_id = cbdata->contextUid;
|
event.context_id = cbdata->contextUid;
|
||||||
event.correlation_id = cbdata->correlationId;
|
event.correlation_id = cbdata->correlationId;
|
||||||
@ -387,7 +378,7 @@ void AddCudaMallocEventUponApiExit(CuptiTraceCollector *collector,
|
|||||||
event.name = cbdata->functionName;
|
event.name = cbdata->functionName;
|
||||||
event.start_time_ns = start_time;
|
event.start_time_ns = start_time;
|
||||||
event.end_time_ns = end_time;
|
event.end_time_ns = end_time;
|
||||||
event.thread_id = GetCachedTID();
|
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||||
event.device_id = device_id;
|
event.device_id = device_id;
|
||||||
event.context_id = cbdata->contextUid;
|
event.context_id = cbdata->contextUid;
|
||||||
event.correlation_id = cbdata->correlationId;
|
event.correlation_id = cbdata->correlationId;
|
||||||
@ -406,7 +397,7 @@ void AddGenericEventUponApiExit(CuptiTraceCollector *collector,
|
|||||||
event.name = cbdata->functionName;
|
event.name = cbdata->functionName;
|
||||||
event.start_time_ns = start_time;
|
event.start_time_ns = start_time;
|
||||||
event.end_time_ns = end_time;
|
event.end_time_ns = end_time;
|
||||||
event.thread_id = GetCachedTID();
|
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||||
event.device_id = device_id;
|
event.device_id = device_id;
|
||||||
event.context_id = cbdata->contextUid;
|
event.context_id = cbdata->contextUid;
|
||||||
event.correlation_id = cbdata->correlationId;
|
event.correlation_id = cbdata->correlationId;
|
||||||
|
Loading…
Reference in New Issue
Block a user