Try to deduplicate thread IDs. pthread_t is 8 bytes on some platforms, and casting it to 32 bits might cause duplication.
1. When gettid is available, using it gives us a unique 32-bit value. 2. When gettid is not available, using a hash rather than truncating gives us a better chance of having a unique thread ID. PiperOrigin-RevId: 316155614 Change-Id: I4fdbd43d22ef4420ca1b54efa8732d35ee35ffe1
This commit is contained in:
parent
798f7515f9
commit
0511d9fd95
tensorflow/core
@ -135,15 +135,8 @@ class PosixEnv : public Env {
|
||||
}
|
||||
|
||||
// Returns an int32 id for the calling thread. The final two statements compute
// the id once per thread via GetCurrentThreadIdInternal() and keep it in a
// thread_local static, so later calls on the same thread reuse that value.
int32 GetCurrentThreadId() override {
|
||||
// NOTE(review): this is a rendered diff hunk (header says 15 lines -> 8
// lines) with the +/- markers lost. The preprocessor group below appears to
// be the removed pre-change body and the two statements after it the
// replacement -- confirm against the actual file.
#ifdef __APPLE__
|
||||
uint64_t tid64;
|
||||
pthread_threadid_np(nullptr, &tid64);
|
||||
return static_cast<int32>(tid64);
|
||||
#elif defined(__FreeBSD__)
|
||||
return pthread_getthreadid_np();
|
||||
#else
|
||||
// Truncates pthread_self() to 32 bits; the commit message cites this cast
// as a source of duplicate ids where pthread_t is 8 bytes.
return static_cast<int32>(pthread_self());
|
||||
#endif
|
||||
static thread_local int32 current_thread_id = GetCurrentThreadIdInternal();
|
||||
return current_thread_id;
|
||||
}
|
||||
|
||||
bool GetCurrentThreadName(string* name) override {
|
||||
@ -232,6 +225,20 @@ class PosixEnv : public Env {
|
||||
|
||||
private:
|
||||
void GetLocalTempDirectories(std::vector<string>* list) override;
|
||||
|
||||
// Computes an int32 id for the calling thread, picking the source at compile
// time:
//   - __APPLE__:     pthread_threadid_np(), 64-bit value cast to int32.
//   - __FreeBSD__:   pthread_getthreadid_np().
//   - __NR_gettid:   the gettid system call.
//   - otherwise:     std::hash of std::this_thread::get_id().
int32 GetCurrentThreadIdInternal() {
|
||||
#ifdef __APPLE__
|
||||
uint64_t tid64;
|
||||
pthread_threadid_np(nullptr, &tid64);
|
||||
// Keeps only what fits in int32 of the 64-bit id.
return static_cast<int32>(tid64);
|
||||
#elif defined(__FreeBSD__)
|
||||
return pthread_getthreadid_np();
|
||||
#elif defined(__NR_gettid)
|
||||
return static_cast<int32>(syscall(__NR_gettid));
|
||||
#else
|
||||
// The hash result is a size_t that is implicitly narrowed to the int32
// return type here, so distinct threads can still map to the same id; per
// the commit message this is only meant to be less collision-prone than
// truncating pthread_t.
return std::hash<std::thread::id>()(std::this_thread::get_id());
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -126,15 +126,6 @@ const char *getActivityUnifiedMemoryKindString(
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
// GetCachedTID() caches the thread ID in thread-local storage (which is a
|
||||
// userspace construct) to avoid unnecessary system calls. Without this caching,
|
||||
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
|
||||
int32 GetCachedTID() {
|
||||
// thread_local static: Env::Default()->GetCurrentThreadId() is evaluated the
// first time each thread reaches this line; later calls return the stored id.
static thread_local int32 current_thread_id =
|
||||
Env::Default()->GetCurrentThreadId();
|
||||
return current_thread_id;
|
||||
}
|
||||
|
||||
size_t Bytes2D(const CUDA_MEMCPY2D *p) { return p->Height * p->WidthInBytes; }
|
||||
|
||||
size_t Bytes3D(const CUDA_MEMCPY3D *p) {
|
||||
@ -305,7 +296,7 @@ void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id,
|
||||
event.name = cbdata->symbolName ? cbdata->symbolName : cbdata->functionName;
|
||||
event.start_time_ns = start_time;
|
||||
event.end_time_ns = end_time;
|
||||
event.thread_id = GetCachedTID();
|
||||
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||
event.device_id = device_id;
|
||||
event.context_id = cbdata->contextUid;
|
||||
event.correlation_id = cbdata->correlationId;
|
||||
@ -323,7 +314,7 @@ CuptiTracerEvent PopulateMemcpyCallbackEvent(
|
||||
event.source = CuptiTracerEventSource::DriverCallback;
|
||||
event.start_time_ns = start_time;
|
||||
event.end_time_ns = end_time;
|
||||
event.thread_id = GetCachedTID();
|
||||
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||
event.device_id = src_device;
|
||||
event.context_id = cbdata->contextUid;
|
||||
event.correlation_id = cbdata->correlationId;
|
||||
@ -387,7 +378,7 @@ void AddCudaMallocEventUponApiExit(CuptiTraceCollector *collector,
|
||||
event.name = cbdata->functionName;
|
||||
event.start_time_ns = start_time;
|
||||
event.end_time_ns = end_time;
|
||||
event.thread_id = GetCachedTID();
|
||||
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||
event.device_id = device_id;
|
||||
event.context_id = cbdata->contextUid;
|
||||
event.correlation_id = cbdata->correlationId;
|
||||
@ -406,7 +397,7 @@ void AddGenericEventUponApiExit(CuptiTraceCollector *collector,
|
||||
event.name = cbdata->functionName;
|
||||
event.start_time_ns = start_time;
|
||||
event.end_time_ns = end_time;
|
||||
event.thread_id = GetCachedTID();
|
||||
event.thread_id = Env::Default()->GetCurrentThreadId();
|
||||
event.device_id = device_id;
|
||||
event.context_id = cbdata->contextUid;
|
||||
event.correlation_id = cbdata->correlationId;
|
||||
|
Loading…
Reference in New Issue
Block a user