Try to deduplicate thread IDs. pthread_t is 8 bytes; on some platforms, casting it to 32 bits may produce duplicate IDs.

1. When gettid is available, use it; it gives us a unique 32-bit value.
2. When gettid is not available, hash the thread ID rather than truncating it, which gives us a better chance of unique thread IDs.

PiperOrigin-RevId: 316155614
Change-Id: I4fdbd43d22ef4420ca1b54efa8732d35ee35ffe1
This commit is contained in:
A. Unique TensorFlower 2020-06-12 12:32:04 -07:00 committed by TensorFlower Gardener
parent 798f7515f9
commit 0511d9fd95
2 changed files with 20 additions and 22 deletions
tensorflow/core
platform/default
profiler/internal/gpu

View File

@ -135,15 +135,8 @@ class PosixEnv : public Env {
}
// Returns a 32-bit ID for the calling thread.
// NOTE(review): this hunk appears to show both sides of the change with the
// +/- markers stripped (its header reads -135,15 +135,8). The #ifdef/#endif
// chain looks like the removed per-platform body, and the two statements after
// #endif look like its replacement -- confirm against the original diff.
int32 GetCurrentThreadId() override {
#ifdef __APPLE__
uint64_t tid64;
pthread_threadid_np(nullptr, &tid64);
// Narrows the 64-bit thread ID to 32 bits.
return static_cast<int32>(tid64);
#elif defined(__FreeBSD__)
return pthread_getthreadid_np();
#else
// Narrowing cast of pthread_t; the commit message describes this as a source
// of duplicate IDs where pthread_t is 8 bytes.
return static_cast<int32>(pthread_self());
#endif
// The ID is computed by GetCurrentThreadIdInternal() when this thread_local is
// first initialized on a thread, and the stored value is returned afterwards.
static thread_local int32 current_thread_id = GetCurrentThreadIdInternal();
return current_thread_id;
}
bool GetCurrentThreadName(string* name) override {
@ -232,6 +225,20 @@ class PosixEnv : public Env {
private:
void GetLocalTempDirectories(std::vector<string>* list) override;
// Computes a 32-bit ID for the calling thread. The source of the ID is chosen
// at compile time by the preprocessor chain below.
int32 GetCurrentThreadIdInternal() {
#ifdef __APPLE__
// Apple: fetch a 64-bit thread ID (nullptr selects the calling thread) and
// narrow it to 32 bits.
uint64_t tid64;
pthread_threadid_np(nullptr, &tid64);
return static_cast<int32>(tid64);
#elif defined(__FreeBSD__)
// FreeBSD: return the value of pthread_getthreadid_np() as-is.
return pthread_getthreadid_np();
#elif defined(__NR_gettid)
// The gettid syscall number is defined: invoke it directly through syscall()
// and cast the result to int32.
return static_cast<int32>(syscall(__NR_gettid));
#else
// Fallback: hash std::thread::id. The hash result is implicitly converted to
// int32 by the return, so distinct threads can still map to the same value.
return std::hash<std::thread::id>()(std::this_thread::get_id());
#endif
}
};
} // namespace

View File

@ -126,15 +126,6 @@ const char *getActivityUnifiedMemoryKindString(
} \
} while (false)
// GetCachedTID() caches the thread ID in thread-local storage (which is a
// userspace construct) to avoid unnecessary system calls. Without this caching,
// it can take roughly 98ns, while it takes roughly 1ns with this caching.
// The ID comes from Env::Default()->GetCurrentThreadId(), evaluated when the
// thread_local below is first initialized on a thread; later calls on that
// thread return the stored value.
// NOTE(review): the hunk header (-126,15 +126,6) suggests this helper is the
// code removed by this change -- confirm against the original diff.
int32 GetCachedTID() {
static thread_local int32 current_thread_id =
Env::Default()->GetCurrentThreadId();
return current_thread_id;
}
size_t Bytes2D(const CUDA_MEMCPY2D *p) { return p->Height * p->WidthInBytes; }
size_t Bytes3D(const CUDA_MEMCPY3D *p) {
@ -305,7 +296,7 @@ void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id,
event.name = cbdata->symbolName ? cbdata->symbolName : cbdata->functionName;
event.start_time_ns = start_time;
event.end_time_ns = end_time;
event.thread_id = GetCachedTID();
event.thread_id = Env::Default()->GetCurrentThreadId();
event.device_id = device_id;
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;
@ -323,7 +314,7 @@ CuptiTracerEvent PopulateMemcpyCallbackEvent(
event.source = CuptiTracerEventSource::DriverCallback;
event.start_time_ns = start_time;
event.end_time_ns = end_time;
event.thread_id = GetCachedTID();
event.thread_id = Env::Default()->GetCurrentThreadId();
event.device_id = src_device;
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;
@ -387,7 +378,7 @@ void AddCudaMallocEventUponApiExit(CuptiTraceCollector *collector,
event.name = cbdata->functionName;
event.start_time_ns = start_time;
event.end_time_ns = end_time;
event.thread_id = GetCachedTID();
event.thread_id = Env::Default()->GetCurrentThreadId();
event.device_id = device_id;
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;
@ -406,7 +397,7 @@ void AddGenericEventUponApiExit(CuptiTraceCollector *collector,
event.name = cbdata->functionName;
event.start_time_ns = start_time;
event.end_time_ns = end_time;
event.thread_id = GetCachedTID();
event.thread_id = Env::Default()->GetCurrentThreadId();
event.device_id = device_id;
event.context_id = cbdata->contextUid;
event.correlation_id = cbdata->correlationId;