[TF port] Add port::GetCurrentCPU and port::NumTotalCPUs.
GetCurrentCPU: returns the current CPU of the calling thread. NumTotalCPUs: attempts to get the total number of physical cores on the system. When both return non-failing values, we expect 0 <= GetCurrentCPU < NumTotalCPUs. PiperOrigin-RevId: 225088316
This commit is contained in:
parent
2087bffc23
commit
9748092a5d
@ -32,9 +32,22 @@ namespace port {
|
|||||||
// Returns an estimate of the number of schedulable CPUs for this
|
// Returns an estimate of the number of schedulable CPUs for this
|
||||||
// process. Usually, it's constant throughout the lifetime of a
|
// process. Usually, it's constant throughout the lifetime of a
|
||||||
// process, but it might change if the underlying cluster management
|
// process, but it might change if the underlying cluster management
|
||||||
// software can change it dynamically.
|
// software can change it dynamically. If the underlying call fails, a default
|
||||||
|
// value (e.g. `4`) may be returned.
|
||||||
int NumSchedulableCPUs();
|
int NumSchedulableCPUs();
|
||||||
|
|
||||||
|
// Sentinel returned by `NumTotalCPUs` and `GetCurrentCPU` when the underlying
// query fails. Callers must compare against it before using the result.
static constexpr int kUnknownCPU = -1;

// Returns the total number of CPUs on the system. This number should
// not change even if the underlying cluster management software may
// change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the
// underlying call fails, the invalid value `kUnknownCPU` (-1) is returned;
// the user must check for validity.
int NumTotalCPUs();

// Returns the id of the current CPU. Returns `kUnknownCPU` (-1) if the current
// CPU cannot be identified. If successful, the return value will be in
// [0, NumTotalCPUs()).
int GetCurrentCPU();
|
||||||
|
|
||||||
// Returns an estimate of the number of hyperthreads per physical core
|
// Returns an estimate of the number of hyperthreads per physical core
|
||||||
// on the CPU
|
// on the CPU
|
||||||
int NumHyperthreadsPerCore();
|
int NumHyperthreadsPerCore();
|
||||||
|
@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies the documented contract between GetCurrentCPU() and
// NumTotalCPUs(): when both queries succeed, the current CPU id lies in
// [0, NumTotalCPUs()).
TEST(Port, GetCurrentCPU) {
  const int cpu = GetCurrentCPU();
  const int total_cpus = NumTotalCPUs();
#if defined(__linux__) && !defined(__ANDROID__)
  // On this configuration GetCurrentCPU() is backed by sched_getcpu(), so an
  // unknown result most likely indicates a regression rather than a missing
  // platform feature.
  EXPECT_NE(cpu, kUnknownCPU);
#endif
  // Both calls are allowed to report kUnknownCPU; the range guarantee only
  // applies when neither of them failed.
  if (cpu == kUnknownCPU || total_cpus == kUnknownCPU) {
    return;
  }
  EXPECT_GE(cpu, 0);
  EXPECT_LT(cpu, total_cpus);
}
|
||||||
|
|
||||||
TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
|
TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
|
||||||
mutex m;
|
mutex m;
|
||||||
mutex_lock l(m);
|
mutex_lock l(m);
|
||||||
@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) {
|
|||||||
|
|
||||||
} // namespace port
|
} // namespace port
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
// On Linux, add: FLAGS_logtostderr = true;
|
||||||
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
return RUN_ALL_TESTS();
|
||||||
|
}
|
||||||
|
@ -25,7 +25,14 @@ limitations under the License.
|
|||||||
#if defined(__linux__) && !defined(__ANDROID__)
|
#if defined(__linux__) && !defined(__ANDROID__)
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
|
#else
|
||||||
|
#include <sys/syscall.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__APPLE__) && (__x86_64__ || __i386__)
|
||||||
|
#include <cpuid.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -69,6 +76,34 @@ int NumSchedulableCPUs() {
|
|||||||
return kDefaultCores;
|
return kDefaultCores;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int NumTotalCPUs() {
|
||||||
|
int count = absl::base_internal::NumCPUs();
|
||||||
|
return (count == 0) ? kUnknownCPU : count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the id of the CPU the calling thread is currently running on, or
// kUnknownCPU if it cannot be determined.
//
// Strategy:
//   * Linux (non-Android): sched_getcpu().
//   * Everywhere else: try cpuid first, and if that is unavailable or does
//     not report an APIC, fall back to the getcpu syscall when the platform
//     defines it.
int GetCurrentCPU() {
#if defined(__linux__) && !defined(__ANDROID__)
  return sched_getcpu();
#else
#if defined(__cpuid_count)
  // Attempt to use cpuid on all other platforms. If that fails, perform a
  // syscall (below).
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
  // EDX bit 9 is the APIC feature flag; the APIC ID is only meaningful when
  // it is set.
  if ((edx & (1 << 9)) != 0) {
    // EBX bits 24-31 are APIC ID
    return static_cast<int>(ebx >> 24);
  }
#endif  // defined(__cpuid_count)
#if defined(__NR_getcpu)
  // Deliberately a separate #if rather than #elif: this must stay reachable
  // as a fallback when the cpuid path above did not produce an answer.
  unsigned int cpu = 0;
  if (syscall(__NR_getcpu, &cpu, nullptr, nullptr) >= 0) {
    return static_cast<int>(cpu);
  }
#endif  // defined(__NR_getcpu)
  return kUnknownCPU;
#endif  // defined(__linux__) && !defined(__ANDROID__)
}
|
||||||
|
|
||||||
int NumHyperthreadsPerCore() {
|
int NumHyperthreadsPerCore() {
|
||||||
static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
|
static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
|
||||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||||
@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; }
|
|||||||
|
|
||||||
void NUMASetThreadNodeAffinity(int node) {}
|
void NUMASetThreadNodeAffinity(int node) {}
|
||||||
|
|
||||||
int NUMAGetThreadNodeAffinity() {
|
int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
|
||||||
return kNUMANoAffinity;
|
|
||||||
}
|
|
||||||
|
|
||||||
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
|
#include <processthreadsapi.h>
|
||||||
#include <shlwapi.h>
|
#include <shlwapi.h>
|
||||||
|
|
||||||
#include "tensorflow/core/platform/cpu_info.h"
|
#include "tensorflow/core/platform/cpu_info.h"
|
||||||
@ -54,6 +55,30 @@ int NumSchedulableCPUs() {
|
|||||||
return system_info.dwNumberOfProcessors;
|
return system_info.dwNumberOfProcessors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int NumTotalCPUs() {
|
||||||
|
// TODO(ebrevdo): Make this more accurate.
|
||||||
|
//
|
||||||
|
// This only returns the number of processors in the current
|
||||||
|
// processor group; which may be undercounting if you have more than 64 cores.
|
||||||
|
// For that case, one needs to call
|
||||||
|
// GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and accumulate
|
||||||
|
// the Size fields by iterating over the written-to buffer. Since I can't
|
||||||
|
// easily test this on Windows, I'm deferring this to someone who can!
|
||||||
|
//
|
||||||
|
// If you fix this, also consider updatig GetCurrentCPU below.
|
||||||
|
return NumSchedulableCPUs();
|
||||||
|
}
|
||||||
|
|
||||||
|
int GetCurrentCPU() {
|
||||||
|
// NOTE(ebrevdo): This returns the processor number within the processor
|
||||||
|
// group on systems with >64 processors. Therefore it doesn't necessarily map
|
||||||
|
// naturally to an index in NumSchedulableCPUs().
|
||||||
|
//
|
||||||
|
// On the plus side, this number is probably guaranteed to be within
|
||||||
|
// [0, NumTotalCPUs()) due to its incomplete implementation.
|
||||||
|
return GetCurrentProcessorNumber();
|
||||||
|
}
|
||||||
|
|
||||||
bool NUMAEnabled() {
|
bool NUMAEnabled() {
|
||||||
// Not yet implemented: coming soon.
|
// Not yet implemented: coming soon.
|
||||||
return false;
|
return false;
|
||||||
|
Loading…
Reference in New Issue
Block a user