diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index 6eba83224a4..c9208cc7553 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -32,9 +32,22 @@ namespace port { // Returns an estimate of the number of schedulable CPUs for this // process. Usually, it's constant throughout the lifetime of a // process, but it might change if the underlying cluster management -// software can change it dynamically. +// software can change it dynamically. If the underlying call fails, a default +// value (e.g. `4`) may be returned. int NumSchedulableCPUs(); +// Returns the total number of CPUs on the system. This number should +// not change even if the underlying cluster management software may +// change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the +// underlying call fails, an invalid value of -1 will be returned; +// the user must check for validity. +static constexpr int kUnknownCPU = -1; +int NumTotalCPUs(); + +// Returns the id of the current CPU. Returns -1 if the current CPU cannot be +// identified. If successful, the return value will be in [0, NumTotalCPUs()). 
+int GetCurrentCPU(); + // Returns an estimate of the number of hyperthreads per physical core // on the CPU int NumHyperthreadsPerCore(); diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc index 15c3cb24f04..9d144efbfd9 100644 --- a/tensorflow/core/platform/port_test.cc +++ b/tensorflow/core/platform/port_test.cc @@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) { } } +TEST(Port, GetCurrentCPU) { + const int cpu = GetCurrentCPU(); + EXPECT_GE(cpu, 0); + EXPECT_LT(cpu, NumTotalCPUs()); +} + TEST(ConditionVariable, WaitForMilliseconds_Timeout) { mutex m; mutex_lock l(m); @@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) { } // namespace port } // namespace tensorflow + +int main(int argc, char** argv) { + // On Linux, add: FLAGS_logtostderr = true; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index acdd7798ea9..0fac8b1a889 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -25,7 +25,14 @@ limitations under the License. #if defined(__linux__) && !defined(__ANDROID__) #include #include +#else +#include #endif + +#if !defined(__APPLE__) && (__x86_64__ || __i386__) +#include +#endif + #include #include #include @@ -69,6 +76,34 @@ int NumSchedulableCPUs() { return kDefaultCores; } +int NumTotalCPUs() { + int count = absl::base_internal::NumCPUs(); + return (count == 0) ? kUnknownCPU : count; +} + +int GetCurrentCPU() { +#if defined(__linux__) && !defined(__ANDROID__) + return sched_getcpu(); +#elif defined(__cpuid_count) + // On other platforms use cpuid if __cpuid_count is defined. The getcpu + // syscall branch below is compiled only when it is not. 
+ uint32_t eax, ebx, ecx, edx; + __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx); + if ((edx & (1 << 9)) != 0) { + // EBX bits 24-31 are APIC ID + return static_cast(ebx >> 24); + } +#elif defined(__NR_getcpu) + unsigned int cpu; + if (syscall(__NR_getcpu, &cpu, NULL, NULL) < 0) { + return kUnknownCPU; + } else { + return static_cast(cpu); + } +#endif + return kUnknownCPU; +} + int NumHyperthreadsPerCore() { static const int ht_per_core = tensorflow::port::CPUIDNumSMT(); return (ht_per_core > 0) ? ht_per_core : 1; @@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; } void NUMASetThreadNodeAffinity(int node) {} -int NUMAGetThreadNodeAffinity() { - return kNUMANoAffinity; -} +int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; } void* AlignedMalloc(size_t size, int minimum_alignment) { #if defined(__ANDROID__) diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index 911ea1902f8..b902c85cdcf 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -21,6 +21,7 @@ limitations under the License. #endif #include +#include #include #include "tensorflow/core/platform/cpu_info.h" @@ -54,6 +55,30 @@ int NumSchedulableCPUs() { return system_info.dwNumberOfProcessors; } +int NumTotalCPUs() { + // TODO(ebrevdo): Make this more accurate. + // + // This only returns the number of processors in the current + // processor group, which may undercount if you have more than 64 cores. + // For that case, one needs to call + // GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and accumulate + // the Size fields by iterating over the written-to buffer. Since I can't + // easily test this on Windows, I'm deferring this to someone who can! + // + // If you fix this, also consider updating GetCurrentCPU below. 
+ return NumSchedulableCPUs(); +} + +int GetCurrentCPU() { + // NOTE(ebrevdo): This returns the processor number within the processor + // group on systems with >64 processors. Therefore it doesn't necessarily map + // naturally to an index in [0, NumSchedulableCPUs()). + // + // On the plus side, this number is probably guaranteed to be within + // [0, NumTotalCPUs()) due to its incomplete implementation. + return GetCurrentProcessorNumber(); +} + bool NUMAEnabled() { // Not yet implemented: coming soon. return false;