[TF port] Add port::GetCurrentCPU and port::NumTotalCPUs.
GetCurrentCPU: returns the current CPU of the calling thread. NumTotalCPUs: attempts to get the total number of physical cores on the system. When both return non-failing values, we expect 0 <= GetCurrentCPU < NumTotalCPUs. PiperOrigin-RevId: 225088316
This commit is contained in:
parent
2087bffc23
commit
9748092a5d
@ -32,9 +32,22 @@ namespace port {
|
||||
// Returns an estimate of the number of schedulable CPUs for this
|
||||
// process. Usually, it's constant throughout the lifetime of a
|
||||
// process, but it might change if the underlying cluster management
|
||||
// software can change it dynamically.
|
||||
// software can change it dynamically. If the underlying call fails, a default
|
||||
// value (e.g. `4`) may be returned.
|
||||
int NumSchedulableCPUs();
|
||||
|
||||
// Returns the total number of CPUs on the system. This number should
|
||||
// not change even if the underlying cluster management software may
|
||||
// change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the
|
||||
// underlying call fails, an invalid value of -1 will be returned;
|
||||
// the user must check for validity.
|
||||
static constexpr int kUnknownCPU = -1;
|
||||
int NumTotalCPUs();
|
||||
|
||||
// Returns the id of the current CPU, or kUnknownCPU (-1) if it cannot be
|
||||
// identified. If successful, the return value will be in [0, NumTotalCPUs()).
|
||||
int GetCurrentCPU();
|
||||
|
||||
// Returns an estimate of the number of hyperthreads per physical core
|
||||
// on the CPU
|
||||
int NumHyperthreadsPerCore();
|
||||
|
@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the documented contract between GetCurrentCPU() and NumTotalCPUs():
// whenever both calls succeed, the current CPU id lies in
// [0, NumTotalCPUs()).
TEST(Port, GetCurrentCPU) {
  const int cpu = GetCurrentCPU();
  const int total_cpus = NumTotalCPUs();
  if (cpu == kUnknownCPU || total_cpus == kUnknownCPU) {
    // Both functions are documented to return kUnknownCPU (-1) when the
    // underlying platform query fails or is unavailable; the range guarantee
    // only applies when both values are valid, so there is nothing to check.
    return;
  }
  EXPECT_GE(cpu, 0);
  EXPECT_LT(cpu, total_cpus);
}
|
||||
|
||||
TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
|
||||
mutex m;
|
||||
mutex_lock l(m);
|
||||
@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) {
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
// On Linux, add: FLAGS_logtostderr = true;
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
@ -25,7 +25,14 @@ limitations under the License.
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#include <sched.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#else
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__APPLE__) && (__x86_64__ || __i386__)
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -69,6 +76,34 @@ int NumSchedulableCPUs() {
|
||||
return kDefaultCores;
|
||||
}
|
||||
|
||||
int NumTotalCPUs() {
|
||||
int count = absl::base_internal::NumCPUs();
|
||||
return (count == 0) ? kUnknownCPU : count;
|
||||
}
|
||||
|
||||
// Returns the id of the CPU the calling thread is currently running on, or
// kUnknownCPU if no supported mechanism can identify it.
//
// Strategy:
//   * Linux (non-Android): sched_getcpu(). Its failure value (-1) coincides
//     with kUnknownCPU, so the result is returned as-is.
//   * Elsewhere: try the cpuid instruction first; if that is unavailable or
//     does not report an APIC, fall back to the getcpu syscall when the
//     platform defines it.
int GetCurrentCPU() {
#if defined(__linux__) && !defined(__ANDROID__)
  return sched_getcpu();
#else
#if defined(__cpuid_count)
  {
    uint32_t eax, ebx, ecx, edx;
    __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
    // EDX bit 9 of leaf 1 signals an on-chip APIC; only then is the APIC id
    // field meaningful.
    if ((edx & (1u << 9)) != 0) {
      // EBX bits 24-31 are APIC ID.
      // NOTE(review): the APIC id is presumably not guaranteed to be a dense
      // index in [0, NumTotalCPUs()) on every system -- confirm before
      // relying on it as an array index.
      return static_cast<int>(ebx >> 24);
    }
  }
#endif  // defined(__cpuid_count)
#if defined(__NR_getcpu)
  {
    unsigned int cpu = 0;
    // nullptr (not NULL) so that a pointer-sized null is passed through the
    // variadic syscall() interface.
    if (syscall(__NR_getcpu, &cpu, nullptr, nullptr) < 0) {
      return kUnknownCPU;
    }
    return static_cast<int>(cpu);
  }
#endif  // defined(__NR_getcpu)
  return kUnknownCPU;
#endif  // defined(__linux__) && !defined(__ANDROID__)
}
|
||||
|
||||
int NumHyperthreadsPerCore() {
|
||||
static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
|
||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||
@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; }
|
||||
|
||||
void NUMASetThreadNodeAffinity(int node) {}
|
||||
|
||||
int NUMAGetThreadNodeAffinity() {
|
||||
return kNUMANoAffinity;
|
||||
}
|
||||
int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
|
||||
|
||||
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
||||
#if defined(__ANDROID__)
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
#endif
|
||||
|
||||
#include <Windows.h>
|
||||
#include <processthreadsapi.h>
|
||||
#include <shlwapi.h>
|
||||
|
||||
#include "tensorflow/core/platform/cpu_info.h"
|
||||
@ -54,6 +55,30 @@ int NumSchedulableCPUs() {
|
||||
return system_info.dwNumberOfProcessors;
|
||||
}
|
||||
|
||||
int NumTotalCPUs() {
|
||||
// TODO(ebrevdo): Make this more accurate.
|
||||
//
|
||||
// This only returns the number of processors in the current
|
||||
// processor group; which may be undercounting if you have more than 64 cores.
|
||||
// For that case, one needs to call
|
||||
// GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and accumulate
|
||||
// the Size fields by iterating over the written-to buffer. Since I can't
|
||||
// easily test this on Windows, I'm deferring this to someone who can!
|
||||
//
|
||||
// If you fix this, also consider updatig GetCurrentCPU below.
|
||||
return NumSchedulableCPUs();
|
||||
}
|
||||
|
||||
int GetCurrentCPU() {
|
||||
// NOTE(ebrevdo): This returns the processor number within the processor
|
||||
// group on systems with >64 processors. Therefore it doesn't necessarily map
|
||||
// naturally to an index in NumSchedulableCPUs().
|
||||
//
|
||||
// On the plus side, this number is probably guaranteed to be within
|
||||
// [0, NumTotalCPUs()) due to its incomplete implementation.
|
||||
return GetCurrentProcessorNumber();
|
||||
}
|
||||
|
||||
bool NUMAEnabled() {
|
||||
// Not yet implemented: coming soon.
|
||||
return false;
|
||||
|
Loading…
Reference in New Issue
Block a user