[TF port] Add port::GetCurrentCPU and port::NumTotalCPUs.

GetCurrentCPU: returns the current CPU of the calling thread. NumTotalCPUs: attempts to get the total number of physical cores on the system. When both return non-failing values, we expect 0 <= GetCurrentCPU < NumTotalCPUs. PiperOrigin-RevId: 225088316
2018-12-11 16:05:17 -08:00 · 2018-12-11 16:05:17 -08:00 · 9748092a5d
commit 9748092a5d
parent 2087bffc23
4 changed files with 87 additions and 4 deletions
--- a/tensorflow/core/platform/cpu_info.h
+++ b/tensorflow/core/platform/cpu_info.h
@ -32,9 +32,22 @@ namespace port {
 // Returns an estimate of the number of schedulable CPUs for this
 // process.  Usually, it's constant throughout the lifetime of a
 // process, but it might change if the underlying cluster management
-// software can change it dynamically.
+// software can change it dynamically.  If the underlying call fails, a default
 // value (e.g. `4`) may be returned.
 int NumSchedulableCPUs();
 // Returns the total number of CPUs on the system.  This number should
 // not change even if the underlying cluster management software may
 // change the number of schedulable CPUs.  Unlike `NumSchedulableCPUs`, if the
 // underlying call fails, an invalid value of -1 (`kUnknownCPU`, declared
 // just below) will be returned; the user must check for validity.
 static constexpr int kUnknownCPU = -1;  // Sentinel: CPU id/count is unknown.
 int NumTotalCPUs();
 // Returns the id of the current CPU.  Returns -1 (`kUnknownCPU`) if the
 // current CPU cannot be identified.  If successful, the return value will be
 // in [0, NumTotalCPUs()).
 int GetCurrentCPU();
 // Returns an estimate of the number of hyperthreads per physical core
 // on the CPU
 int NumHyperthreadsPerCore();
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) {
  }
 }
 // Checks that the id reported for the current CPU is non-negative and
 // strictly less than NumTotalCPUs().
 // NOTE(review): both functions are documented as possibly returning -1 on
 // failure; on a platform where that happens these expectations will fail.
 TEST(Port, GetCurrentCPU) {
  const int cpu = GetCurrentCPU();
  EXPECT_GE(cpu, 0);
  EXPECT_LT(cpu, NumTotalCPUs());
 }
 TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
  mutex m;
  mutex_lock l(m);
@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) {
 }  // namespace port
 }  // namespace tensorflow
 // Test binary entry point: hands the command-line arguments to googletest
 // and returns the result of running all registered tests as the exit code.
 int main(int argc, char** argv) {
  // On Linux, add: FLAGS_logtostderr = true;
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
 }
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@ -25,7 +25,14 @@ limitations under the License.
 #if defined(__linux__) && !defined(__ANDROID__)
 #include <sched.h>
 #include <sys/sysinfo.h>
 #else
 #include <sys/syscall.h>
 #endif
 #if !defined(__APPLE__) && (__x86_64__ || __i386__)
 #include <cpuid.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -69,6 +76,34 @@ int NumSchedulableCPUs() {
  return kDefaultCores;
 }
 // Reports the CPU count obtained from absl.  A reported count of zero is
 // treated as "could not be determined" and mapped to kUnknownCPU.
 int NumTotalCPUs() {
  const int num_cpus = absl::base_internal::NumCPUs();
  if (num_cpus == 0) {
    return kUnknownCPU;
  }
  return num_cpus;
 }
 // Returns the id of the CPU the calling thread is currently running on, or
 // kUnknownCPU if it cannot be determined on this platform.
 int GetCurrentCPU() {
 #if defined(__linux__) && !defined(__ANDROID__)
  // sched_getcpu() reports failure as -1, which is the value of kUnknownCPU.
  return sched_getcpu();
 #else
  // On all other platforms, first attempt to use cpuid.  If cpuid is not
  // available or does not report an APIC, fall back to the getcpu syscall
  // where it exists.  The two attempts are separate #if blocks (not an
  // #if/#elif chain) so that the syscall is still tried when cpuid fails.
 #if defined(__cpuid_count)
  uint32_t eax, ebx, ecx, edx;
  __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
  if ((edx & (1u << 9)) != 0) {
    // EDX bit 9 signals an on-chip APIC; EBX bits 24-31 are the APIC ID.
    // NOTE(review): APIC IDs are not obviously guaranteed to be dense or to
    // lie in [0, NumTotalCPUs()) -- confirm before relying on that range.
    return static_cast<int>(ebx >> 24);
  }
 #endif  // defined(__cpuid_count)
 #if defined(__NR_getcpu)
  unsigned int cpu = 0;
  if (syscall(__NR_getcpu, &cpu, nullptr, nullptr) >= 0) {
    return static_cast<int>(cpu);
  }
 #endif  // defined(__NR_getcpu)
  return kUnknownCPU;
 #endif
 }
 int NumHyperthreadsPerCore() {
  static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
  return (ht_per_core > 0) ? ht_per_core : 1;
@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; }
 void NUMASetThreadNodeAffinity(int node) {}
-int NUMAGetThreadNodeAffinity() {
+int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
-  return kNUMANoAffinity;
-}
 void* AlignedMalloc(size_t size, int minimum_alignment) {
 #if defined(__ANDROID__)
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@ -21,6 +21,7 @@ limitations under the License.
 #endif
 #include <Windows.h>
 #include <processthreadsapi.h>
 #include <shlwapi.h>
 #include "tensorflow/core/platform/cpu_info.h"
@ -54,6 +55,30 @@ int NumSchedulableCPUs() {
  return system_info.dwNumberOfProcessors;
 }
 // Windows implementation: currently just forwards to NumSchedulableCPUs().
 int NumTotalCPUs() {
  // TODO(ebrevdo): Make this more accurate.
  //
  // This only returns the number of processors in the current
  // processor group; which may be undercounting if you have more than 64 cores.
  // For that case, one needs to call
  // GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and accumulate
  // the Size fields by iterating over the written-to buffer.  Since I can't
  // easily test this on Windows, I'm deferring this to someone who can!
  //
  // If you fix this, also consider updating GetCurrentCPU below.
  return NumSchedulableCPUs();
 }
 // Windows implementation: reports the value of GetCurrentProcessorNumber().
 int GetCurrentCPU() {
  // NOTE(ebrevdo): On systems with >64 processors the value obtained here is
  // the processor number within the current processor group, so it does not
  // necessarily map naturally to an index in NumSchedulableCPUs().
  //
  // On the plus side, because NumTotalCPUs() is likewise incomplete, this
  // number is probably guaranteed to be within [0, NumTotalCPUs()).
  const auto processor_number = GetCurrentProcessorNumber();
  return static_cast<int>(processor_number);
 }
 bool NUMAEnabled() {
  // Not yet implemented: coming soon.
  return false;