From 92511b345c7fb72217cdb36bc0251596a9c68319 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 20 Jun 2019 07:25:00 -0700 Subject: [PATCH] Adds a MaxParallelism() method returning a suggested maximum parallelism for a specific numa node. PiperOrigin-RevId: 254191552 --- tensorflow/core/common_runtime/local_device.cc | 8 +------- tensorflow/core/platform/cpu_info.h | 5 +++++ tensorflow/core/platform/posix/port.cc | 10 ++++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index 835463eab55..84b3af721da 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -95,13 +95,7 @@ struct LocalDevice::EigenThreadPoolInfo { intra_op_parallelism_threads = env_num_threads; // If no session setting or environment, compute a reasonable default. if (intra_op_parallelism_threads == 0) { - intra_op_parallelism_threads = port::NumSchedulableCPUs(); - if (numa_node != port::kNUMANoAffinity) { - // Assume that CPUs are equally distributed over available NUMA nodes. - // This may not be true, but there isn't currently a better way of - // determining the number of CPUs specific to the requested node. - intra_op_parallelism_threads /= port::NUMANumNodes(); - } + intra_op_parallelism_threads = port::MaxParallelism(numa_node); } } ThreadOptions thread_opts; diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h index fd76047edc5..b2d0f21fe7a 100644 --- a/tensorflow/core/platform/cpu_info.h +++ b/tensorflow/core/platform/cpu_info.h @@ -46,6 +46,11 @@ int NumSchedulableCPUs(); // called during initialization, i.e., before before main() has started. int MaxParallelism(); +// Returns an estimate for the maximum parallelism for this process on the +// provided NUMA node, or across all nodes if `numa_node` is kNUMANoAffinity. +// See MaxParallelism() for more information. 
+int MaxParallelism(int numa_node); + // Returns the total number of CPUs on the system. This number should // not change even if the underlying cluster management software may // change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 13a904295c1..a3699de965a 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -82,6 +82,16 @@ int NumSchedulableCPUs() { int MaxParallelism() { return NumSchedulableCPUs(); } +int MaxParallelism(int numa_node) { + if (numa_node != port::kNUMANoAffinity) { + // Assume that CPUs are equally distributed over available NUMA nodes. + // This may not be true, but there is currently no better per-node count. + // NOTE(review): the division truncates; result is 0 if CPUs < NUMA nodes. + return NumSchedulableCPUs() / port::NUMANumNodes(); + } + return NumSchedulableCPUs(); +} + int NumTotalCPUs() { int count = absl::base_internal::NumCPUs(); return (count <= 0) ? kUnknownCPU : count;