Add posix implementation for platform/numa.h functions, relying
on hwloc. PiperOrigin-RevId: 235742876
This commit is contained in:
parent
9c01b03c4e
commit
a6bf9c8476
1
.bazelrc
1
.bazelrc
@ -67,6 +67,7 @@ build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
|
|||||||
build:gdr --define=with_gdr_support=true
|
build:gdr --define=with_gdr_support=true
|
||||||
build:ngraph --define=with_ngraph_support=true
|
build:ngraph --define=with_ngraph_support=true
|
||||||
build:verbs --define=with_verbs_support=true
|
build:verbs --define=with_verbs_support=true
|
||||||
|
build:numa --define=with_numa_support=true
|
||||||
|
|
||||||
# Options to disable default on features
|
# Options to disable default on features
|
||||||
build:noaws --define=no_aws_support=true
|
build:noaws --define=no_aws_support=true
|
||||||
|
@ -1751,6 +1751,7 @@ def main():
|
|||||||
config_info_line('gdr', 'Build with GDR support.')
|
config_info_line('gdr', 'Build with GDR support.')
|
||||||
config_info_line('verbs', 'Build with libverbs support.')
|
config_info_line('verbs', 'Build with libverbs support.')
|
||||||
config_info_line('ngraph', 'Build with Intel nGraph support.')
|
config_info_line('ngraph', 'Build with Intel nGraph support.')
|
||||||
|
config_info_line('numa', 'Build with NUMA support.')
|
||||||
config_info_line(
|
config_info_line(
|
||||||
'dynamic_kernels',
|
'dynamic_kernels',
|
||||||
'(Experimental) Build kernels into separate shared objects.')
|
'(Experimental) Build kernels into separate shared objects.')
|
||||||
|
@ -304,6 +304,12 @@ config_setting(
|
|||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Matches builds that pass --define=with_numa_support=true (the value set by
# the `numa` build config).
config_setting(
    name = "with_numa_support",
    define_values = {
        "with_numa_support": "true",
    },
    visibility = ["//visibility:public"],
)
|
||||||
|
|
||||||
# Crosses between framework_shared_object and a bunch of other configurations
|
# Crosses between framework_shared_object and a bunch of other configurations
|
||||||
# due to limitations in nested select() statements.
|
# due to limitations in nested select() statements.
|
||||||
config_setting(
|
config_setting(
|
||||||
|
@ -128,6 +128,9 @@ load(
|
|||||||
"tf_additional_libdevice_srcs",
|
"tf_additional_libdevice_srcs",
|
||||||
"tf_additional_minimal_lib_srcs",
|
"tf_additional_minimal_lib_srcs",
|
||||||
"tf_additional_mpi_lib_defines",
|
"tf_additional_mpi_lib_defines",
|
||||||
|
"tf_additional_numa_deps",
|
||||||
|
"tf_additional_numa_lib_defines",
|
||||||
|
"tf_additional_numa_copts",
|
||||||
"tf_additional_proto_hdrs",
|
"tf_additional_proto_hdrs",
|
||||||
"tf_additional_proto_srcs",
|
"tf_additional_proto_srcs",
|
||||||
"tf_additional_test_deps",
|
"tf_additional_test_deps",
|
||||||
@ -388,15 +391,15 @@ cc_library(
|
|||||||
":platform_port_hdrs",
|
":platform_port_hdrs",
|
||||||
":platform_port_internal_hdrs",
|
":platform_port_internal_hdrs",
|
||||||
],
|
],
|
||||||
copts = tf_copts(),
|
copts = tf_copts() + tf_additional_numa_copts(),
|
||||||
visibility = ["//tensorflow/core:__subpackages__"],
|
visibility = ["//tensorflow/core:__subpackages__"],
|
||||||
deps = [
|
deps = [
|
||||||
":lib_platform",
|
":lib_platform",
|
||||||
":platform_base",
|
":platform_base",
|
||||||
"//tensorflow/core/platform/default/build_config:port",
|
|
||||||
"@com_google_absl//absl/base",
|
"@com_google_absl//absl/base",
|
||||||
|
"//tensorflow/core/platform/default/build_config:port",
|
||||||
"@snappy",
|
"@snappy",
|
||||||
],
|
] + tf_additional_numa_deps(),
|
||||||
)
|
)
|
||||||
|
|
||||||
filegroup(
|
filegroup(
|
||||||
@ -2278,11 +2281,14 @@ LIB_INTERNAL_PUBLIC_HEADERS = tf_additional_lib_hdrs() + [
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Replicated for lib_internal and lib_internal_impl.
|
# Replicated for lib_internal and lib_internal_impl.
|
||||||
# Preprocessor defines for the internal lib targets: the platform-specific
# defines, TF_USE_SNAPPY, and the optional verbs / MPI / GDR / NUMA feature
# defines, concatenated in that order.
LIB_INTERNAL_DEFINES = (
    tf_additional_lib_defines() +
    ["TF_USE_SNAPPY"] +
    tf_additional_verbs_lib_defines() +
    tf_additional_mpi_lib_defines() +
    tf_additional_gdr_lib_defines() +
    tf_additional_numa_lib_defines()
)
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "lib_internal",
|
name = "lib_internal",
|
||||||
@ -2355,19 +2361,20 @@ cc_library(
|
|||||||
copts = tf_copts(),
|
copts = tf_copts(),
|
||||||
defines = LIB_INTERNAL_DEFINES,
|
defines = LIB_INTERNAL_DEFINES,
|
||||||
deps = tf_additional_lib_deps() + [
|
deps = tf_additional_lib_deps() + [
|
||||||
":lib_hash_crc32c_accelerate_internal",
|
":lib_hash_crc32c_accelerate_internal",
|
||||||
":lib_proto_parsing",
|
":lib_proto_parsing",
|
||||||
":abi",
|
":abi",
|
||||||
":core_stringpiece",
|
":core_stringpiece",
|
||||||
"@com_google_absl//absl/memory",
|
"@com_google_absl//absl/memory",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
"//third_party/eigen3",
|
"//third_party/eigen3",
|
||||||
"//tensorflow/core/platform/default/build_config:platformlib",
|
"//tensorflow/core/platform/default/build_config:platformlib",
|
||||||
"@snappy",
|
"@snappy",
|
||||||
"@zlib_archive//:zlib",
|
"@zlib_archive//:zlib",
|
||||||
"@double_conversion//:double-conversion",
|
"@double_conversion//:double-conversion",
|
||||||
"@protobuf_archive//:protobuf",
|
"@protobuf_archive//:protobuf",
|
||||||
] + tf_protos_all_impl() + tf_protos_grappler_impl(),
|
] + tf_protos_all_impl() + tf_protos_grappler_impl() +
|
||||||
|
tf_additional_numa_deps(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# File compiled with extra flags to get cpu-specific acceleration.
|
# File compiled with extra flags to get cpu-specific acceleration.
|
||||||
|
@ -725,6 +725,12 @@ def tf_additional_gdr_lib_defines():
|
|||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def tf_additional_numa_lib_defines():
    """Returns the preprocessor defines that switch on NUMA support.

    Yields ["TENSORFLOW_USE_NUMA"] when //tensorflow:with_numa_support
    matches, and an empty list for every other configuration.
    """
    defines_by_condition = {
        "//tensorflow:with_numa_support": ["TENSORFLOW_USE_NUMA"],
        "//conditions:default": [],
    }
    return select(defines_by_condition)
|
||||||
|
|
||||||
def tf_py_clif_cc(name, visibility = None, **kwargs):
|
def tf_py_clif_cc(name, visibility = None, **kwargs):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -757,3 +763,26 @@ def tf_additional_binary_deps():
|
|||||||
"//third_party/mkl:intel_binary_blob",
|
"//third_party/mkl:intel_binary_blob",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def tf_additional_numa_deps():
    """Returns the extra deps needed by the NUMA-aware platform code.

    Android, iOS, Windows and Darwin builds get no extra deps; every other
    configuration depends on "@hwloc".
    """
    platforms_without_hwloc = [
        "//tensorflow:android",
        "//tensorflow:ios",
        "//tensorflow:windows",
        "//tensorflow:darwin",
    ]
    deps_by_condition = {platform: [] for platform in platforms_without_hwloc}
    deps_by_condition["//conditions:default"] = ["@hwloc"]
    return select(deps_by_condition)
|
||||||
|
|
||||||
|
def tf_additional_numa_copts():
    """Returns the extra compiler options for the NUMA-aware platform code.

    Android, iOS, Windows and Darwin builds get no extra options; every other
    configuration gets the hwloc include path and -DTENSORFLOW_USE_NUMA.

    NOTE(review): the macro is defined here regardless of
    //tensorflow:with_numa_support, so targets using these copts enable the
    hwloc code path even without the `numa` config -- confirm this is intended.
    """
    hwloc_copts = [
        "-Ithird_party/hwloc/hwloc-master/include",
        "-DTENSORFLOW_USE_NUMA",
    ]
    platforms_without_hwloc = [
        "//tensorflow:android",
        "//tensorflow:ios",
        "//tensorflow:windows",
        "//tensorflow:darwin",
    ]
    copts_by_condition = {platform: [] for platform in platforms_without_hwloc}
    copts_by_condition["//conditions:default"] = hwloc_copts
    return select(copts_by_condition)
|
||||||
|
@ -45,6 +45,10 @@ limitations under the License.
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if TENSORFLOW_USE_NUMA
|
||||||
|
#include "hwloc.h" // TF:hwloc
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace port {
|
namespace port {
|
||||||
|
|
||||||
@ -115,16 +119,94 @@ int NumHyperthreadsPerCore() {
|
|||||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool NUMAEnabled() {
|
#ifdef TENSORFLOW_USE_NUMA
|
||||||
// Not yet implemented: coming soon.
|
namespace {
|
||||||
return false;
|
static hwloc_topology_t hwloc_topology_handle;
|
||||||
|
|
||||||
|
bool HaveHWLocTopology() {
|
||||||
|
// One time initialization
|
||||||
|
static bool init = []() {
|
||||||
|
if (hwloc_topology_init(&hwloc_topology_handle)) {
|
||||||
|
LOG(ERROR) << "Call to hwloc_topology_init() failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hwloc_topology_load(hwloc_topology_handle)) {
|
||||||
|
LOG(ERROR) << "Call to hwloc_topology_load() failed";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}();
|
||||||
|
return init;
|
||||||
}
|
}
|
||||||
|
|
||||||
int NUMANumNodes() { return 1; }
|
// Return the first hwloc object of the given type whose os_index
|
||||||
|
// matches 'index'.
|
||||||
|
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
|
||||||
|
hwloc_obj_t obj = nullptr;
|
||||||
|
if (index >= 0) {
|
||||||
|
while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
|
||||||
|
nullptr) {
|
||||||
|
if (obj->os_index == index) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return obj;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
#endif // TENSORFLOW_USE_NUMA
|
||||||
|
|
||||||
void NUMASetThreadNodeAffinity(int node) {}
|
// NUMA counts as enabled only when more than one NUMA node is reported.
bool NUMAEnabled() {
  const int node_count = NUMANumNodes();
  return node_count > 1;
}
|
||||||
|
|
||||||
int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
|
// Returns the number of NUMA nodes reported by hwloc, never less than 1.
// Builds without TENSORFLOW_USE_NUMA, or runs where the hwloc topology is
// unavailable, always report a single node.
int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (!HaveHWLocTopology()) {
    return 1;
  }
  const int detected =
      hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
  // Clamp so that a zero or negative count still reads as one node.
  return detected > 1 ? detected : 1;
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}
|
||||||
|
|
||||||
|
// Binds the calling thread to the CPUs of the NUMA node whose os_index is
// 'node'.
//
// Does nothing in builds without TENSORFLOW_USE_NUMA or when the hwloc
// topology is unavailable. An unknown node or a rejected bind request is
// logged; the function has no other way to report failure.
void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      // hwloc_set_cpubind() returns non-zero when the binding could not be
      // applied; surface that instead of silently leaving the thread unbound.
      if (hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                            HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT)) {
        LOG(ERROR) << "Call to hwloc_set_cpubind() failed for NUMA node "
                   << node;
      }
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}
|
||||||
|
|
||||||
|
int NUMAGetThreadNodeAffinity() {
|
||||||
|
int node_index = kNUMANoAffinity;
|
||||||
|
#ifdef TENSORFLOW_USE_NUMA
|
||||||
|
if (HaveHWLocTopology()) {
|
||||||
|
hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
|
||||||
|
hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
|
||||||
|
HWLOC_CPUBIND_THREAD);
|
||||||
|
hwloc_obj_t obj = nullptr;
|
||||||
|
// Return the first NUMA node whose cpuset is a (non-proper) superset of
|
||||||
|
// that of the current thread.
|
||||||
|
while ((obj = hwloc_get_next_obj_by_type(
|
||||||
|
hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
|
||||||
|
if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
|
||||||
|
node_index = obj->os_index;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hwloc_bitmap_free(thread_cpuset);
|
||||||
|
}
|
||||||
|
#endif // TENSORFLOW_USE_NUMA
|
||||||
|
return node_index;
|
||||||
|
}
|
||||||
|
|
||||||
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
@ -154,12 +236,54 @@ void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }
|
|||||||
void Free(void* ptr) { free(ptr); }
|
void Free(void* ptr) { free(ptr); }
|
||||||
|
|
||||||
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
|
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
|
||||||
|
#ifdef TENSORFLOW_USE_NUMA
|
||||||
|
if (HaveHWLocTopology()) {
|
||||||
|
hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
|
||||||
|
if (numa_node) {
|
||||||
|
return hwloc_alloc_membind(hwloc_topology_handle, size,
|
||||||
|
numa_node->nodeset, HWLOC_MEMBIND_BIND,
|
||||||
|
HWLOC_MEMBIND_BYNODESET);
|
||||||
|
} else {
|
||||||
|
LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // TENSORFLOW_USE_NUMA
|
||||||
return AlignedMalloc(size, minimum_alignment);
|
return AlignedMalloc(size, minimum_alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
void NUMAFree(void* ptr, size_t size) { Free(ptr); }
|
// Releases a buffer of 'size' bytes at 'ptr'. With NUMA support compiled in
// and the hwloc topology available the buffer is handed to hwloc_free();
// otherwise it is released with Free().
//
// NOTE(review): the choice depends only on topology availability, not on how
// 'ptr' was allocated -- verify that callers never pass memory obtained from
// the non-hwloc allocation path while the topology is available.
void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  const bool use_hwloc = HaveHWLocTopology();
  if (!use_hwloc) {
    Free(ptr);
  } else {
    hwloc_free(hwloc_topology_handle, ptr, size);
  }
#else
  Free(ptr);
#endif  // TENSORFLOW_USE_NUMA
}
|
||||||
|
|
||||||
int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }
|
int NUMAGetMemAffinity(const void* addr) {
|
||||||
|
int node = kNUMANoAffinity;
|
||||||
|
#ifdef TENSORFLOW_USE_NUMA
|
||||||
|
if (HaveHWLocTopology() && addr) {
|
||||||
|
hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
|
||||||
|
if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
|
||||||
|
HWLOC_MEMBIND_BYNODESET)) {
|
||||||
|
hwloc_obj_t obj = nullptr;
|
||||||
|
while ((obj = hwloc_get_next_obj_by_type(
|
||||||
|
hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
|
||||||
|
if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
|
||||||
|
node = obj->os_index;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hwloc_bitmap_free(nodeset);
|
||||||
|
} else {
|
||||||
|
LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // TENSORFLOW_USE_NUMA
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
|
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -130,6 +130,7 @@ genrule(
|
|||||||
"@gemmlowp//:LICENSE",
|
"@gemmlowp//:LICENSE",
|
||||||
"@gif_archive//:COPYING",
|
"@gif_archive//:COPYING",
|
||||||
"@highwayhash//:LICENSE",
|
"@highwayhash//:LICENSE",
|
||||||
|
"@hwloc//:LICENSE",
|
||||||
"@icu//:icu4c/LICENSE",
|
"@icu//:icu4c/LICENSE",
|
||||||
"@jpeg//:LICENSE.md",
|
"@jpeg//:LICENSE.md",
|
||||||
"@llvm//:LICENSE.TXT",
|
"@llvm//:LICENSE.TXT",
|
||||||
@ -199,6 +200,7 @@ genrule(
|
|||||||
"@gemmlowp//:LICENSE",
|
"@gemmlowp//:LICENSE",
|
||||||
"@gif_archive//:COPYING",
|
"@gif_archive//:COPYING",
|
||||||
"@highwayhash//:LICENSE",
|
"@highwayhash//:LICENSE",
|
||||||
|
"@hwloc//:LICENSE",
|
||||||
"@icu//:icu4j/main/shared/licenses/LICENSE",
|
"@icu//:icu4j/main/shared/licenses/LICENSE",
|
||||||
"@jpeg//:LICENSE.md",
|
"@jpeg//:LICENSE.md",
|
||||||
"@llvm//:LICENSE.TXT",
|
"@llvm//:LICENSE.TXT",
|
||||||
|
@ -171,6 +171,7 @@ filegroup(
|
|||||||
"@gemmlowp//:LICENSE",
|
"@gemmlowp//:LICENSE",
|
||||||
"@gif_archive//:COPYING",
|
"@gif_archive//:COPYING",
|
||||||
"@highwayhash//:LICENSE",
|
"@highwayhash//:LICENSE",
|
||||||
|
"@hwloc//:LICENSE",
|
||||||
"@icu//:icu4c/LICENSE",
|
"@icu//:icu4c/LICENSE",
|
||||||
"@jpeg//:LICENSE.md",
|
"@jpeg//:LICENSE.md",
|
||||||
"@keras_applications_archive//:LICENSE",
|
"@keras_applications_archive//:LICENSE",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user