Begin introducing NUMA support for CPU threads and memory
by extending the core/platform API with some basic functionality. The new functions allow: 1. Determining how many NUMA nodes are available. 2. Setting the executing thread to be bound to a particular node, or not bound at all. 3. Allocating memory affiliated with a particular node. This change introduces the API only, there is not yet a real implementation. PiperOrigin-RevId: 204042160
This commit is contained in:
parent
7903d8d3f4
commit
5c7a6fba35
tensorflow/core
@ -333,6 +333,7 @@ filegroup(
|
||||
"platform/init_main.h",
|
||||
"platform/mem.h",
|
||||
"platform/mutex.h",
|
||||
"platform/numa.h",
|
||||
"platform/thread_annotations.h",
|
||||
],
|
||||
visibility = ["//visibility:private"],
|
||||
@ -3235,6 +3236,28 @@ tf_cc_test(
|
||||
],
|
||||
)
|
||||
|
||||
# Unit test for the NUMA platform API; sources live in platform/numa_test.cc.
tf_cc_test(
|
||||
name = "platform_numa_test",
|
||||
size = "small",
|
||||
srcs = ["platform/numa_test.cc"],
|
||||
tags = [
|
||||
# This test will not pass unless it has access to all NUMA nodes
|
||||
# on the executing machine.
|
||||
# NOTE(review): "manual" presumably keeps this target out of wildcard
# test runs and "notap" out of automated CI -- confirm.
"manual",
|
||||
"notap",
|
||||
],
|
||||
deps = [
|
||||
":framework",
|
||||
":lib",
|
||||
":lib_internal",
|
||||
":lib_test_internal",
|
||||
":protos_all_cc",
|
||||
":test",
|
||||
":test_main",
|
||||
"//third_party/eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test(
|
||||
name = "platform_setround_test",
|
||||
size = "small",
|
||||
|
62
tensorflow/core/platform/numa.h
Normal file
62
tensorflow/core/platform/numa.h
Normal file
@ -0,0 +1,62 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
||||
#define TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
||||
|
||||
#include "tensorflow/core/platform/platform.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace port {
|
||||
|
||||
// Returns true iff NUMA functions are supported.
|
||||
bool NUMAEnabled();
|
||||
|
||||
// Returns the number of NUMA nodes present with respect to CPU operations.
|
||||
// Typically this will be the number of sockets where some RAM has greater
|
||||
// affinity with one socket than another.
|
||||
int NUMANumNodes();
|
||||
|
||||
// Sentinel node value meaning "no affinity to any particular NUMA node".
// Accepted by NUMASetThreadNodeAffinity and returned by
// NUMAGetThreadNodeAffinity and NUMAGetMemAffinity.
static const int kNUMANoAffinity = -1;
|
||||
|
||||
// If possible sets affinity of the current thread to the specified NUMA node.
|
||||
// If node == kNUMANoAffinity removes affinity to any particular node.
|
||||
void NUMASetThreadNodeAffinity(int node);
|
||||
|
||||
// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none.
|
||||
int NUMAGetThreadNodeAffinity();
|
||||
|
||||
// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA
|
||||
// node.
|
||||
//
|
||||
// Notes:
|
||||
// 1. node must be >= 0 and < NUMANumNodes.
|
||||
// 2. minimum_alignment must be a factor of system page size, the memory
|
||||
// returned will be page-aligned.
|
||||
// 3. This function is likely significantly slower than AlignedMalloc
|
||||
// and should not be used for lots of small allocations. It makes more
|
||||
// sense as a backing allocator for BFCAllocator, PoolAllocator, or similar.
|
||||
void* NUMAMalloc(int node, size_t size, int minimum_alignment);
|
||||
|
||||
// Memory allocated by NUMAMalloc must be freed via NUMAFree.
|
||||
void NUMAFree(void* ptr, size_t size);
|
||||
|
||||
// Returns NUMA node affinity of memory address, kNUMANoAffinity if none.
|
||||
int NUMAGetMemAffinity(const void* ptr);
|
||||
|
||||
} // namespace port
|
||||
} // namespace tensorflow
|
||||
#endif // TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
61
tensorflow/core/platform/numa_test.cc
Normal file
61
tensorflow/core/platform/numa_test.cc
Normal file
@ -0,0 +1,61 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/numa.h"
|
||||
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace internal {
|
||||
|
||||
// When NUMA functions are supported, at least one node must be reported.
// When they are not, there is nothing to check.
TEST(Numa, NumNodes) {
  if (!port::NUMAEnabled()) return;
  EXPECT_GE(port::NUMANumNodes(), 1);
}
|
||||
|
||||
// For every node reported by NUMANumNodes, allocates a small buffer with
// NUMAMalloc and checks that the memory's affinity matches the requested
// node.  Does nothing when NUMA functions are not supported.
TEST(Numa, Malloc) {
  if (!port::NUMAEnabled()) return;
  // One size shared by the allocation and the matching NUMAFree call.
  constexpr size_t kAllocBytes = 8;
  const int num_nodes = port::NUMANumNodes();
  for (int request_node = 0; request_node < num_nodes; ++request_node) {
    void* ptr = port::NUMAMalloc(request_node, kAllocBytes, 0);
    // Must be fatal: the store below would otherwise dereference nullptr.
    ASSERT_NE(ptr, nullptr);
    // Affinity cannot be tested until page is touched, so save a value.
    *(static_cast<int*>(ptr)) = 0;
    int affinity_node = port::NUMAGetMemAffinity(ptr);
    EXPECT_EQ(affinity_node, request_node);
    port::NUMAFree(ptr, kAllocBytes);
  }
}
|
||||
|
||||
// Binds the current thread to each NUMA node in turn and checks that
// NUMAGetThreadNodeAffinity reports the node that was just requested.
TEST(Numa, SetNodeAffinity) {
  // NOTE(tucker): This test is not reliable when executed under tap because
  // the virtual machine may not have access to all of the available NUMA
  // nodes.  Not sure what to do about that.
  EXPECT_EQ(port::kNUMANoAffinity, port::NUMAGetThreadNodeAffinity());
  if (port::NUMAEnabled()) {
    int num_nodes = port::NUMANumNodes();
    for (int request_node = 0; request_node < num_nodes; ++request_node) {
      port::NUMASetThreadNodeAffinity(request_node);
      int affinity_node = port::NUMAGetThreadNodeAffinity();
      EXPECT_EQ(affinity_node, request_node);
    }
    // Remove the binding again so the thread is not left pinned to the last
    // node once this test returns.
    port::NUMASetThreadNodeAffinity(port::kNUMANoAffinity);
  }
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace tensorflow
|
@ -24,6 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/cpu_info.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/mem.h"
|
||||
#include "tensorflow/core/platform/numa.h"
|
||||
#include "tensorflow/core/platform/snappy.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
@ -79,6 +80,19 @@ int NumHyperthreadsPerCore() {
|
||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||
}
|
||||
|
||||
// Reports whether NUMA functions are supported.
// Not yet implemented (coming soon), so the answer is always false.
bool NUMAEnabled() { return false; }
|
||||
|
||||
// Placeholder node count: this implementation always reports one node.
int NUMANumNodes() {
  return 1;
}
|
||||
|
||||
// Placeholder: the requested node is ignored and nothing is changed.
void NUMASetThreadNodeAffinity(int node) {
  // No-op.
}
|
||||
|
||||
int NUMAGetThreadNodeAffinity() {
|
||||
return kNUMANoAffinity;
|
||||
}
|
||||
|
||||
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
||||
#if defined(__ANDROID__)
|
||||
return memalign(minimum_alignment, size);
|
||||
@ -128,6 +142,16 @@ void Free(void* ptr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
|
||||
return AlignedMalloc(size, minimum_alignment);
|
||||
}
|
||||
|
||||
// Placeholder: hands `ptr` to Free; `size` is not used.
void NUMAFree(void* ptr, size_t size) {
  Free(ptr);
}
|
||||
|
||||
int NUMAGetMemAffinity(const void* addr) {
|
||||
return kNUMANoAffinity;
|
||||
}
|
||||
|
||||
// Intentionally does nothing; `num_bytes` is ignored.
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {}
|
||||
|
Loading…
Reference in New Issue
Block a user