Begin introducing NUMA support for CPU threads and memory
by extending the core/platform API with some basic functionality. The new functions allow: 1. Determining how many NUMA nodes are available. 2. Setting the executing thread to be bound to a particular node, or not bound at all. 3. Allocating memory affiliated with a particular node. This change introduces the API only; there is not yet a real implementation. PiperOrigin-RevId: 204042160
This commit is contained in:
parent
7903d8d3f4
commit
5c7a6fba35
@ -333,6 +333,7 @@ filegroup(
|
|||||||
"platform/init_main.h",
|
"platform/init_main.h",
|
||||||
"platform/mem.h",
|
"platform/mem.h",
|
||||||
"platform/mutex.h",
|
"platform/mutex.h",
|
||||||
|
"platform/numa.h",
|
||||||
"platform/thread_annotations.h",
|
"platform/thread_annotations.h",
|
||||||
],
|
],
|
||||||
visibility = ["//visibility:private"],
|
visibility = ["//visibility:private"],
|
||||||
@ -3235,6 +3236,28 @@ tf_cc_test(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tf_cc_test(
|
||||||
|
name = "platform_numa_test",
|
||||||
|
size = "small",
|
||||||
|
srcs = ["platform/numa_test.cc"],
|
||||||
|
tags = [
|
||||||
|
# This test will not pass unless it has access to all NUMA nodes
|
||||||
|
# on the executing machine.
|
||||||
|
"manual",
|
||||||
|
"notap",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
":framework",
|
||||||
|
":lib",
|
||||||
|
":lib_internal",
|
||||||
|
":lib_test_internal",
|
||||||
|
":protos_all_cc",
|
||||||
|
":test",
|
||||||
|
":test_main",
|
||||||
|
"//third_party/eigen3",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
tf_cc_test(
|
tf_cc_test(
|
||||||
name = "platform_setround_test",
|
name = "platform_setround_test",
|
||||||
size = "small",
|
size = "small",
|
||||||
|
62
tensorflow/core/platform/numa.h
Normal file
62
tensorflow/core/platform/numa.h
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
||||||
|
#define TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
||||||
|
|
||||||
|
#include "tensorflow/core/platform/platform.h"
|
||||||
|
#include "tensorflow/core/platform/types.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace port {
|
||||||
|
|
||||||
|
// Returns true iff NUMA functions are supported.
|
||||||
|
bool NUMAEnabled();
|
||||||
|
|
||||||
|
// Returns the number of NUMA nodes present with respect to CPU operations.
|
||||||
|
// Typically this will be the number of sockets where some RAM has greater
|
||||||
|
// affinity with one socket than another.
|
||||||
|
int NUMANumNodes();
|
||||||
|
|
||||||
|
static const int kNUMANoAffinity = -1;
|
||||||
|
|
||||||
|
// If possible sets affinity of the current thread to the specified NUMA node.
|
||||||
|
// If node == kNUMANoAffinity removes affinity to any particular node.
|
||||||
|
void NUMASetThreadNodeAffinity(int node);
|
||||||
|
|
||||||
|
// Returns NUMA node affinity of the current thread, kNUMANoAffinity if none.
|
||||||
|
int NUMAGetThreadNodeAffinity();
|
||||||
|
|
||||||
|
// Like AlignedMalloc, but allocates memory with affinity to the specified NUMA
|
||||||
|
// node.
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
// 1. node must be >= 0 and < NUMANumNodes.
|
||||||
|
// 2. minimum_alignment must be a factor of system page size; the memory
|
||||||
|
// returned will be page-aligned.
|
||||||
|
// 3. This function is likely significantly slower than AlignedMalloc
|
||||||
|
// and should not be used for lots of small allocations. It makes more
|
||||||
|
// sense as a backing allocator for BFCAllocator, PoolAllocator, or similar.
|
||||||
|
void* NUMAMalloc(int node, size_t size, int minimum_alignment);
|
||||||
|
|
||||||
|
// Memory allocated by NUMAMalloc must be freed via NUMAFree.
|
||||||
|
void NUMAFree(void* ptr, size_t size);
|
||||||
|
|
||||||
|
// Returns NUMA node affinity of memory address, kNUMANoAffinity if none.
|
||||||
|
int NUMAGetMemAffinity(const void* ptr);
|
||||||
|
|
||||||
|
} // namespace port
|
||||||
|
} // namespace tensorflow
|
||||||
|
#endif // TENSORFLOW_CORE_PLATFORM_NUMA_H_
|
61
tensorflow/core/platform/numa_test.cc
Normal file
61
tensorflow/core/platform/numa_test.cc
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/core/platform/numa.h"
|
||||||
|
|
||||||
|
#include "tensorflow/core/platform/logging.h"
|
||||||
|
#include "tensorflow/core/platform/test.h"
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
TEST(Numa, NumNodes) {
|
||||||
|
if (port::NUMAEnabled()) {
|
||||||
|
EXPECT_GE(port::NUMANumNodes(), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Numa, Malloc) {
|
||||||
|
if (port::NUMAEnabled()) {
|
||||||
|
int num_nodes = port::NUMANumNodes();
|
||||||
|
for (int request_node = 0; request_node < num_nodes; ++request_node) {
|
||||||
|
void* ptr = port::NUMAMalloc(request_node, 8, 0);
|
||||||
|
EXPECT_NE(ptr, nullptr);
|
||||||
|
// Affinity cannot be tested until page is touched, so save a value.
|
||||||
|
*(reinterpret_cast<int*>(ptr)) = 0;
|
||||||
|
int affinity_node = port::NUMAGetMemAffinity(ptr);
|
||||||
|
EXPECT_EQ(affinity_node, request_node);
|
||||||
|
port::NUMAFree(ptr, 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Numa, SetNodeAffinity) {
|
||||||
|
// NOTE(tucker): This test is not reliable when executed under tap because
|
||||||
|
// the virtual machine may not have access to all of the available NUMA
|
||||||
|
// nodes. Not sure what to do about that.
|
||||||
|
EXPECT_EQ(-1, port::NUMAGetThreadNodeAffinity());
|
||||||
|
if (port::NUMAEnabled()) {
|
||||||
|
int num_nodes = port::NUMANumNodes();
|
||||||
|
for (int request_node = 0; request_node < num_nodes; ++request_node) {
|
||||||
|
port::NUMASetThreadNodeAffinity(request_node);
|
||||||
|
int affinity_node = port::NUMAGetThreadNodeAffinity();
|
||||||
|
EXPECT_EQ(affinity_node, request_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace tensorflow
|
@ -24,6 +24,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/cpu_info.h"
|
#include "tensorflow/core/platform/cpu_info.h"
|
||||||
#include "tensorflow/core/platform/logging.h"
|
#include "tensorflow/core/platform/logging.h"
|
||||||
#include "tensorflow/core/platform/mem.h"
|
#include "tensorflow/core/platform/mem.h"
|
||||||
|
#include "tensorflow/core/platform/numa.h"
|
||||||
#include "tensorflow/core/platform/snappy.h"
|
#include "tensorflow/core/platform/snappy.h"
|
||||||
#include "tensorflow/core/platform/types.h"
|
#include "tensorflow/core/platform/types.h"
|
||||||
|
|
||||||
@ -79,6 +80,19 @@ int NumHyperthreadsPerCore() {
|
|||||||
return (ht_per_core > 0) ? ht_per_core : 1;
|
return (ht_per_core > 0) ? ht_per_core : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool NUMAEnabled() {
|
||||||
|
// Not yet implemented: coming soon.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int NUMANumNodes() { return 1; }
|
||||||
|
|
||||||
|
void NUMASetThreadNodeAffinity(int node) {}
|
||||||
|
|
||||||
|
int NUMAGetThreadNodeAffinity() {
|
||||||
|
return kNUMANoAffinity;
|
||||||
|
}
|
||||||
|
|
||||||
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
void* AlignedMalloc(size_t size, int minimum_alignment) {
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
return memalign(minimum_alignment, size);
|
return memalign(minimum_alignment, size);
|
||||||
@ -128,6 +142,16 @@ void Free(void* ptr) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
|
||||||
|
return AlignedMalloc(size, minimum_alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
void NUMAFree(void* ptr, size_t size) { Free(ptr); }
|
||||||
|
|
||||||
|
int NUMAGetMemAffinity(const void* addr) {
|
||||||
|
return kNUMANoAffinity;
|
||||||
|
}
|
||||||
|
|
||||||
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
|
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
|
||||||
// No-op.
|
// No-op.
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user