Remove mutex lock from the ProcessState::GetCPUAllocator.
PiperOrigin-RevId: 322064244
Change-Id: I87f4abd2a8d578bb6c0c2d9ef84a2fc9e552d8cc
parent 19a8ce8888
commit 304bfa2fb4
tensorflow/core/common_runtime/process_state.cc

@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/process_state.h"
 
+#include <atomic>
 #include <cstring>
 #include <vector>
 
@@ -42,7 +43,8 @@ namespace tensorflow {
   return instance;
 }
 
-ProcessState::ProcessState() : numa_enabled_(false) {}
+ProcessState::ProcessState()
+    : numa_enabled_(false), cpu_allocators_cached_(0) {}
 
 string ProcessState::MemDesc::DebugString() {
   return strings::StrCat((loc == CPU ? "CPU " : "GPU "), dev_index,
@@ -61,6 +63,12 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) {
 
 Allocator* ProcessState::GetCPUAllocator(int numa_node) {
   if (!numa_enabled_ || numa_node == port::kNUMANoAffinity) numa_node = 0;
+
+  // Check if allocator for the numa node is in lock-free cache.
+  if (numa_node < cpu_allocators_cached_.load(std::memory_order_acquire)) {
+    return cpu_allocators_cache_[numa_node];
+  }
+
   mutex_lock lock(mu_);
   while (cpu_allocators_.size() <= static_cast<size_t>(numa_node)) {
     // If visitors have been defined we need an Allocator built from
@@ -115,6 +123,10 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) {
       allocator = new TrackingAllocator(allocator, true);
     }
     cpu_allocators_.push_back(allocator);
+    if (cpu_allocators_.size() < cpu_allocators_cache_.max_size()) {
+      cpu_allocators_cache_[cpu_allocators_.size() - 1] = allocator;
+      cpu_allocators_cached_.fetch_add(1, std::memory_order_release);
+    }
     if (!sub_allocator) {
       DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty());
     }
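The change above is a classic lock-free publication pattern: a newly built allocator is copied into a fixed-size array and an atomic count is bumped with release semantics, so the acquire-load on the fast path guarantees that any slot below the count holds a fully published pointer. A minimal standalone sketch of the same idea, with hypothetical names and plain std:: primitives in place of TensorFlow's mutex and Allocator types, looks like this:

// Hedged sketch of the lock-free fast path above; names are illustrative,
// not TensorFlow APIs.
#include <array>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <vector>

struct Entry {};  // Stand-in for Allocator.

class EntryRegistry {
 public:
  Entry* Get(int index) {
    // Fast path: the acquire-load pairs with the release fetch_add below, so
    // if index < cached_, the pointer in cache_[index] is fully published.
    if (index < cached_.load(std::memory_order_acquire)) {
      return cache_[index];
    }
    // Slow path: create missing entries under the mutex, mirroring the
    // while-loop in GetCPUAllocator.
    std::lock_guard<std::mutex> lock(mu_);
    while (entries_.size() <= static_cast<size_t>(index)) {
      Entry* e = new Entry;  // Lives for the registry's lifetime.
      entries_.push_back(e);
      if (entries_.size() < cache_.max_size()) {
        cache_[entries_.size() - 1] = e;
        cached_.fetch_add(1, std::memory_order_release);
      }
    }
    return entries_[index];  // Safe: still holding mu_.
  }

 private:
  std::mutex mu_;
  std::vector<Entry*> entries_;    // May reallocate; only touched under mu_.
  std::array<Entry*, 8> cache_{};  // Fixed storage, safe to read lock-free.
  std::atomic<int> cached_{0};     // Number of published cache_ slots.
};

The growable vector stays authoritative but is only touched under the mutex; the fixed-size array never reallocates, which is what makes the unsynchronized indexed read safe once the count covers the slot.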
tensorflow/core/common_runtime/process_state.h

@@ -102,6 +102,13 @@ class ProcessState : public ProcessStateInterface {
   std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ TF_GUARDED_BY(mu_);
   std::vector<SubAllocator::Visitor> cpu_free_visitors_ TF_GUARDED_BY(mu_);
 
+  // A cache of cpu allocators indexed by a numa node. Used as a fast path to
+  // get CPU allocator by numa node id without locking the mutex. We can't use
+  // `cpu_allocators_` storage in the lock-free path because concurrent
+  // operation can deallocate the vector storage.
+  std::atomic<int> cpu_allocators_cached_;
+  std::array<Allocator*, 8> cpu_allocators_cache_;
+
   // Optional RecordingAllocators that wrap the corresponding
   // Allocators for runtime attribute use analysis.
   MDMap mem_desc_map_;
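As a purely illustrative driver (not part of this commit, and assuming the existing tensorflow::ProcessState::singleton() accessor together with the GetCPUAllocator signature shown above), concurrent lookups after the first call per NUMA node should now return from the cache without contending on mu_:

// Hypothetical concurrency driver, not part of this commit.
#include <thread>
#include <vector>

#include "tensorflow/core/common_runtime/process_state.h"

int main() {
  tensorflow::ProcessState* ps = tensorflow::ProcessState::singleton();
  std::vector<std::thread> workers;
  for (int node = 0; node < 4; ++node) {
    workers.emplace_back([ps, node] {
      for (int i = 0; i < (1 << 20); ++i) {
        // The first call per node builds the allocator under the mutex;
        // later calls hit the lock-free cache.
        tensorflow::Allocator* a = ps->GetCPUAllocator(node);
        (void)a;
      }
    });
  }
  for (std::thread& t : workers) t.join();
  return 0;
}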