* Merge ref-counting / allocation of variable tensors in one go.
* Input seems to be allocated twice, remove the second allocation loop.

PiperOrigin-RevId: 350264826
Change-Id: Icbc6283a6f2357f49dad3992040e286e5424dd92
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <limits>
|
|
#include <set>
|
|
#include <type_traits>
|
|
#include <utility>
|
|
|
|
namespace tflite {
namespace {

constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_inputs, bool preserve_intermediates,
                           int tensor_alignment)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment),
      persistent_arena_(kDefaultArenaAlignment),
      preserve_inputs_(preserve_inputs),
      preserve_intermediates_(preserve_intermediates),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

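// Note: ResetAllocationsAfter() below only releases arena-allocated (dynamic)
// tensors whose lifetime starts after `node`, presumably so that the plan for
// the earlier part of the graph can be kept while the remainder is re-planned.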
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = *graph_info_->tensor(i);
      if (tensor.allocation_type == kTfLiteArenaRw) {
        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }

  return kTfLiteOk;
}

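// PlanAllocations() decides *when* each tensor must live, not *where*: it
// fills alloc_node_ with the node at which each tensor is first needed, and
// dealloc_node_ with the node after which it is no longer consumed. Lifetimes
// are derived by counting pending consumers per tensor; e.g. a tensor read by
// two nodes starts with a ref-count of 2 and is scheduled for deallocation
// right after its second consumer executes.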
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as not yet allocated or deallocated.
  alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // A tensor that was never allocated does not need to be deallocated.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the whole inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so they are
    // never deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and it should never contain
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
  }

  // Queue all graph inputs for allocation. If preserve_inputs_ is true, make
  // sure they are never overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      if (preserve_inputs_) {
        refcounts[tensor_index]++;
      }
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_intermediates_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

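// ExecuteAllocations() runs after the ops have been prepared for the node
// range [first_node, last_node]: it extends the plan with any temporary
// tensors the ops created in their `prepare` functions, computes arena offsets
// for the affected tensors, commits both arenas, and then refreshes every
// tensor's data pointer.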
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  allocs_.resize(graph_info_->num_tensors());
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      if (!preserve_intermediates_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
    // SimpleMemoryArena::Commit() could move the base pointer.
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

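// The next two methods let a client release the read-write arena's underlying
// buffer and re-create it later (typically between invocations). The
// persistent arena, and with it persistent tensor data, is left untouched.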
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate underlying buffer.
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  // Resolve allocations for all tensors not on the persistent arena.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

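// Produces the order in which tensor offsets will be computed: tensors that
// live for the whole inference come first (sorted by index), and the remaining
// tensors follow in non-increasing size order, ties broken by allocation time.
// Placing the largest tensors first is a greedy heuristic intended to keep the
// arena compact.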
std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
                                                                int last_node) {
  auto tensor_compare = [this](int idx1, int idx2) {
    // Tensors that are live for the whole model inference time are allocated
    // at the beginning of the memory slice. Their relative order doesn't
    // actually matter, so they are simply sorted by index.
    if (this->alloc_node_[idx1] == 0 &&
        this->dealloc_node_[idx1] == kNodeNotAssigned) {
      if (this->alloc_node_[idx2] == 0 &&
          this->dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (this->alloc_node_[idx2] == 0 &&
        this->dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = this->graph_info_->tensor(idx1)->bytes;
    auto size2 = this->graph_info_->tensor(idx2)->bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return this->alloc_node_[idx1] < this->alloc_node_[idx2];
  };

  std::vector<int32_t> tensor_order;
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
      tensor_order.push_back(i);
    }
  }
  // Sort the tensors into the order in which their allocation offsets will be
  // calculated.
  std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);

  return tensor_order;
}

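// Assigns an arena offset to every tensor whose allocation falls in
// [first_node, last_node], in the order produced above. kTfLiteArenaRw tensors
// get lifetime-bounded allocations in the main arena; kTfLiteArenaRwPersistent
// tensors are placed once in the persistent arena and kept until the planner
// is reset.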
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  // Indices of tensors in the order in which their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensor_order =
      CreateTensorAllocationVector(first_node, last_node);

  // Deallocate if the tensor was already allocated.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw &&
        allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
    }
  }

  // Allocate in the planned order, assigning an offset to each tensor.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to prevent re-allocating persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
          context_, tensor_alignment_, tensor.bytes, tensor_index,
          /*first_node=*/alloc_node_[tensor_index],
          /*last_node=*/std::numeric_limits<int32_t>::max(),
          &allocs_[tensor_index]));
    }
  }
  return kTfLiteOk;
}

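// Writes the final data pointer for a tensor: the owning arena resolves the
// planned allocation (base address plus offset) into tensor.data.raw.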
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving it as a
    // nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

}  // namespace tflite