Implement "Greedy by size planner" for more optimal memory allocation

http://arxiv.org/abs/2001.03288
Observed improvements:
- Mobilenet V1 shows a 35% memory reduction
- Mobilenet V2 shows a 2% reduction
- Most other tested models showed a ~10-15% improvement

PiperOrigin-RevId: 293250794
Change-Id: I41b1f927dfbafb1b3db360522a1417c6d993a789
Terry Heo 2020-02-04 16:05:20 -08:00 committed by TensorFlower Gardener
parent 5423d894e2
commit 1c4d426919
9 changed files with 387 additions and 326 deletions
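
As background for the diffs that follow, here is a minimal standalone sketch of the greedy-by-size idea described in the commit message and the linked paper, under simplified assumptions: a hypothetical TensorUsage record stands in for TfLiteTensor, alignment and error handling are omitted, and the sketch illustrates the technique rather than the TFLite implementation below. Each tensor is placed largest-first at the lowest offset that avoids every already placed tensor whose usage interval overlaps its own.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical description of one tensor's memory requirements.
struct TensorUsage {
  size_t size;         // bytes required
  int32_t first_node;  // first node that uses the tensor
  int32_t last_node;   // last node that uses the tensor
  size_t offset;       // assigned arena offset (output)
};

// Greedy-by-size placement: consider tensors in non-increasing size order and
// put each one at the lowest offset that avoids every already placed tensor
// with an overlapping usage interval. Returns the resulting arena size.
size_t GreedyBySize(std::vector<TensorUsage>& tensors) {
  std::vector<size_t> order(tensors.size());
  for (size_t i = 0; i < order.size(); ++i) order[i] = i;
  std::sort(order.begin(), order.end(),
            [&](size_t a, size_t b) { return tensors[a].size > tensors[b].size; });

  std::vector<size_t> placed;  // indices that already have an offset
  size_t arena_size = 0;
  for (size_t idx : order) {
    TensorUsage& t = tensors[idx];
    size_t offset = 0;
    bool moved = true;
    while (moved) {  // bump past colliding allocations until none remain
      moved = false;
      for (size_t p : placed) {
        const TensorUsage& other = tensors[p];
        // Tensors with disjoint usage intervals may share arena space.
        if (other.last_node < t.first_node || other.first_node > t.last_node)
          continue;
        // Overlap in memory: move this tensor past the other allocation.
        if (offset < other.offset + other.size && other.offset < offset + t.size) {
          offset = other.offset + other.size;
          moved = true;
        }
      }
    }
    t.offset = offset;
    placed.push_back(idx);
    arena_size = std::max(arena_size, offset + t.size);
  }
  return arena_size;
}
```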


@ -181,7 +181,6 @@ cc_library(
],
copts = TFLITE_DEFAULT_COPTS,
deps = [
":simple_memory_arena",
":string",
"//tensorflow/lite/c:common",
"//tensorflow/lite/core/api",


@ -19,11 +19,11 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/simple_memory_arena.h"
#include "tensorflow/lite/string_type.h"
namespace tflite {


@ -14,19 +14,19 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"
#include <algorithm>
#include <cstdint>
#include <limits>
#include <set>
#include <type_traits>
#include <utility>
namespace tflite {
namespace {
struct AllocationInfo {
// The node index requesting this allocation.
int node;
// The tensor index to be allocated or deallocated.
int tensor;
// Whether to allocate or deallocate
enum Type { ALLOC, DEALLOC } type;
};
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();
} // namespace
ArenaPlanner::ArenaPlanner(TfLiteContext* context,
std::unique_ptr<GraphInfo> graph_info,
@ -57,14 +57,12 @@ TfLiteStatus ArenaPlanner::ResetAllocations() {
TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
allocs_.clear();
allocs_.resize(graph_info_->num_tensors());
// Note that we only clear the alloc_queue_ when re-planning allocations, as
// it should only change when the graph topology itself changes.
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
if (allocs_[i].node > node && allocs_[i].size > 0) {
if (allocs_[i].first_node > node && allocs_[i].size > 0) {
TfLiteTensor& tensor = *graph_info_->tensor(i);
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
@ -80,44 +78,34 @@ TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
TfLiteStatus ArenaPlanner::PlanAllocations() {
// Invalidate any existing data.
TF_LITE_ENSURE_STATUS(ResetAllocations());
// The alloc_queue_ is specific to the graph topology, and will be
// completely reconstructed from graph data here.
alloc_queue_.clear();
// Mark every tensor as not yet assigned to any node.
alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
// Keeps track of references to each tensor.
std::vector<int> refcounts(graph_info_->num_tensors(), 0);
// `allocated` and `deallocated` are technically lists of boolean values.
// We use `vector<int>` instead of `vector<bool>` to save compiled binary size.
std::vector<int> allocated(graph_info_->num_tensors(), false);
std::vector<int> deallocated(graph_info_->num_tensors(), false);
auto allocate = [this, &allocated, &deallocated](int node,
int tensor) -> TfLiteStatus {
if (allocated[tensor]) {
auto allocate = [this](int node, int tensor) -> TfLiteStatus {
if (alloc_node_[tensor] != kNodeNotAssigned) {
// Tensor has already been allocated.
return kTfLiteOk;
}
TF_LITE_ENSURE(context_, !deallocated[tensor]);
alloc_queue_.push_back({node, tensor, AllocationInfo::ALLOC});
allocated[tensor] = true;
TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
alloc_node_[tensor] = node;
return kTfLiteOk;
};
auto deallocate = [this, &allocated, &deallocated](
int node, int tensor) -> TfLiteStatus {
if (!allocated[tensor]) {
// Do not enqueue a DEALLOC if the tensor is never allocated.
auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
if (alloc_node_[tensor] == kNodeNotAssigned) {
// We don't need to deallocate a tensor that was never allocated.
// This happens with constant tensors.
return kTfLiteOk;
}
TF_LITE_ENSURE(context_, !deallocated[tensor]);
alloc_queue_.push_back({node, tensor, AllocationInfo::DEALLOC});
TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
dealloc_node_[tensor] = node;
return kTfLiteOk;
};
// There will be an entry in alloc_queue_ for the allocation of each tensor
// and another for their deallocation.
alloc_queue_.reserve(2 * graph_info_->num_tensors());
// We must make sure the output tensors are never overwritten. We do that by
// artificially adding one to their ref-counts so they are never selected
// for deallocation.
@ -205,7 +193,20 @@ TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
// Grow the size of `allocs_` if necessary. This allows allocating temporary
// tensors in op's `prepare` function.
TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
allocs_.resize(graph_info_->num_tensors());
// Set allocation and deallocation for temporary tensors.
for (size_t i = first_node; i <= last_node && i < graph_info_->num_nodes();
++i) {
const TfLiteNode& node = graph_info_->node(i);
TfLiteIntArray* node_temporaries = node.temporaries;
for (int j = 0; j < node_temporaries->size; ++j) {
int tensor_index = node_temporaries->data[j];
alloc_node_[tensor_index] = i;
dealloc_node_[tensor_index] = i;
}
}
TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
TF_LITE_ENSURE_STATUS(Commit());
@ -257,43 +258,79 @@ TfLiteStatus ArenaPlanner::Commit() {
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
int active_node = first_node;
// When dynamic tensors are present this method is called multiple times.
// The items in the alloc_queue_ referring to nodes before first_node were
// processed previously and should be skipped. Entries after last_node are
// not yet ready to be handled.
for (const auto& alloc_info : alloc_queue_) {
if (alloc_info.node < first_node) continue;
if (alloc_info.node > last_node) break;
if (alloc_info.node == active_node) {
// This is the first allocation/deallocation for a given node. It is
// time to deallocate the previous temporaries and allocate new ones.
if (active_node != first_node) {
TF_LITE_ENSURE_STATUS(
CalculateDeallocationOfInternalTensors(active_node - 1));
std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
int last_node) {
auto tensor_compare = [this](int idx1, int idx2) {
// Tensors that stay alive for the whole model inference time are
// allocated at the beginning of the memory slice. Their relative order
// doesn't actually matter, so they are simply sorted by index.
if (this->alloc_node_[idx1] == 0 &&
this->dealloc_node_[idx1] == kNodeNotAssigned) {
if (this->alloc_node_[idx2] == 0 &&
this->dealloc_node_[idx2] == kNodeNotAssigned) {
return idx1 < idx2;
}
TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
++active_node;
return true;
}
// Handle the current item.
if (alloc_info.type == AllocationInfo::ALLOC) {
if (this->alloc_node_[idx2] == 0 &&
this->dealloc_node_[idx2] == kNodeNotAssigned) {
return false;
}
// All other tensors are sorted in non-increasing order of their size.
auto size1 = this->graph_info_->tensor(idx1)->bytes;
auto size2 = this->graph_info_->tensor(idx2)->bytes;
if (size1 != size2) {
return size1 > size2;
}
// Tensors with equal size are sorted in order of their allocation time.
return this->alloc_node_[idx1] < this->alloc_node_[idx2];
};
std::set<int32_t> tensors_set;
for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
tensors_set.insert(i);
}
}
// Indices of tensors in the order their allocation offsets will be calculated.
std::vector<int32_t> tensor_order(tensors_set.begin(), tensors_set.end());
std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);
return tensor_order;
}
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
// Indices of tensors in the order their allocation offsets will be calculated.
const std::vector<int32_t> tensor_order =
CreateTensorAllocationVector(first_node, last_node);
// Deallocate if the tensor was already allocated.
for (const auto& tensor_index : tensor_order) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
if (tensor.allocation_type == kTfLiteArenaRw &&
allocs_[tensor_index].size != 0) {
TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
}
}
// Allocate an arena offset for each tensor, in the order computed above.
for (const auto& tensor_index : tensor_order) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(
CalculateTensorAllocation(alloc_info.tensor, alloc_info.node));
} else {
TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
tensor_index, alloc_node_[tensor_index],
dealloc_node_[tensor_index], &allocs_[tensor_index]));
}
if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
context_, tensor_alignment_, tensor.bytes, tensor_index,
/*first_node=*/alloc_node_[tensor_index],
/*last_node=*/std::numeric_limits<int32_t>::max(),
&allocs_[tensor_index]));
}
}
// If the graph is empty, the node index can be negative since we
// subtract from the active node, so node_index can be zero in those
// cases.
if (active_node > 0) {
// Don't forget to deallocate temporaries of last node.
TF_LITE_ENSURE_STATUS(
CalculateDeallocationOfInternalTensors(active_node - 1));
}
return kTfLiteOk;
}
@ -314,55 +351,4 @@ TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index,
int node_index) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, tensor_alignment_,
tensor.bytes, tensor_index,
node_index, &allocs_[tensor_index]));
}
if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
context_, tensor_alignment_, tensor.bytes, tensor_index, node_index,
&allocs_[tensor_index]));
}
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
if (tensor.allocation_type == kTfLiteArenaRw) {
TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
}
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
int node_index) {
if (node_index < static_cast<int>(graph_info_->num_nodes())) {
const TfLiteNode& node = graph_info_->node(static_cast<size_t>(node_index));
TfLiteIntArray* node_temporaries = node.temporaries;
for (int i = 0; i < node_temporaries->size; ++i) {
int tensor_index = node_temporaries->data[i];
TF_LITE_ENSURE_STATUS(
CalculateTensorAllocation(tensor_index, node_index));
}
}
return kTfLiteOk;
}
TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
int node_index) {
if (node_index < static_cast<int>(graph_info_->num_nodes())) {
const TfLiteNode& node = graph_info_->node(static_cast<size_t>(node_index));
TfLiteIntArray* node_temporaries = node.temporaries;
for (int i = 0; i < node_temporaries->size; ++i) {
int tensor_index = node_temporaries->data[i];
TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
}
}
return kTfLiteOk;
}
} // namespace tflite
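
To make the tensor ordering introduced above easier to follow outside the diff, here is a hedged standalone sketch of the same comparator over hypothetical (alloc_node, dealloc_node, bytes) records; it mirrors CreateTensorAllocationVector but is not the planner code itself.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

// Hypothetical per-tensor record, for illustration only.
struct TensorInfo {
  int32_t alloc_node;    // first node that uses the tensor
  int32_t dealloc_node;  // last node that uses it (kNodeNotAssigned = whole lifespan)
  size_t bytes;
};

// Ordering rules used by the planner:
//  1) tensors alive for the whole inference first, by index;
//  2) everything else by non-increasing size;
//  3) ties broken by earlier allocation node.
std::vector<int32_t> OrderForAllocation(const std::vector<TensorInfo>& t) {
  std::vector<int32_t> order(t.size());
  for (int32_t i = 0; i < static_cast<int32_t>(t.size()); ++i) order[i] = i;
  std::sort(order.begin(), order.end(), [&](int32_t a, int32_t b) {
    const bool a_whole =
        t[a].alloc_node == 0 && t[a].dealloc_node == kNodeNotAssigned;
    const bool b_whole =
        t[b].alloc_node == 0 && t[b].dealloc_node == kNodeNotAssigned;
    if (a_whole && b_whole) return a < b;
    if (a_whole || b_whole) return a_whole;
    if (t[a].bytes != t[b].bytes) return t[a].bytes > t[b].bytes;
    return t[a].alloc_node < t[b].alloc_node;
  });
  return order;
}
```

For example, with {{0, kNodeNotAssigned, 4}, {1, 3, 64}, {2, 2, 64}} the result is {0, 1, 2}: the whole-lifespan tensor goes first, and the two equally sized tensors are ordered by their allocation node.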


@ -74,6 +74,16 @@ class ArenaPlanner : public MemoryPlanner {
// tensors.
TfLiteStatus Commit();
// Returns a vector of tensor indices, ordered by the comparator used for the
// allocation algorithm:
// - Tensors that stay alive for the whole model inference time go first;
// - Other tensors (e.g. intermediate and temporary ones) are sorted in
// non-increasing order of their size. If the sizes of two tensors are equal,
// the one that needs to be allocated earlier goes first.
std::vector<int32_t> CreateTensorAllocationVector(int first_node,
int last_node);
// Traverse the allocation queue and reserve space in the appropriate arena
// for all tensors affected by ops in the interval [first_node, last_node].
TfLiteStatus CalculateAllocations(int first_node, int last_node);
@ -82,12 +92,6 @@ class ArenaPlanner : public MemoryPlanner {
// position inside the corresponding arena buffer.
TfLiteStatus ResolveTensorAllocation(int tensor_index);
// Register an allocation for the given tensor.
TfLiteStatus CalculateTensorAllocation(int tensor_index, int node_index);
// Register a deallocation for the given tensor.
TfLiteStatus CalculateTensorDeallocation(int tensor_index);
// Register an allocation for all internal (temporary) tensors of
// 'node_index'.
TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);
@ -100,11 +104,15 @@ class ArenaPlanner : public MemoryPlanner {
std::unique_ptr<GraphInfo> graph_info_;
// Stores allocation data for all tensors.
std::vector<ArenaAlloc> allocs_;
std::vector<ArenaAllocWithUsageInterval> allocs_;
// A chronological list of instructions to allocate and deallocate tensors,
// reflecting the way they are used in the graph.
std::vector<AllocationInfo> alloc_queue_;
// First node that uses the tensor. The tensor needs to be allocated before
// execution of that node's operation.
std::vector<int32_t> alloc_node_;
// Last node that uses the tensor. The tensor can be deallocated after
// execution of that node's operation.
std::vector<int32_t> dealloc_node_;
// Raw memory buffer that is allocated for all temporary and graph outputs
// that are declared kTfLiteArenaRw.


@ -233,18 +233,6 @@ TEST_F(ArenaPlannerTest, EmptyGraph) {
Execute(0, 10);
}
TEST_F(ArenaPlannerTest, DeallocationOfInputTensor) {
// This is a negative test case that makes sure no allocation is done for
// input tensors when the call is made with a negative node_index. The
// previous check compared node_index (an int) against an unsigned int, so the
// implicit conversion turned the negative value into a large unsigned one and
// the invalid case slipped through. The new check takes care of this problem
// and removes the warning as well.
TestGraph graph({-1}, {}, {1});
SetGraph(&graph);
Execute(0, 10);
}
TEST_F(ArenaPlannerTest, GraphWithNoOps) {
TestGraph graph({0, 10}, {}, {5, 11});
SetGraph(&graph);
@ -261,8 +249,8 @@ TEST_F(ArenaPlannerTest, GraphWithOneOp) {
TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
SetGraph(&graph);
Execute(0, 10);
EXPECT_EQ(GetOffset(1), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(2), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
}
TEST_F(ArenaPlannerTest, ZeroSizedTensors) {
@ -286,12 +274,12 @@ TEST_F(ArenaPlannerTest, SimpleGraph) {
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(1), 0);
}
TEST_F(ArenaPlannerTest, SimpleGraphInputsPreserved) {
@ -309,13 +297,10 @@ TEST_F(ArenaPlannerTest, SimpleGraphInputsPreserved) {
// Alloc(+) and dealloc(-) order: +0 +1 +2 +4 +5 -2 +3 -4 -5
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
// Because we are keeping the inputs alive until the end (due to
// preserve_inputs=true), the output tensor will not be able to use that
// space. It will end up using the same area as tensor #2.
EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
}
TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
@ -331,12 +316,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(1), 0);
}
TEST_F(ArenaPlannerTest, SimpleGraphWithResetAllocationsAfter) {
@ -352,19 +337,19 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithResetAllocationsAfter) {
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(1), 0);
// Reset allocations after the first node
ResetAllocationsAfter(0);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_FALSE(IsUnallocated(0));
EXPECT_FALSE(IsUnallocated(1));
EXPECT_FALSE(IsUnallocated(2));
EXPECT_TRUE(IsUnallocated(3));
EXPECT_TRUE(IsUnallocated(4));
EXPECT_TRUE(IsUnallocated(5));
@ -383,12 +368,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(1), 0);
}
TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
@ -408,12 +393,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(1), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
}
TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
@ -439,12 +424,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(1), 0);
}
TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
@ -466,11 +451,11 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr);
// Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
}
TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
@ -495,10 +480,10 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
// Op4: +10 -4 -5 -8
Execute(0, 0);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
EXPECT_TRUE(IsUnallocated(6));
EXPECT_TRUE(IsUnallocated(4));
EXPECT_TRUE(IsUnallocated(5));
@ -508,62 +493,61 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
EXPECT_TRUE(IsUnallocated(10));
Execute(1, 1);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_TRUE(IsUnallocated(7));
EXPECT_TRUE(IsUnallocated(9));
EXPECT_TRUE(IsUnallocated(8));
EXPECT_TRUE(IsUnallocated(10));
Execute(2, 2);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
// Here's an interesting allocation. Even though #6 requires only 21 bytes,
// its deallocation freed up 24 bytes due to the alignment requirements in
// the arena. That means we can fit #7 in the same space!
EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
// #7 (24 bytes) is allocated in the place where #0 and #6 (4+24=28 bytes)
// were before their deallocation.
EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
EXPECT_TRUE(IsUnallocated(9));
EXPECT_TRUE(IsUnallocated(8));
EXPECT_TRUE(IsUnallocated(10));
Execute(3, 3);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
// The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough
// for #9, so it goes at the end.
EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
// The deallocation of #1 and #2 frees up 20 bytes, but that's not enough
// for either #9 or #8, so they both go at the end.
EXPECT_EQ(GetOffset(9), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
EXPECT_TRUE(IsUnallocated(10));
Execute(4, 4);
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), 0);
EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(9), GetOffsetAfter(4));
EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
// There's just enough space at the beginning for #10 due to the
// deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs
// There is enough space at the beginning for #10 due to the
// deallocation of #7, #1, #2 and #3 (total 56 bytes, #10 needs
// only 33).
EXPECT_EQ(GetOffset(10), 0);
}
@ -646,6 +630,86 @@ TEST_F(ArenaPlannerTest, ModifiedGraph_DeallocateNonPersistentArena) {
EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
}
TEST_F(ArenaPlannerTest, ComplexGraph) {
TestGraph graph({0},
{
/* in, out, tmp */
{{0}, {1}, {}},
{{1}, {2}, {}},
{{1}, {3}, {}},
{{1}, {4}, {}},
{{2, 3, 4}, {5}, {}},
{{5}, {6}, {}},
{{5}, {7}, {}},
{{6, 7}, {8}, {}},
},
{8});
(*graph.tensors())[0].bytes = 32;
(*graph.tensors())[1].bytes = 28;
(*graph.tensors())[2].bytes = 36;
(*graph.tensors())[3].bytes = 16;
(*graph.tensors())[4].bytes = 8;
(*graph.tensors())[5].bytes = 64;
(*graph.tensors())[6].bytes = 10;
(*graph.tensors())[7].bytes = 40;
SetGraph(&graph);
Execute(0, 10);
// Alloc(+) and dealloc(-) order: +0 +1 -0 +2 +3 +4 -1 +5 -2 -3 -4 +6 +7 -5 +8
EXPECT_EQ(GetOffset(5), 0);
EXPECT_EQ(GetOffset(7), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(7));
EXPECT_EQ(GetOffset(2), GetOffsetAfter(5));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(3));
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(8), 0);
}
TEST_F(ArenaPlannerTest, GraphWithIntermediates) {
TestGraph graph({0, 1},
{
/* in, out, tmp */
{{0}, {2}, {3}},
{{1, 2}, {4, 5}, {}},
{{5}, {6, 7}, {8, 9, 10}},
{{4, 6}, {11}, {12}},
{{11}, {13}, {}},
{{7, 13}, {14}, {15}},
},
{11, 14});
SetGraph(&graph, /*preserve_inputs=*/true);
Execute(0, 10);
// Alloc(+) and dealloc(-) order by operation:
// Op0: +0 +1 +2 +3 -3
// Op1: +4 +5 -2 -4
// Op2: +6 +7 +8 +9 +10 -8 -9 -10 -5
// Op3: +11 +12 -12 -4 -6
// Op4: +13
// Op5: +14 +15 -7 -13 -15
EXPECT_EQ(GetOffset(0), 0);
EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
EXPECT_EQ(GetOffset(15), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(14), GetOffsetAfter(15));
EXPECT_EQ(GetOffset(13), GetOffsetAfter(14));
EXPECT_EQ(GetOffset(12), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(11), GetOffsetAfter(13));
EXPECT_EQ(GetOffset(10), GetOffsetAfter(1));
EXPECT_EQ(GetOffset(9), GetOffsetAfter(10));
EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
EXPECT_EQ(GetOffset(7), GetOffsetAfter(11));
EXPECT_EQ(GetOffset(6), GetOffsetAfter(8));
EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
EXPECT_EQ(GetOffset(4), GetOffsetAfter(7));
EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
// #2 is allocated in the smallest suitable gap, which is not the first
// available one.
EXPECT_EQ(GetOffset(2), GetOffsetAfter(5));
}
} // namespace
} // namespace tflite


@ -369,15 +369,14 @@ TEST(BasicInterpreter, CheckArenaAllocation) {
ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
ASSERT_LT(interpreter.tensor(0)->data.raw, interpreter.tensor(1)->data.raw);
ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(2)->data.raw);
ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(3)->data.raw);
ASSERT_LT(interpreter.tensor(3)->data.raw, interpreter.tensor(4)->data.raw);
ASSERT_LT(interpreter.tensor(4)->data.raw, interpreter.tensor(5)->data.raw);
ASSERT_LT(interpreter.tensor(5)->data.raw, interpreter.tensor(7)->data.raw);
ASSERT_EQ(interpreter.tensor(6)->data.raw, interpreter.tensor(2)->data.raw);
// #7 is the one with the largest pointer.
ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(3)->data.raw);
ASSERT_EQ(interpreter.tensor(3)->data.raw, interpreter.tensor(9)->data.raw);
ASSERT_LT(interpreter.tensor(3)->data.raw, interpreter.tensor(5)->data.raw);
ASSERT_LT(interpreter.tensor(5)->data.raw, interpreter.tensor(2)->data.raw);
ASSERT_EQ(interpreter.tensor(2)->data.raw, interpreter.tensor(7)->data.raw);
ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(4)->data.raw);
// #4 is the one with the largest pointer.
ASSERT_EQ(interpreter.tensor(8)->data.raw, nullptr);
ASSERT_EQ(interpreter.tensor(9)->data.raw, interpreter.tensor(5)->data.raw);
}
TEST(BasicInterpreter, BufferAccess) {


@ -31,68 +31,71 @@ T AlignTo(size_t alignment, T offset) {
} // namespace
namespace tflite {
TfLiteStatus SimpleMemoryArena::Allocate(TfLiteContext* context,
size_t alignment, size_t size,
int32_t tensor, int32_t node,
ArenaAlloc* new_alloc) {
TfLiteStatus SimpleMemoryArena::Allocate(
TfLiteContext* context, size_t alignment, size_t size, int32_t tensor,
int32_t first_node, int32_t last_node,
ArenaAllocWithUsageInterval* new_alloc) {
TF_LITE_ENSURE(context, alignment <= arena_alignment_);
new_alloc->tensor = tensor;
new_alloc->node = node;
new_alloc->first_node = first_node;
new_alloc->last_node = last_node;
new_alloc->size = size;
if (size == 0) {
new_alloc->offset = 0;
return kTfLiteOk;
}
size_t current_top = 0;
if (!allocs_.empty()) {
auto last = allocs_.rbegin();
current_top = last->offset + last->size;
}
// If we don't find a better gap just allocate at the end of the buffer.
size_t best_offset = AlignTo(alignment, current_top);
size_t best_offset_fit = std::numeric_limits<size_t>::max();
auto best_insertion_it = allocs_.end();
const size_t kOffsetNotAssigned = std::numeric_limits<size_t>::max();
size_t best_offset = kOffsetNotAssigned;
size_t best_offset_fit = kOffsetNotAssigned;
// Go through the sorted allocs and look at the gaps between them.
size_t current_offset = 0;
for (auto it = allocs_.begin(); it != allocs_.end(); ++it) {
for (const auto& alloc : ordered_allocs_) {
if (alloc.last_node < first_node || alloc.first_node > last_node) {
// Usage interval of alloc doesn't intersect with current tensor's usage
// interval, so we skip it.
continue;
}
size_t aligned_current_offset = AlignTo(alignment, current_offset);
// If we found a gap larger than required size, and smaller than previous
// best fit, take it.
if (aligned_current_offset + size <= it->offset &&
it->offset - current_offset < best_offset_fit) {
if (aligned_current_offset + size <= alloc.offset &&
alloc.offset - aligned_current_offset < best_offset_fit) {
best_offset = aligned_current_offset;
best_offset_fit = it->offset - current_offset;
best_insertion_it = it;
best_offset_fit = alloc.offset - current_offset;
}
current_offset = it->offset + it->size;
current_offset = std::max(current_offset, alloc.offset + alloc.size);
}
if (best_offset == kOffsetNotAssigned) {
best_offset = AlignTo(alignment, current_offset);
}
// Update the required buffer size.
high_water_mark_ = std::max(high_water_mark_, best_offset + size);
new_alloc->offset = best_offset;
allocs_.insert(best_insertion_it, *new_alloc);
auto insertion_it = ordered_allocs_.begin();
while (insertion_it != ordered_allocs_.end() && *insertion_it < *new_alloc) {
++insertion_it;
}
ordered_allocs_.insert(insertion_it, *new_alloc);
return kTfLiteOk;
}
TfLiteStatus SimpleMemoryArena::Deallocate(TfLiteContext* context,
const ArenaAlloc& alloc) {
TfLiteStatus SimpleMemoryArena::Deallocate(
TfLiteContext* context, const ArenaAllocWithUsageInterval& alloc) {
if (alloc.size == 0) {
return kTfLiteOk;
}
int erased_allocs_count = 0;
auto it = allocs_.begin();
while (it != allocs_.end()) {
auto it = ordered_allocs_.begin();
while (it != ordered_allocs_.end()) {
if (it->tensor == alloc.tensor) {
erased_allocs_count++;
it = allocs_.erase(it);
it = ordered_allocs_.erase(it);
} else {
++it;
}
@ -128,9 +131,9 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) {
return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError;
}
TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context,
const ArenaAlloc& alloc,
char** output_ptr) {
TfLiteStatus SimpleMemoryArena::ResolveAlloc(
TfLiteContext* context, const ArenaAllocWithUsageInterval& alloc,
char** output_ptr) {
TF_LITE_ENSURE(context, committed_);
TF_LITE_ENSURE(context, output_ptr != nullptr);
if (alloc.size == 0) {
@ -144,7 +147,7 @@ TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context,
TfLiteStatus SimpleMemoryArena::ClearPlan() {
committed_ = false;
high_water_mark_ = 0;
allocs_.clear();
ordered_allocs_.clear();
return kTfLiteOk;
}
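
As a consolidated reading of the new gap search in Allocate above, here is a hedged, self-contained sketch (simplified types, no error handling, not the arena code itself): walk the offset-ordered allocations, ignore those whose usage interval does not intersect the new tensor's, remember the smallest gap that still fits, and otherwise place the tensor after the last conflicting allocation.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

// Simplified stand-in for ArenaAllocWithUsageInterval, for illustration only.
struct Alloc {
  size_t offset, size;
  int32_t first_node, last_node;
};

// Returns the offset at which to place a new allocation of `size` bytes used
// during [first_node, last_node], given existing allocations sorted by offset.
size_t FindOffset(const std::vector<Alloc>& ordered_allocs, size_t size,
                  size_t alignment, int32_t first_node, int32_t last_node) {
  auto align_to = [](size_t a, size_t x) { return ((x + a - 1) / a) * a; };
  const size_t kNotFound = std::numeric_limits<size_t>::max();
  size_t best_offset = kNotFound;  // tightest gap found so far that fits
  size_t best_fit = kNotFound;
  size_t current = 0;  // end of the conflicting region seen so far
  for (const Alloc& a : ordered_allocs) {
    // Allocations with a disjoint usage interval never conflict; skip them.
    if (a.last_node < first_node || a.first_node > last_node) continue;
    const size_t aligned = align_to(alignment, current);
    // Gap between the conflicting region and this allocation: take it if it
    // fits and is tighter than the best gap seen so far.
    if (aligned + size <= a.offset && a.offset - aligned < best_fit) {
      best_offset = aligned;
      best_fit = a.offset - aligned;
    }
    current = std::max(current, a.offset + a.size);
  }
  // No suitable gap: place the allocation after all conflicting ones.
  return best_offset != kNotFound ? best_offset : align_to(alignment, current);
}
```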


@ -24,25 +24,30 @@ limitations under the License.
namespace tflite {
// This little structure holds the offset and the size for a dynamic memory
// allocation in the memory arena. When the arena is committed and the
// underlying buffer is set, the alloc can be resolved into an actual memory
// pointer.
struct ArenaAlloc {
ArenaAlloc() { reset(); }
// allocation in the memory arena, as well as the first_node and last_node that
// use the corresponding tensor. A contiguous chunk of memory of this size needs
// to be allocated before execution of the operation in the first node, and it
// can be deallocated after execution of the operation in the last node. When
// the arena is committed and the underlying buffer is set, the alloc can be
// resolved into an actual memory pointer.
struct ArenaAllocWithUsageInterval {
ArenaAllocWithUsageInterval() { reset(); }
size_t offset;
size_t size;
int32_t tensor;
int32_t node;
int32_t first_node;
int32_t last_node;
inline void reset() {
offset = 0;
size = 0;
tensor = -1;
node = -1;
first_node = -1;
last_node = -1;
}
inline bool operator<(const ArenaAlloc& other) const {
inline bool operator<(const ArenaAllocWithUsageInterval& other) const {
return offset < other.offset;
}
};
@ -59,12 +64,17 @@ class SimpleMemoryArena {
arena_alignment_(arena_alignment),
high_water_mark_(0),
underlying_buffer_size_(0),
allocs_() {}
ordered_allocs_() {}
// Schedule memory allocation for a tensor with a given size, assuming that it
// needs to be allocated before the execution of first_node, and deallocated
// after the execution of last_node.
TfLiteStatus Allocate(TfLiteContext* context, size_t alignment, size_t size,
int32_t tensor, int32_t node, ArenaAlloc* new_alloc);
int32_t tensor, int32_t first_node, int32_t last_node,
ArenaAllocWithUsageInterval* new_alloc);
TfLiteStatus Deallocate(TfLiteContext* context, const ArenaAlloc& alloc);
TfLiteStatus Deallocate(TfLiteContext* context,
const ArenaAllocWithUsageInterval& alloc);
inline size_t RequiredBufferSize() {
// Add in a small amount of padding to reduce the chance of resize events
@ -75,7 +85,8 @@ class SimpleMemoryArena {
TfLiteStatus Commit(TfLiteContext* context);
TfLiteStatus ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc,
TfLiteStatus ResolveAlloc(TfLiteContext* context,
const ArenaAllocWithUsageInterval& alloc,
char** output_ptr);
// This clears allocation details but does not release the underlying buffer.
@ -101,8 +112,7 @@ class SimpleMemoryArena {
std::unique_ptr<char[]> underlying_buffer_;
size_t underlying_buffer_size_;
char* underlying_buffer_aligned_ptr_;
// TODO(maciekc): add list iterator to the ArenaAlloc to lookup quickly.
std::list<ArenaAlloc> allocs_;
std::list<ArenaAllocWithUsageInterval> ordered_allocs_;
};
} // namespace tflite
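
Since the struct comment above states the usage-interval rule informally, the following hedged helper (hypothetical, not part of the header) spells out when two allocations may occupy the same arena bytes:

```cpp
#include <cstdint>

// Hypothetical interval type mirroring the first_node/last_node fields.
struct UsageInterval {
  int32_t first_node;
  int32_t last_node;
};

// Two allocations may reuse the same arena space only if their usage
// intervals do not intersect, i.e. one tensor's last user runs strictly
// before the other tensor's first user.
inline bool CanShareMemory(const UsageInterval& a, const UsageInterval& b) {
  return a.last_node < b.first_node || b.last_node < a.first_node;
}

// Example: intervals [0, 2] and [3, 5] can share memory; [0, 3] and [3, 5]
// cannot, because node 3 needs both tensors alive at once.
```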


@ -27,39 +27,33 @@ void ReportError(TfLiteContext* context, const char* format, ...) {}
TEST(SimpleMemoryArenaTest, BasicArenaOperations) {
TfLiteContext context;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[6];
ArenaAllocWithUsageInterval allocs[6];
arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 2, 3, &allocs[2]);
arena.Deallocate(&context, allocs[0]);
arena.Allocate(&context, 32, 1023, 3, 4, &allocs[3]);
arena.Allocate(&context, 32, 2047, 4, 5, &allocs[4]);
arena.Deallocate(&context, allocs[1]);
arena.Allocate(&context, 32, 1023, 5, 6, &allocs[5]);
arena.Allocate(&context, 32, 2047, 0, 1, 3, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, 5, &allocs[1]);
arena.Allocate(&context, 32, 2047, 2, 3, 6, &allocs[2]);
arena.Allocate(&context, 32, 2047, 3, 5, 6, &allocs[3]);
arena.Allocate(&context, 32, 1023, 4, 4, 6, &allocs[4]);
arena.Allocate(&context, 32, 1023, 5, 6, 6, &allocs[5]);
EXPECT_EQ(allocs[0].offset, 0);
EXPECT_EQ(allocs[1].offset, 2048);
EXPECT_EQ(allocs[2].offset, 4096);
EXPECT_EQ(allocs[3].offset, 0);
EXPECT_EQ(allocs[4].offset, 6144);
EXPECT_EQ(allocs[5].offset, 1024);
EXPECT_EQ(allocs[5].offset, 2048);
}
TEST(SimpleMemoryArenaTest, BasicZeroAlloc) {
TfLiteContext context;
SimpleMemoryArena arena(64);
ArenaAlloc alloc;
ArenaAllocWithUsageInterval alloc;
// Zero-sized allocs should have a 0 offset and size.
ASSERT_EQ(arena.Allocate(&context, 32, 0, 0, 1, &alloc), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 0, 0, 1, 2, &alloc), kTfLiteOk);
EXPECT_EQ(alloc.offset, 0);
EXPECT_EQ(alloc.size, 0);
// Deallocation of zero-sized allocs should always succeed (even redundantly).
ASSERT_EQ(arena.Deallocate(&context, alloc), kTfLiteOk);
ASSERT_EQ(arena.Deallocate(&context, alloc), kTfLiteOk);
// The zero-sized alloc should resolve to null.
char* resolved_ptr = nullptr;
ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
@ -70,15 +64,13 @@ TEST(SimpleMemoryArenaTest, BasicZeroAlloc) {
TEST(SimpleMemoryArenaTest, InterleavedZeroAlloc) {
TfLiteContext context;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[4];
ArenaAllocWithUsageInterval allocs[4];
// Interleave some zero and non-zero-sized allocations and deallocations.
ASSERT_EQ(arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 0, 1, 2, &allocs[1]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 1023, 2, 3, &allocs[2]), kTfLiteOk);
ASSERT_EQ(arena.Deallocate(&context, allocs[1]), kTfLiteOk);
ASSERT_EQ(arena.Deallocate(&context, allocs[2]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 2047, 3, 4, &allocs[3]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 2047, 0, 0, 4, &allocs[0]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 0, 1, 1, 2, &allocs[1]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 1023, 2, 1, 2, &allocs[2]), kTfLiteOk);
ASSERT_EQ(arena.Allocate(&context, 32, 2047, 3, 3, 4, &allocs[3]), kTfLiteOk);
// Deallocation of a zero-sized alloc should not impact the allocator offsets.
EXPECT_EQ(allocs[0].offset, 0);
@ -90,11 +82,11 @@ TEST(SimpleMemoryArenaTest, InterleavedZeroAlloc) {
TEST(SimpleMemoryArenaTest, TestClearPlan) {
TfLiteContext context;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[9];
ArenaAllocWithUsageInterval allocs[9];
arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 2, 3, &allocs[2]);
arena.Allocate(&context, 32, 2047, 0, 0, 2, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 2, 1, 2, &allocs[2]);
arena.Commit(&context);
EXPECT_EQ(allocs[0].offset, 0);
@ -104,9 +96,9 @@ TEST(SimpleMemoryArenaTest, TestClearPlan) {
arena.ClearPlan();
// Test with smaller allocs.
arena.Allocate(&context, 32, 1023, 3, 1, &allocs[3]);
arena.Allocate(&context, 32, 1023, 4, 2, &allocs[4]);
arena.Allocate(&context, 32, 1023, 5, 3, &allocs[5]);
arena.Allocate(&context, 32, 1023, 3, 0, 2, &allocs[3]);
arena.Allocate(&context, 32, 1023, 4, 1, 2, &allocs[4]);
arena.Allocate(&context, 32, 1023, 5, 1, 2, &allocs[5]);
arena.Commit(&context);
EXPECT_EQ(allocs[3].offset, 0);
@ -116,9 +108,9 @@ TEST(SimpleMemoryArenaTest, TestClearPlan) {
arena.ClearPlan();
// Test larger allocs which should require a reallocation.
arena.Allocate(&context, 32, 4095, 6, 1, &allocs[6]);
arena.Allocate(&context, 32, 4095, 7, 2, &allocs[7]);
arena.Allocate(&context, 32, 4095, 8, 3, &allocs[8]);
arena.Allocate(&context, 32, 4095, 6, 0, 2, &allocs[6]);
arena.Allocate(&context, 32, 4095, 7, 1, 2, &allocs[7]);
arena.Allocate(&context, 32, 4095, 8, 1, 2, &allocs[8]);
arena.Commit(&context);
EXPECT_EQ(allocs[6].offset, 0);
@ -130,10 +122,10 @@ TEST(SimpleMemoryArenaTest, TestClearBuffer) {
TfLiteContext context;
context.ReportError = ReportError;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[9];
ArenaAllocWithUsageInterval allocs[9];
arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 0, 0, 2, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 1, 2, &allocs[1]);
// Should be a no-op.
ASSERT_EQ(arena.ReleaseBuffer(), kTfLiteOk);
@ -174,10 +166,10 @@ TEST_P(BufferAndPlanClearingTest, TestClearBufferAndClearPlan) {
TfLiteContext context;
context.ReportError = ReportError;
SimpleMemoryArena arena(64);
ArenaAlloc allocs[9];
ArenaAllocWithUsageInterval allocs[9];
arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 0, 0, 2, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 1, 2, &allocs[1]);
ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
@ -195,8 +187,8 @@ TEST_P(BufferAndPlanClearingTest, TestClearBufferAndClearPlan) {
ASSERT_NE(arena.ResolveAlloc(&context, allocs[0], &resolved_ptr), kTfLiteOk);
// Re-allocate tensors & commit.
arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
arena.Allocate(&context, 32, 2047, 0, 0, 2, &allocs[0]);
arena.Allocate(&context, 32, 2047, 1, 1, 2, &allocs[1]);
ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
// Pointer-resolution now works.