Implemented a "greedy by size" planner that produces more optimal memory allocation.
- MobileNet V1: 35% memory reduction
- MobileNet V2: 2% memory reduction
The majority of the other tested models showed a ~10-15% improvement.

PiperOrigin-RevId: 266182266
parent c3d844efc8
commit 4f8a6dd61c
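For orientation, the greedy-by-size idea behind this change can be sketched outside of the TF Lite sources as follows. Everything in this snippet (the TensorUsage struct, the function name, the first-fit gap scan) is illustrative only and is not the code added by this commit; the real planner additionally keeps whole-lifetime tensors at the front and prefers the smallest suitable gap rather than the first one.

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical, simplified tensor record used only for this illustration.
struct TensorUsage {
  size_t size;        // bytes required
  int first_node;     // first node that uses the tensor
  int last_node;      // last node that uses the tensor
  size_t offset = 0;  // assigned arena offset (output)
};

// Greedy-by-size placement: largest tensors first, each at the lowest
// offset that does not overlap an already placed tensor whose usage
// interval intersects this one.
inline void GreedyBySizePlan(std::vector<TensorUsage>& tensors) {
  std::vector<TensorUsage*> order;
  for (auto& t : tensors) order.push_back(&t);
  std::sort(order.begin(), order.end(),
            [](const TensorUsage* a, const TensorUsage* b) {
              return a->size > b->size;  // non-increasing size
            });
  std::vector<TensorUsage*> placed;
  for (TensorUsage* t : order) {
    size_t best = 0;
    // Scan placed tensors in offset order; only tensors with overlapping
    // lifetimes are conflicts. Take the first gap that fits.
    std::sort(placed.begin(), placed.end(),
              [](const TensorUsage* a, const TensorUsage* b) {
                return a->offset < b->offset;
              });
    for (const TensorUsage* p : placed) {
      bool lifetimes_overlap =
          !(p->last_node < t->first_node || p->first_node > t->last_node);
      if (!lifetimes_overlap) continue;
      if (best + t->size <= p->offset) break;  // gap found
      best = std::max(best, p->offset + p->size);
    }
    t->offset = best;
    placed.push_back(t);
  }
}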
@@ -174,7 +174,6 @@ cc_library(
     ],
     copts = TFLITE_DEFAULT_COPTS,
     deps = [
-        ":simple_memory_arena",
         ":string",
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/core/api",
@@ -19,11 +19,11 @@ limitations under the License.
 
 #include <cstdio>
 #include <cstdlib>
+#include <memory>
 #include <vector>
 
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/core/api/error_reporter.h"
-#include "tensorflow/lite/simple_memory_arena.h"
 #include "tensorflow/lite/string.h"
 
 namespace tflite {
@@ -13,18 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/arena_planner.h"
 
+#include <algorithm>
+#include <limits>
+#include <type_traits>
 #include <utility>
 
 namespace tflite {
+namespace {
 
-struct AllocationInfo {
-  // The node index requesting this allocation.
-  int node;
-  // The tensor index to be allocated or deallocated.
-  int tensor;
-  // Whether to allocate or deallocate
-  enum Type { ALLOC, DEALLOC } type;
-};
+constexpr size_t kNotAssigned = std::numeric_limits<size_t>::max();
+
+}  // namespace
 
 ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                            std::unique_ptr<GraphInfo> graph_info,
@@ -55,52 +55,42 @@ TfLiteStatus ArenaPlanner::ResetAllocations() {
   TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
   allocs_.clear();
   allocs_.resize(graph_info_->num_tensors());
-  // Note that we only clear the alloc_queue_ when re-planning allocations, as
-  // it should only change when the graph topology itself changes.
+  order_.clear();
+  was_added_.clear();
   return kTfLiteOk;
 }
 
 TfLiteStatus ArenaPlanner::PlanAllocations() {
   // Invalidate any existing data.
   TF_LITE_ENSURE_STATUS(ResetAllocations());
-  // The alloc_queue_ is specific to the graph topology, and will be
-  // completely reconstructed from graph data here.
-  alloc_queue_.clear();
+  // Maybe other verb instead of 'Assigned'
+  alloc_node_.assign(graph_info_->num_tensors(), kNotAssigned);
+  dealloc_node_.assign(graph_info_->num_tensors(), kNotAssigned);
 
   // Keeps track of references to each tensor.
   std::vector<int> refcounts(graph_info_->num_tensors(), 0);
-  // `allocated` and `deallocated` are technically list of boolean values.
-  // We're saving the compiled binary size by using `vector<int>`.
-  std::vector<int> allocated(graph_info_->num_tensors(), false);
-  std::vector<int> deallocated(graph_info_->num_tensors(), false);
 
-  auto allocate = [this, &allocated, &deallocated](int node,
-                                                   int tensor) -> TfLiteStatus {
-    if (allocated[tensor]) {
+  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
+    if (alloc_node_[tensor] != kNotAssigned) {
+      // Tensor has already been allocated.
       return kTfLiteOk;
     }
-    TF_LITE_ENSURE(context_, !deallocated[tensor]);
-    alloc_queue_.push_back({node, tensor, AllocationInfo::ALLOC});
-    allocated[tensor] = true;
+    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNotAssigned);
+    alloc_node_[tensor] = node;
     return kTfLiteOk;
   };
 
-  auto deallocate = [this, &allocated, &deallocated](
-                        int node, int tensor) -> TfLiteStatus {
-    if (!allocated[tensor]) {
-      // Do not enqueue a DEALLOC if the tensor is never allocated.
+  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
+    if (alloc_node_[tensor] == kNotAssigned) {
+      // We don't need to deallocate the tensor, that is never allocated.
       // This happened with the constant tensors.
       return kTfLiteOk;
     }
-    TF_LITE_ENSURE(context_, !deallocated[tensor]);
-    alloc_queue_.push_back({node, tensor, AllocationInfo::DEALLOC});
+    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNotAssigned);
+    dealloc_node_[tensor] = node;
     return kTfLiteOk;
   };
 
-  // There will be an entry in alloc_queue_ for the allocation of each tensor
-  // and another for their deallocation.
-  alloc_queue_.reserve(2 * graph_info_->num_tensors());
 
   // We must make sure the output tensors are never overwritten. We do that by
   // artificially adding one to their ref-counts so they are never selected
   // for deallocation.
@@ -188,12 +178,27 @@ TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
   // Grow the size of `allocs_` if necessary. This allows allocating temporary
   // tensors in op's `prepare` function.
   TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
+  alloc_node_.resize(graph_info_->num_tensors(), kNotAssigned);
+  dealloc_node_.resize(graph_info_->num_tensors(), kNotAssigned);
   allocs_.resize(graph_info_->num_tensors());
+  was_added_.assign(graph_info_->num_tensors(), false);
+  order_.clear();
+  // Set allocation and deallocation for temporary tensors.
+  for (size_t i = first_node; i <= last_node && i < graph_info_->num_nodes();
+       ++i) {
+    const TfLiteNode& node = graph_info_->node(i);
+    TfLiteIntArray* node_temporaries = node.temporaries;
+    for (int j = 0; j < node_temporaries->size; ++j) {
+      int tensor_index = node_temporaries->data[j];
+      alloc_node_[tensor_index] = i;
+      dealloc_node_[tensor_index] = i;
+    }
+  }
 
   TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
   TF_LITE_ENSURE_STATUS(Commit());
 
-  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
+  for (size_t i = 0; i < graph_info_->num_tensors(); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
@@ -204,48 +209,71 @@ TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
   return kTfLiteOk;
 }
 
-TfLiteStatus ArenaPlanner::Commit() {
-  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
-  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
-  return kTfLiteOk;
-}
-
-TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
-  int active_node = first_node;
-  // When dynamic tensors are present this method is called multiple times.
-  // The items in the alloc_queue_ referring to nodes before first_node were
-  // processed previously and should be skipped. Entries after last_node are
-  // not yet ready to be handled.
-  for (const auto& alloc_info : alloc_queue_) {
-    if (alloc_info.node < first_node) continue;
-    if (alloc_info.node > last_node) break;
-    if (alloc_info.node == active_node) {
-      // This is the first allocation/deallocation for a given node. It is
-      // time to deallocate the previous temporaries and allocate new ones.
-      if (active_node != first_node) {
-        TF_LITE_ENSURE_STATUS(
-            CalculateDeallocationOfInternalTensors(active_node - 1));
-      }
-      TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
-      ++active_node;
-    }
-    // Handle the current item.
-    if (alloc_info.type == AllocationInfo::ALLOC) {
-      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
-    } else {
-      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
-    }
-  }
-
-  // For the case if the graph is empty the node index can be negative since we
-  // substract from the active node, so the node_index can be zero for those
-  // cases
-  if (active_node > 0) {
-    // Don't forget to deallocate temporaries of last node.
-    TF_LITE_ENSURE_STATUS(
-        CalculateDeallocationOfInternalTensors(active_node - 1));
-  }
-
-  return kTfLiteOk;
-}
+TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
+  for (size_t i = 0; i < graph_info_->num_tensors(); ++i) {
+    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
+      AddTensorIfNeeded(i);
+    }
+  }
+
+  std::sort(order_.begin(), order_.end(), CompareBySize(this));
+
+  // Vector of ids of already allocated tensors, ordered by offset.
+  for (const auto& tensor_index : order_) {
+    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+    if (tensor.allocation_type == kTfLiteArenaRw) {
+      TF_LITE_ENSURE_STATUS(arena_.Allocate(
+          context_, tensor_alignment_, tensor.bytes, alloc_node_[tensor_index],
+          dealloc_node_[tensor_index], &allocs_[tensor_index]));
+    }
+    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
+          context_, tensor_alignment_, tensor.bytes, alloc_node_[tensor_index],
+          std::numeric_limits<size_t>::max(), &allocs_[tensor_index]));
+    }
+  }
+  return kTfLiteOk;
+}
+
+void ArenaPlanner::AddTensorIfNeeded(int tensor_index) {
+  if (!was_added_[tensor_index]) {
+    was_added_[tensor_index] = true;
+    order_.push_back(tensor_index);
+  }
+}
+
+bool ArenaPlanner::CompareBySize::operator()(const int idx1,
+                                             const int idx2) const {
+  // Tensors that have lifespan through the whole model inference time are
+  // allocated at the beginning of memory slice. Their respective order doesn't
+  // matter in fact, so here they are sorted by index.
+  if (planner->alloc_node_[idx1] == 0 &&
+      planner->dealloc_node_[idx1] == kNotAssigned) {
+    if (planner->alloc_node_[idx2] == 0 &&
+        planner->dealloc_node_[idx2] == kNotAssigned) {
+      return idx1 < idx2;
+    }
+    return true;
+  }
+  if (planner->alloc_node_[idx2] == 0 &&
+      planner->dealloc_node_[idx2] == kNotAssigned) {
+    return false;
+  }
+
+  // All other tensors are sorted in non-increasing order of their size.
+  auto size1 = planner->graph_info_->tensor(idx1)->bytes;
+  auto size2 = planner->graph_info_->tensor(idx2)->bytes;
+  if (size1 != size2) {
+    return size1 > size2;
+  }
+
+  // Tensors with equal size are sorted in order of their allocation time.
+  return planner->alloc_node_[idx1] < planner->alloc_node_[idx2];
+}
+
+TfLiteStatus ArenaPlanner::Commit() {
+  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
+  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
+  return kTfLiteOk;
+}
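The ordering rule implemented by CompareBySize above can be exercised in isolation. The snippet below is an illustrative, stand-alone rewrite (its Info struct and sample values are hypothetical, not TF Lite types): tensors that live for the whole inference go first by index, the rest follow in non-increasing size, and equal sizes fall back to allocation time.

#include <algorithm>
#include <cstdio>
#include <limits>
#include <vector>

namespace {
constexpr size_t kNotAssigned = std::numeric_limits<size_t>::max();

// Hypothetical per-tensor record mirroring alloc_node_/dealloc_node_/bytes.
struct Info {
  int id;
  size_t bytes;
  size_t alloc_node;
  size_t dealloc_node;
};

bool CompareBySize(const Info& a, const Info& b) {
  // Tensors alive for the whole inference (allocated at node 0, never
  // deallocated) go first, ordered by index.
  const bool a_whole = a.alloc_node == 0 && a.dealloc_node == kNotAssigned;
  const bool b_whole = b.alloc_node == 0 && b.dealloc_node == kNotAssigned;
  if (a_whole != b_whole) return a_whole;
  if (a_whole && b_whole) return a.id < b.id;
  // Everything else: non-increasing size, ties broken by allocation time.
  if (a.bytes != b.bytes) return a.bytes > b.bytes;
  return a.alloc_node < b.alloc_node;
}
}  // namespace

int main() {
  std::vector<Info> infos = {{0, 4, 0, kNotAssigned},
                             {1, 64, 2, 5},
                             {2, 64, 1, 3},
                             {3, 16, 0, 4}};
  std::sort(infos.begin(), infos.end(), CompareBySize);
  for (const auto& t : infos) std::printf("%d ", t.id);  // prints: 0 2 1 3
  return 0;
}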
@@ -266,51 +294,4 @@ TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
   return kTfLiteOk;
 }
 
-TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
-  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
-  if (tensor.allocation_type == kTfLiteArenaRw) {
-    TF_LITE_ENSURE_STATUS(arena_.Allocate(
-        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
-  }
-  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
-    TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
-        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
-  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
-  if (tensor.allocation_type == kTfLiteArenaRw) {
-    TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
-    int node_index) {
-  if (node_index < static_cast<int>(graph_info_->num_nodes())) {
-    const TfLiteNode& node = graph_info_->node(static_cast<size_t>(node_index));
-    TfLiteIntArray* node_temporaries = node.temporaries;
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
-    }
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
-    int node_index) {
-  if (node_index < static_cast<int>(graph_info_->num_nodes())) {
-    const TfLiteNode& node = graph_info_->node(static_cast<size_t>(node_index));
-    TfLiteIntArray* node_temporaries = node.temporaries;
-    for (int i = 0; i < node_temporaries->size; ++i) {
-      int tensor_index = node_temporaries->data[i];
-      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
-    }
-  }
-  return kTfLiteOk;
-}
-
 }  // namespace tflite
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_ARENA_PLANNER_H_
 #define TENSORFLOW_LITE_ARENA_PLANNER_H_
 
+#include <limits>
 #include <memory>
 #include <vector>
 
@@ -44,17 +45,12 @@ struct AllocationInfo;
 // execution. Since dynamic tensors don't have sizes until after the
 // corresponding operation is executed, this class supports incremental
 // planning.
-//
-// TODO(b/127354079): Remove the constrain below when the issue is fixed.
-// WARNING: MemoryPlanner's behavior must be deterministic. If the first N
-// nodes are unchanged, it must produce exactly the same allocation plan for
-// the first N nodes.
 class ArenaPlanner : public MemoryPlanner {
  public:
   // Ownership of 'context' is not taken and it must remain util the
-  // ArenaPlanner is destroyed. If 'preserve_inputs' is true the inputs to the
-  // graph will not share memory with any other tensor, effectively preserving
-  // them until the end of inference.
+  // ArenaPlanner is destroyed. If 'preserve_inputs' is true the inputs
+  // to the graph will not share memory with any other tensor, effectively
+  // preserving them until the end of inference.
   ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info,
                bool preserve_inputs, bool preserve_intermediates,
                int tensor_alignment = kDefaultTensorAlignment);
@@ -82,29 +78,37 @@ class ArenaPlanner : public MemoryPlanner {
   // position inside the corresponding arena buffer.
   TfLiteStatus ResolveTensorAllocation(int tensor_index);
 
-  // Register an allocation for the given tensor.
-  TfLiteStatus CalculateTensorAllocation(int tensor_index);
-
-  // Register a deallocation for the given tensor.
-  TfLiteStatus CalculateTensorDeallocation(int tensor_index);
-
-  // Register an allocation for all internal (temporary) tensors of
-  // 'node_index'.
-  TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);
-
-  // Register a deallocation for all internal (temporary) tensors of
-  // 'node_index'.
-  TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index);
+  void AddTensorIfNeeded(int tensor_index);
+
+  // Comparator to sort tensors for the allocation algorithm:
+  // - Tensors that have lifespan through the whole model inference time go
+  // first;
+  // - Other tensors (e.g. intermediate and temporary ones) are sorted in
+  // non-increasing order of their size. If sizes of two tensors are equal, the
+  // one that needs to be allocated earlier goes first.
+  struct CompareBySize {
+    explicit CompareBySize(const ArenaPlanner* planner) : planner(planner) {}
+    bool operator()(const int idx1, const int idx2) const;
+    const ArenaPlanner* planner;
+  };
 
   TfLiteContext* context_;
   std::unique_ptr<GraphInfo> graph_info_;
 
   // Stores allocation data for all tensors.
-  std::vector<ArenaAlloc> allocs_;
+  std::vector<ArenaAllocWithUsageInterval> allocs_;
 
-  // A chronological list of instructions to allocate and deallocate tensors,
-  // reflecting the way they are used in the graph.
-  std::vector<AllocationInfo> alloc_queue_;
+  // First node, that uses the tensor. It needs to be allocated before
+  // execution of the node's operation.
+  std::vector<size_t> alloc_node_;
+
+  // Last node, that uses the tensor. It can be deallocated after execution of
+  // the node's operation.
+  std::vector<size_t> dealloc_node_;
+
+  // Indices of tensors in order their allocation offsets will be calculated.
+  std::vector<size_t> order_;
+  std::vector<char> was_added_;  // avoiding std::vector<bool> as bitset
 
   // Raw memory buffer that is allocated for all temporary and graph outputs
   // that are declared kTfLiteArenaRw.
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include "tensorflow/lite/testing/util.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/lite/testing/util.h"
 
 namespace tflite {
 namespace {
@@ -198,7 +198,7 @@ class ArenaPlannerTest : public ::testing::Test {
       offset += kTensorAlignment - offset % kTensorAlignment;
     }
     return offset;
-  };
+  }
 
   TfLiteContext context_;
   TestGraph* graph_;
@@ -211,18 +211,6 @@ TEST_F(ArenaPlannerTest, EmptyGraph) {
   Execute(0, 10);
 }
 
-TEST_F(ArenaPlannerTest, DeallocationOfInputTensor) {
-  // This is a negative TC, which will try to make sure that no allocation for
-  // input tensors is done, when making call with negative node_index, since
-  // previous check was doing comparison of node_index which was int and
-  // unsigned int, implicit conversion was passing this case, as the negative
-  // number was converted to unsigned it making it invalid.The new check
-  // takes care of this problem and removes the warning as well.
-  TestGraph graph({-1}, {}, {1});
-  SetGraph(&graph);
-  Execute(0, 10);
-}
-
 TEST_F(ArenaPlannerTest, GraphWithNoOps) {
   TestGraph graph({0, 10}, {}, {5, 11});
   SetGraph(&graph);
@@ -239,8 +227,8 @@ TEST_F(ArenaPlannerTest, GraphWithOneOp) {
   TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
   SetGraph(&graph);
   Execute(0, 10);
-  EXPECT_EQ(GetOffset(1), 0);
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(2), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
 }
 
 TEST_F(ArenaPlannerTest, ZeroSizedTensors) {
@@ -264,12 +252,12 @@ TEST_F(ArenaPlannerTest, SimpleGraph) {
   Execute(0, 10);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(1), 0);
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphInputsPreserved) {
@@ -287,13 +275,10 @@ TEST_F(ArenaPlannerTest, SimpleGraphInputsPreserved) {
   // Alloc(+) and dealloc(-) order: +0 +1 +2 +4 +5 -2 +3 -4 -5
   EXPECT_EQ(GetOffset(0), 0);
   EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  // Because we are keeping the inputs alive until the end (due to
-  // preserve_inputs=true), the output tensor will not be able to use that
-  // space. It will end up using the same are as tensor #2.
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
@@ -309,12 +294,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
   Execute(0, 10);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
   EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(1), 0);
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
@@ -330,12 +315,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
   Execute(0, 10);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(1), 0);
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
@@ -355,12 +340,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
   Execute(0, 10);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(1), 0);
   EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
   EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
@@ -386,12 +371,12 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
   EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), 0);
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), 0);
   EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
   EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(1), 0);
 }
 
 TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
@@ -413,11 +398,11 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
   EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr);
 
   // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(5), 0);
   EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
   EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(2));
 }
 
 TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
@@ -446,10 +431,10 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   // Op4: +10 -4 -5 -8
 
   Execute(0, 0);
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
   EXPECT_TRUE(is_unallocated(6));
   EXPECT_TRUE(is_unallocated(4));
   EXPECT_TRUE(is_unallocated(5));
@@ -459,62 +444,61 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   EXPECT_TRUE(is_unallocated(10));
 
   Execute(1, 1);
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
   EXPECT_TRUE(is_unallocated(7));
   EXPECT_TRUE(is_unallocated(9));
   EXPECT_TRUE(is_unallocated(8));
   EXPECT_TRUE(is_unallocated(10));
 
   Execute(2, 2);
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  // Here's an interesting allocation. Even though #6 requires only 21 bytes,
-  // its deallocation freed up 24 bytes due to the alignment requirements in
-  // the arena. That means we can fit #7 in the same space!
-  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  // #7 (24 bytes) is allocated at the place, where #0 and #6 (4+24=28 bytes)
+  // were before their deallocation.
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
   EXPECT_TRUE(is_unallocated(9));
   EXPECT_TRUE(is_unallocated(8));
   EXPECT_TRUE(is_unallocated(10));
 
   Execute(3, 3);
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
-  // The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough
-  // for #9, so it goes at the end.
-  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
+  // The deallocation of #1 and #2 frees up 20 bytes but that's not enough
+  // neither for #9, nor for #8, so they both go at the end.
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(4));
   EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
   EXPECT_TRUE(is_unallocated(10));
 
   Execute(4, 4);
-  EXPECT_EQ(GetOffset(0), 0);
-  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
-  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
-  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
-  EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
-  EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
-  EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
-  EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(0), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(4));
   EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
-  // There's just enough space at the beginning for #10 due to the
-  // deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs
+  // There is enough space at the beginning for #10 due to the
+  // deallocation of #7, #1, #2 and #3 (total 56 bytes, #10 needs
   // only 33.)
   EXPECT_EQ(GetOffset(10), 0);
 }
@@ -547,6 +531,86 @@ TEST_F(ArenaPlannerTest, ModifiedGraph) {
   EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
 }
 
+TEST_F(ArenaPlannerTest, ComplexGraph) {
+  TestGraph graph({0},
+                  {
+                      /* in, out, tmp */
+                      {{0}, {1}, {}},
+                      {{1}, {2}, {}},
+                      {{1}, {3}, {}},
+                      {{1}, {4}, {}},
+                      {{2, 3, 4}, {5}, {}},
+                      {{5}, {6}, {}},
+                      {{5}, {7}, {}},
+                      {{6, 7}, {8}, {}},
+                  },
+                  {8});
+  (*graph.tensors())[0].bytes = 32;
+  (*graph.tensors())[1].bytes = 28;
+  (*graph.tensors())[2].bytes = 36;
+  (*graph.tensors())[3].bytes = 16;
+  (*graph.tensors())[4].bytes = 8;
+  (*graph.tensors())[5].bytes = 64;
+  (*graph.tensors())[6].bytes = 10;
+  (*graph.tensors())[7].bytes = 40;
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 -0 +2 +3 +4 -1 +5 -2 -3 -4 +6 +7 -5 +8
+  EXPECT_EQ(GetOffset(5), 0);
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(7));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(3));
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(8), 0);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithIntermediates) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0}, {2}, {3}},
+                      {{1, 2}, {4, 5}, {}},
+                      {{5}, {6, 7}, {8, 9, 10}},
+                      {{4, 6}, {11}, {12}},
+                      {{11}, {13}, {}},
+                      {{7, 13}, {14}, {15}},
+                  },
+                  {11, 14});
+  SetGraph(&graph, /*preserve_inputs=*/true);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order by operation:
+  // Op0: +0 +1 +2 +3 -3
+  // Op1: +4 +5 -2 -4
+  // Op2: +6 +7 +8 +9 +10 -8 -9 -10 -5
+  // Op3: +11 +12 -12 -4 -6
+  // Op4: +13
+  // Op5: +14 +15 -7 -13 -15
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(15), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(14), GetOffsetAfter(15));
+  EXPECT_EQ(GetOffset(13), GetOffsetAfter(14));
+  EXPECT_EQ(GetOffset(12), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(11), GetOffsetAfter(13));
+  EXPECT_EQ(GetOffset(10), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(9), GetOffsetAfter(10));
+  EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+  EXPECT_EQ(GetOffset(7), GetOffsetAfter(11));
+  EXPECT_EQ(GetOffset(6), GetOffsetAfter(8));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(6));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(7));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
+
+  // 2 is allocated in the smallest suitable gap, which is not equal to the
+  // first available one.
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(5));
+}
+
 }  // namespace
 }  // namespace tflite
@@ -162,7 +162,6 @@ Subgraph::Subgraph(ErrorReporter* error_reporter,
     : external_contexts_(external_contexts),
       error_reporter_(error_reporter),
       next_execution_plan_index_to_prepare_(0),
-      next_execution_plan_index_to_plan_allocation_(0),
       subgraphs_(subgraphs),
       resource_variables_(resource_variables) {
   context_.impl_ = static_cast<void*>(this);
@@ -496,7 +495,6 @@ TfLiteStatus Subgraph::AllocateTensors() {
   }
 
   next_execution_plan_index_to_prepare_ = 0;
-  next_execution_plan_index_to_plan_allocation_ = 0;
   if (memory_planner_) {
     TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
   }
|
|||||||
|
|
||||||
TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
|
TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
|
||||||
next_execution_plan_index_to_prepare_, &last_exec_plan_index_prepared));
|
next_execution_plan_index_to_prepare_, &last_exec_plan_index_prepared));
|
||||||
next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
|
|
||||||
|
|
||||||
TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
|
TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
|
||||||
next_execution_plan_index_to_plan_allocation_,
|
next_execution_plan_index_to_prepare_, last_exec_plan_index_prepared));
|
||||||
last_exec_plan_index_prepared));
|
next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
|
||||||
next_execution_plan_index_to_plan_allocation_ =
|
|
||||||
last_exec_plan_index_prepared + 1;
|
|
||||||
|
|
||||||
return kTfLiteOk;
|
return kTfLiteOk;
|
||||||
}
|
}
|
||||||
@@ -776,22 +771,6 @@ TfLiteStatus Subgraph::Invoke() {
     if (tensor_resized_since_op_invoke_ &&
         HasDynamicTensor(context_, node.outputs)) {
       next_execution_plan_index_to_prepare_ = execution_plan_index + 1;
-
-      // This happens when an intermediate dynamic tensor is resized.
-      // We don't have to prepare all the ops, but we need to recompute
-      // the allocation plan.
-      //
-      // This is a workaround for b/127354079. It relies on the property that
-      // ArenaPlanner's behavior is deterministic. A better solution is being
-      // able to "Rewind" to a specific index in ArenaPlanner.
-      // TODO(b/127354079): Improve ArenaPlanner and remove this mechanism.
-      if (next_execution_plan_index_to_plan_allocation_ >
-          next_execution_plan_index_to_prepare_) {
-        next_execution_plan_index_to_plan_allocation_ = 0;
-        if (memory_planner_) {
-          TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
-        }
-      }
     }
   }
 
@@ -527,14 +527,6 @@ class Subgraph {
   // NOTE: this relies on the order of nodes that is in topological order.
   int next_execution_plan_index_to_prepare_;
 
-  // This is similar to `next_execution_plan_index_to_prepare_`, but it tracks
-  // which nodes' allocation is planned with the arena planner.
-  //
-  // This is a workaround for b/127354079. It shouldn't be necessary if
-  // ArenaPlanner can "rewind" to a specific point.
-  // TODO(b/127354079): Improve ArenaPlanner and remove this mechanism.
-  int next_execution_plan_index_to_plan_allocation_;
-
   // WARNING: This is an experimental interface that is subject to change.
   // This is a list of node indices (to index into nodes_and_registration).
   // This represents a valid topological sort (dependency ordered) execution
@@ -364,15 +364,14 @@ TEST(BasicInterpreter, CheckArenaAllocation) {
   ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
 
   ASSERT_LT(interpreter.tensor(0)->data.raw, interpreter.tensor(1)->data.raw);
-  ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(2)->data.raw);
-  ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(3)->data.raw);
-  ASSERT_LT(interpreter.tensor(3)->data.raw, interpreter.tensor(4)->data.raw);
-  ASSERT_LT(interpreter.tensor(4)->data.raw, interpreter.tensor(5)->data.raw);
-  ASSERT_LT(interpreter.tensor(5)->data.raw, interpreter.tensor(7)->data.raw);
-  ASSERT_EQ(interpreter.tensor(6)->data.raw, interpreter.tensor(2)->data.raw);
-  // #7 is the one with the largest pointer.
+  ASSERT_LT(interpreter.tensor(1)->data.raw, interpreter.tensor(3)->data.raw);
+  ASSERT_EQ(interpreter.tensor(3)->data.raw, interpreter.tensor(9)->data.raw);
+  ASSERT_LT(interpreter.tensor(3)->data.raw, interpreter.tensor(5)->data.raw);
+  ASSERT_LT(interpreter.tensor(5)->data.raw, interpreter.tensor(2)->data.raw);
+  ASSERT_EQ(interpreter.tensor(2)->data.raw, interpreter.tensor(7)->data.raw);
+  ASSERT_LT(interpreter.tensor(2)->data.raw, interpreter.tensor(4)->data.raw);
+  // #4 is the one with the largest pointer.
   ASSERT_EQ(interpreter.tensor(8)->data.raw, nullptr);
-  ASSERT_EQ(interpreter.tensor(9)->data.raw, interpreter.tensor(5)->data.raw);
 }
 
 TEST(BasicInterpreter, BufferAccess) {
@@ -21,11 +21,6 @@ namespace tflite {
 
 // A MemoryPlanner is responsible for planning and executing a number of
 // memory-related operations that are necessary in TF Lite.
-//
-// TODO(b/127354079): Remove the constrain below when the issue is fixed.
-// WARNING: MemoryPlanner's behavior must be deterministic. If the first N
-// nodes are unchanged, it must produce exactly the same allocation plan for
-// the first N nodes.
 class MemoryPlanner {
  public:
   virtual ~MemoryPlanner() {}
@@ -31,73 +31,55 @@ T AlignTo(size_t alignment, T offset) {
 }  // namespace
 
 namespace tflite {
-TfLiteStatus SimpleMemoryArena::Allocate(TfLiteContext* context,
-                                         size_t alignment, size_t size,
-                                         ArenaAlloc* new_alloc) {
+TfLiteStatus SimpleMemoryArena::Allocate(
+    TfLiteContext* context, size_t alignment, size_t size, size_t first_node,
+    size_t last_node, ArenaAllocWithUsageInterval* new_alloc) {
   TF_LITE_ENSURE(context, alignment <= arena_alignment_);
+  new_alloc->first_node = first_node;
+  new_alloc->last_node = last_node;
+  new_alloc->size = size;
 
   if (size == 0) {
     new_alloc->offset = 0;
-    new_alloc->size = 0;
     return kTfLiteOk;
   }
 
-  size_t current_top = 0;
-
-  if (!allocs_.empty()) {
-    auto last = allocs_.rbegin();
-    current_top = last->offset + last->size;
-  }
-
   // If we don't find a better gap just allocate at the end of the buffer.
-  size_t best_offset = AlignTo(alignment, current_top);
-  size_t best_offset_fit = std::numeric_limits<size_t>::max();
-  auto best_insertion_it = allocs_.end();
+  const size_t kNotAssigned = std::numeric_limits<size_t>::max();
+  size_t best_offset = kNotAssigned;
+  size_t best_offset_fit = kNotAssigned;
 
   // Go through the sorted allocs and look at the gaps between them.
   size_t current_offset = 0;
-  for (auto it = allocs_.begin(); it != allocs_.end(); ++it) {
+  for (const auto& alloc : ordered_allocs_) {
+    if (alloc.last_node < first_node || alloc.first_node > last_node) {
+      // Usage interval of alloc doesn't intersect with current tensor's usage
+      // interval, so we skip it.
+      continue;
+    }
     size_t aligned_current_offset = AlignTo(alignment, current_offset);
     // If we found a gap larger than required size, and smaller than previous
    // best fit, take it.
-    if (aligned_current_offset + size <= it->offset &&
-        it->offset - current_offset < best_offset_fit) {
+    if (aligned_current_offset + size <= alloc.offset &&
+        alloc.offset - aligned_current_offset < best_offset_fit) {
       best_offset = aligned_current_offset;
-      best_offset_fit = it->offset - current_offset;
-      best_insertion_it = it;
+      best_offset_fit = alloc.offset - current_offset;
     }
-    current_offset = it->offset + it->size;
+    current_offset = std::max(current_offset, alloc.offset + alloc.size);
   }
+  if (best_offset == kNotAssigned) {
+    best_offset = AlignTo(alignment, current_offset);
+  }
 
   // Update the required buffer size.
   high_water_mark_ = std::max(high_water_mark_, best_offset + size);
 
   new_alloc->offset = best_offset;
-  new_alloc->size = size;
-  allocs_.insert(best_insertion_it, *new_alloc);
-
-  return kTfLiteOk;
-}
-
-TfLiteStatus SimpleMemoryArena::Deallocate(TfLiteContext* context,
-                                           const ArenaAlloc& alloc) {
-  if (alloc.size == 0) {
-    return kTfLiteOk;
-  }
-
-  int erased_allocs_count = 0;
-  auto it = allocs_.begin();
-  while (it != allocs_.end()) {
-    if (it->offset == alloc.offset) {
-      TF_LITE_ENSURE_EQ(context, it->size, alloc.size);
-      erased_allocs_count++;
-      it = allocs_.erase(it);
-    } else {
-      ++it;
-    }
-  }
-  TF_LITE_ENSURE_EQ(context, erased_allocs_count, 1);
+
+  auto insertion_it = ordered_allocs_.begin();
+  while (insertion_it != ordered_allocs_.end() && *insertion_it < *new_alloc) {
+    ++insertion_it;
+  }
+  ordered_allocs_.insert(insertion_it, *new_alloc);
   return kTfLiteOk;
 }
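With the new interface, callers describe the usage interval up front and never call Deallocate; whether two allocations may share space is decided from the [first_node, last_node] intervals. A minimal usage sketch mirroring the updated unit test (the wrapper function, the 64-byte arena alignment, and the node numbers are example values, not part of this change):

#include "tensorflow/lite/simple_memory_arena.h"

void ExampleArenaUsage() {  // hypothetical helper, for illustration only
  TfLiteContext context;
  tflite::SimpleMemoryArena arena(/*arena_alignment=*/64);
  tflite::ArenaAllocWithUsageInterval alloc;
  // 2047 bytes, needed from node 1 through node 3; no Deallocate() call.
  arena.Allocate(&context, /*alignment=*/32, /*size=*/2047,
                 /*first_node=*/1, /*last_node=*/3, &alloc);
  arena.Commit(&context);
  char* ptr = nullptr;
  arena.ResolveAlloc(&context, alloc, &ptr);
}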
@@ -128,9 +110,9 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) {
   return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError;
 }
 
-TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context,
-                                             const ArenaAlloc& alloc,
-                                             char** output_ptr) {
+TfLiteStatus SimpleMemoryArena::ResolveAlloc(
+    TfLiteContext* context, const ArenaAllocWithUsageInterval& alloc,
+    char** output_ptr) {
   TF_LITE_ENSURE(context, committed_);
   TF_LITE_ENSURE(context, output_ptr != nullptr);
   if (alloc.size == 0) {
@@ -144,7 +126,7 @@ TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context,
 TfLiteStatus SimpleMemoryArena::Clear() {
   committed_ = false;
   high_water_mark_ = 0;
-  allocs_.clear();
+  ordered_allocs_.clear();
   return kTfLiteOk;
 }
 
@@ -22,16 +22,19 @@ limitations under the License.
 namespace tflite {
 
 // This little structure holds the offset and the size for a dynamic memory
-// allocation in the memory arena. When the arena is committed and the
-// underlying buffer is set, the alloc can be resolved into an actual memory
-// pointer.
-struct ArenaAlloc {
-  ArenaAlloc() : offset(0), size(0) {}
+// allocation in the memory arena as well as first_node and last_node that use
+// corresponding tensor. It means that continuous part of memory with this size
+// needs to be allocated before execution of operation in the first node and can
+// be deallocated after execution of the operation in the last_node. When the
+// arena is committed and the underlying buffer is set, the alloc can be
+// resolved into an actual memory pointer.
+struct ArenaAllocWithUsageInterval {
   size_t offset;
   size_t size;
+  size_t first_node;
+  size_t last_node;
 
-  inline bool operator<(const ArenaAlloc& other) const {
+  inline bool operator<(const ArenaAllocWithUsageInterval& other) const {
     return offset < other.offset;
   }
 };
@@ -48,12 +51,14 @@ class SimpleMemoryArena {
         arena_alignment_(arena_alignment),
         high_water_mark_(0),
         underlying_buffer_size_(0),
-        allocs_() {}
+        ordered_allocs_() {}
 
+  // Schedule memory allocation for a tensor with a given size, assuming that it
+  // needs to be allocated before the execution of first_node, and deallocated
+  // after the execution of last_node.
   TfLiteStatus Allocate(TfLiteContext* context, size_t alignment, size_t size,
-                        ArenaAlloc* new_alloc);
-
-  TfLiteStatus Deallocate(TfLiteContext* context, const ArenaAlloc& alloc);
+                        size_t first_node, size_t last_node,
+                        ArenaAllocWithUsageInterval* new_alloc);
 
   inline size_t RequiredBufferSize() {
     // Add in a small amount of padding to reduce the chance of resize events
|
|||||||
|
|
||||||
TfLiteStatus Commit(TfLiteContext* context);
|
TfLiteStatus Commit(TfLiteContext* context);
|
||||||
|
|
||||||
TfLiteStatus ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc,
|
TfLiteStatus ResolveAlloc(TfLiteContext* context,
|
||||||
|
const ArenaAllocWithUsageInterval& alloc,
|
||||||
char** output_ptr);
|
char** output_ptr);
|
||||||
|
|
||||||
TfLiteStatus Clear();
|
TfLiteStatus Clear();
|
||||||
@@ -80,8 +86,7 @@ class SimpleMemoryArena {
   std::unique_ptr<char[]> underlying_buffer_;
   size_t underlying_buffer_size_;
   char* underlying_buffer_aligned_ptr_;
-  // TODO(maciekc): add list iterator to the ArenaAlloc to lookup quickly.
-  std::list<ArenaAlloc> allocs_;
+  std::list<ArenaAllocWithUsageInterval> ordered_allocs_;
 };
 
 }  // namespace tflite
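Keeping ordered_allocs_ sorted by offset suggests a first-fit style search when a new tensor is placed: walk the existing allocations in offset order, skip any whose node interval does not intersect the new one, and take the first gap that is large enough. This is an assumption about the strategy rather than a copy of the implementation; a standalone sketch:

#include <algorithm>
#include <cstddef>
#include <list>

struct AllocSketch {
  size_t offset;
  size_t size;
  size_t first_node;
  size_t last_node;
};

// Round `offset` up to the next multiple of `alignment` (alignment > 0).
size_t AlignUp(size_t offset, size_t alignment) {
  return ((offset + alignment - 1) / alignment) * alignment;
}

// First-fit over an offset-ordered list: only allocations whose node interval
// intersects [first_node, last_node] can block reuse of their bytes.
size_t FindOffset(const std::list<AllocSketch>& ordered_allocs, size_t size,
                  size_t alignment, size_t first_node, size_t last_node) {
  size_t candidate = 0;
  for (const AllocSketch& alloc : ordered_allocs) {
    if (alloc.last_node < first_node || alloc.first_node > last_node) {
      continue;  // Lifetimes are disjoint, so this alloc does not block us.
    }
    if (alloc.offset >= candidate + size) {
      break;  // The gap in front of this alloc is already big enough.
    }
    candidate =
        std::max(candidate, AlignUp(alloc.offset + alloc.size, alignment));
  }
  return candidate;
}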
@@ -24,39 +24,33 @@ namespace {
 TEST(SimpleMemoryArenaTest, BasicArenaOperations) {
   TfLiteContext context;
   SimpleMemoryArena arena(64);
-  ArenaAlloc allocs[6];
+  ArenaAllocWithUsageInterval allocs[6];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
-  arena.Allocate(&context, 32, 2047, &allocs[2]);
-  arena.Deallocate(&context, allocs[0]);
-  arena.Allocate(&context, 32, 1023, &allocs[3]);
-  arena.Allocate(&context, 32, 2047, &allocs[4]);
-  arena.Deallocate(&context, allocs[1]);
-  arena.Allocate(&context, 32, 1023, &allocs[5]);
+  arena.Allocate(&context, 32, 2047, 1, 3, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 2, 5, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 3, 6, &allocs[2]);
+  arena.Allocate(&context, 32, 2047, 5, 6, &allocs[3]);
+  arena.Allocate(&context, 32, 1023, 4, 6, &allocs[4]);
+  arena.Allocate(&context, 32, 1023, 6, 6, &allocs[5]);
 
   EXPECT_EQ(allocs[0].offset, 0);
   EXPECT_EQ(allocs[1].offset, 2048);
   EXPECT_EQ(allocs[2].offset, 4096);
   EXPECT_EQ(allocs[3].offset, 0);
   EXPECT_EQ(allocs[4].offset, 6144);
-  EXPECT_EQ(allocs[5].offset, 1024);
+  EXPECT_EQ(allocs[5].offset, 2048);
 }
 
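A note on the expected offsets in this rewritten test: allocs[0..2] have pairwise-overlapping node intervals, so they stack at 0, 2048 and 4096. allocs[3] (nodes 5-6) no longer overlaps allocs[0] (nodes 1-3) and reuses offset 0. allocs[4] (nodes 4-6) conflicts with allocs[1], allocs[2] and allocs[3], which pushes it up to 6144. allocs[5] (node 6) conflicts only with allocs[2], allocs[3] and allocs[4], so it can presumably take over allocs[1]'s slot at 2048 once allocs[1] is dead after node 5.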
 TEST(SimpleMemoryArenaTest, BasicZeroAlloc) {
   TfLiteContext context;
   SimpleMemoryArena arena(64);
-  ArenaAlloc alloc;
+  ArenaAllocWithUsageInterval alloc;
 
   // Zero-sized allocs should have a 0 offset and size.
-  ASSERT_EQ(arena.Allocate(&context, 32, 0, &alloc), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 0, 1, 2, &alloc), kTfLiteOk);
   EXPECT_EQ(alloc.offset, 0);
   EXPECT_EQ(alloc.size, 0);
 
-  // Deallocation of zero-sized allocs should always succeed (even redundantly).
-  ASSERT_EQ(arena.Deallocate(&context, alloc), kTfLiteOk);
-  ASSERT_EQ(arena.Deallocate(&context, alloc), kTfLiteOk);
-
   // The zero-sized alloc should resolve to null.
   char* resolved_ptr = nullptr;
   ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
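Zero-sized allocations behave as before: they get offset 0 and size 0 and resolve to a null pointer. The difference is that their lifetime, like every other tensor's, is now expressed through the interval passed to Allocate, which is why the redundant Deallocate checks are dropped rather than updated.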
@@ -67,15 +61,13 @@ TEST(SimpleMemoryArenaTest, BasicZeroAlloc) {
 TEST(SimpleMemoryArenaTest, InterleavedZeroAlloc) {
   TfLiteContext context;
   SimpleMemoryArena arena(64);
-  ArenaAlloc allocs[4];
+  ArenaAllocWithUsageInterval allocs[4];
 
   // Interleave some zero and non-zero-sized allocations and deallocations.
-  ASSERT_EQ(arena.Allocate(&context, 32, 2047, &allocs[0]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 0, &allocs[1]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 1023, &allocs[2]), kTfLiteOk);
-  ASSERT_EQ(arena.Deallocate(&context, allocs[1]), kTfLiteOk);
-  ASSERT_EQ(arena.Deallocate(&context, allocs[2]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 2047, &allocs[3]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 2047, 0, 4, &allocs[0]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 0, 1, 2, &allocs[1]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 1023, 1, 2, &allocs[2]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 2047, 3, 4, &allocs[3]), kTfLiteOk);
 
   // Deallocation of a zero-sized alloc should not impact the allocator offsets.
   EXPECT_EQ(allocs[0].offset, 0);
@@ -87,11 +79,11 @@ TEST(SimpleMemoryArenaTest, InterleavedZeroAlloc) {
 TEST(SimpleMemoryArenaTest, TestAfterClear) {
   TfLiteContext context;
   SimpleMemoryArena arena(64);
-  ArenaAlloc allocs[9];
+  ArenaAllocWithUsageInterval allocs[9];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
-  arena.Allocate(&context, 32, 2047, &allocs[2]);
+  arena.Allocate(&context, 32, 2047, 0, 2, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[2]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[0].offset, 0);
@@ -101,9 +93,9 @@ TEST(SimpleMemoryArenaTest, TestAfterClear) {
   arena.Clear();
 
   // Test with smaller allocs.
-  arena.Allocate(&context, 32, 1023, &allocs[3]);
-  arena.Allocate(&context, 32, 1023, &allocs[4]);
-  arena.Allocate(&context, 32, 1023, &allocs[5]);
+  arena.Allocate(&context, 32, 1023, 0, 2, &allocs[3]);
+  arena.Allocate(&context, 32, 1023, 1, 2, &allocs[4]);
+  arena.Allocate(&context, 32, 1023, 1, 2, &allocs[5]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[3].offset, 0);
@@ -113,9 +105,9 @@ TEST(SimpleMemoryArenaTest, TestAfterClear) {
   arena.Clear();
 
   // Test larger allocs which should require a reallocation.
-  arena.Allocate(&context, 32, 4095, &allocs[6]);
-  arena.Allocate(&context, 32, 4095, &allocs[7]);
-  arena.Allocate(&context, 32, 4095, &allocs[8]);
+  arena.Allocate(&context, 32, 4095, 0, 2, &allocs[6]);
+  arena.Allocate(&context, 32, 4095, 1, 2, &allocs[7]);
+  arena.Allocate(&context, 32, 4095, 1, 2, &allocs[8]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[6].offset, 0);