Merge pull request #38121 from jenselofsson:offline_memory_planner

PiperOrigin-RevId: 316533499
Change-Id: Id967e853081829f4c974cf7527a628724ed0edc2
This commit is contained in:
TensorFlower Gardener 2020-06-15 13:37:09 -07:00
commit 4381963d2d
10 changed files with 649 additions and 92 deletions

View File

@ -22,6 +22,7 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
@ -46,6 +47,7 @@ TF_LITE_MICRO_TEST(TestInvoke) {
"to supported version %d.\n",
model->version(), TFLITE_SCHEMA_VERSION);
}
PrintModelData(model, error_reporter);
// Pull in only the operation implementations we need.
// This relies on a complete list of all the ops needed by this graph.

View File

@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
next_free += sizeof(BufferRequirements) * max_buffer_count_;
buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
next_free += sizeof(int) * max_buffer_count_;
buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
current->size = size;
current->first_time_used = first_time_used;
current->last_time_used = last_time_used;
current->offline_offset = kOnlinePlannedBuffer;
++buffer_count_;
need_to_calculate_offsets_ = true;
return kTfLiteOk;
}
// Records a buffer whose arena offset has been fixed ahead of time.
//
// The size and lifetime are registered through the four-argument AddBuffer()
// overload, which fills the next free requirements_ slot and tags it as
// online planned. On success that tag is replaced with `offline_offset`.
// Returns kTfLiteError if the four-argument overload fails, kTfLiteOk
// otherwise.
TfLiteStatus GreedyMemoryPlanner::AddBuffer(
    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
    int last_time_used, int offline_offset) {
  // Remember which slot the delegated call is going to populate, because it
  // advances buffer_count_ on success.
  const int slot = buffer_count_;
  const TfLiteStatus base_status =
      AddBuffer(error_reporter, size, first_time_used, last_time_used);
  if (base_status != kTfLiteOk) {
    return kTfLiteError;
  }
  requirements_[slot].offline_offset = offline_offset;
  return kTfLiteOk;
}
bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
const int last_time_used) const {
@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
ListEntry* result = nullptr;
ListEntry* candidate_next_entry;
if (start == nullptr) {
candidate_next_entry = &buffers_sorted_by_offset_[0];
candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
} else {
if (start->next_entry_index == -1) {
return nullptr;
@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// This helps find a more compact layout. Intuitively, you can think
// about putting the large buffers in place first, and then the
// smaller buffers can fit in the gaps, rather than fragmenting the
// gaps with small buffers at the beginning.
// gaps with small buffers at the beginning. Add offline planned offsets
// first in the list, since they have a predetermined offset.
int idx_from_tail = buffer_count_;
int idx_from_head = 0;
for (int i = 0; i < buffer_count_; ++i) {
buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
buffer_ids_sorted_by_size_[i] = i;
if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
idx_from_tail--;
buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
buffer_ids_sorted_[idx_from_tail] = i;
buffer_offsets_[i] = -1;
} else {
buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
buffer_ids_sorted_[idx_from_head] = i;
buffer_offsets_[i] = requirements_[i].offline_offset;
idx_from_head++;
}
}
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers.
ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
buffer_count_);
// Put the largest buffer at offset zero to start the process.
ListEntry* first_entry = &buffers_sorted_by_offset_[0];
first_entry->offset = 0;
first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
first_entry->next_entry_index = -1;
// This sorting algorithm is naive, and may end up taking a very long time
// with hundreds of buffers. Do not sort the offline planned offsets.
ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
&buffer_ids_sorted_[idx_from_head],
buffer_count_ - idx_from_head);
// Initialize the first entry to the first buffer in
// buffer_ids_sorted_.
// - If there are no offline planned offsets, the largest buffer will be
// first, and the buffers will be handled in size order.
// - If offline offsets are present, these will be handled first in order
// for the greedy algorithm to utilize gaps in the offline plan.
first_entry_index_ = 0;
next_free_entry_ = 1;
buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
first_entry->next_entry_index = -1; // to mark the entry as end of list
int buffer_id = buffer_ids_sorted_[0];
first_entry->requirements_index = buffer_id;
if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
buffer_offsets_[buffer_id] = 0;
}
first_entry->offset = buffer_offsets_[buffer_id];
// Work through the rest of the buffers to find a good gap to place each one.
for (int i = 1; i < buffer_count_; ++i) {
// The id is the order the buffer was originally added by the client.
const int buffer_id = buffer_ids_sorted_by_size_[i];
buffer_id = buffer_ids_sorted_[i];
// Look at what size and time range the buffer needs to be active.
BufferRequirements* wanted_requirements = &requirements_[buffer_id];
const int wanted_size = wanted_requirements->size;
@ -168,9 +203,11 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// so that it's easy to find the next buffer in memory, and so the gap.
// The candidate_entry variable holds the buffer that we're considering
// placing the current buffer after.
ListEntry* prior_entry = nullptr;
int candidate_offset = 0;
// Loop through the offset-ordered list of buffers, looking for gaps.
if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
ListEntry* prior_entry = nullptr;
while (true) {
// Find out what the next active buffer is.
ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
@ -200,6 +237,10 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
// The gap wasn't big enough, so move on to another candidate.
prior_entry = next_entry;
}
} else {
// Offline planned offsets are to be considered constant.
candidate_offset = wanted_requirements->offline_offset;
}
// At this point, we've either found a gap (possibly at the end of the
// list) and want to place the buffer there, or there are no other active
// buffers in this time range and so we can put it at offset zero.
@ -212,9 +253,17 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
new_entry->requirements_index = buffer_id;
const int new_entry_index = next_free_entry_;
++next_free_entry_;
if (first_entry->offset > candidate_offset) {
// The new entry offset is smaller than the first entry offset =>
// replace the first entry
first_entry = new_entry;
first_entry->next_entry_index = first_entry_index_;
first_entry_index_ = new_entry_index;
} else {
ListEntry* current_entry = first_entry;
// Make sure that we insert the buffer at the correct place in the ordered
// list.
// Make sure that we insert the buffer at the correct place in the
// buffer-offset-ordered list
while (true) {
const int next_entry_index = current_entry->next_entry_index;
if (next_entry_index == -1) {
@ -223,6 +272,7 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
new_entry->next_entry_index = -1;
break;
}
// not at the end of the list -> take a look at next entry
ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
if (next_entry->offset > candidate_offset) {
// We're at the right spot to do an insertion and retain the sorting
@ -234,6 +284,7 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
current_entry = next_entry;
}
}
}
}
size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
if (buffer_count_ == 0) {
return 0;
}
ListEntry* entry = &buffers_sorted_by_offset_[0];
ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
size_t max_size = 0;
while (entry) {
BufferRequirements* requirements =

View File

@ -21,6 +21,8 @@ limitations under the License.
namespace tflite {
// Value stored in a buffer's offline_offset when no offset was planned
// offline for it, i.e. the planner has to choose the offset itself.
constexpr int kOnlinePlannedBuffer = -1;
// A memory planner that uses a greedy algorithm to arrange buffers in memory
// to minimize the overall arena size needed.
//
@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used) override;
// Record details of an offline planned buffer offset we want to place.
// offline_offset is the buffer offset from the start of the arena.
TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
int first_time_used, int last_time_used,
int offline_offset);
// Returns the high-water mark of used memory. This is the minimum size of a
// memory arena you'd need to allocate to hold these buffers.
size_t GetMaximumMemorySize() override;
@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
static size_t per_buffer_size() {
const int per_buffer_size =
sizeof(BufferRequirements) + // requirements_
sizeof(int) + // buffer_sizes_sorted_by_size_
sizeof(int) + // buffer_ids_sorted_by_size_
sizeof(int) + // buffer_sizes_sorted_
sizeof(int) + // buffer_ids_sorted_
sizeof(ListEntry) + // buffers_sorted_by_offset_
sizeof(int); // buffer_offsets_;
return per_buffer_size;
@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
// Records the client-provided information about each buffer.
struct BufferRequirements {
// Size of the buffer, as supplied to AddBuffer().
int size;
// Predetermined offset from the start of the arena, or kOnlinePlannedBuffer
// when the planner is free to choose the offset.
int offline_offset;
// First and last time the buffer is in use, as supplied to AddBuffer().
int first_time_used;
int last_time_used;
};
// Working arrays used during the layout algorithm.
BufferRequirements* requirements_;
int* buffer_sizes_sorted_by_size_;
int* buffer_ids_sorted_by_size_;
// buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
// {
// offline planned buffers,
// online planned buffers sorted by size
// }
int* buffer_sizes_sorted_;
int* buffer_ids_sorted_;
ListEntry* buffers_sorted_by_offset_;
int next_free_entry_;
int next_free_entry_; // Index of the next free entry of
// buffers_sorted_by_offset_
int first_entry_index_; // Index of the first entry (smallest offset) of
// buffers_sorted_by_offset_
// Stores the outcome of the plan, the location of each buffer in the arena.
int* buffer_offsets_;

View File

@ -39,16 +39,19 @@ namespace {
// Used to hold information used during allocation calculations.
struct AllocationInfo {
size_t bytes;
void** output_ptr;
int first_created;
int last_used;
int32_t offline_offset;
bool needs_allocating;
void** output_ptr;
};
// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;
constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
// Instance of a zero-length int to pass as tensor dims for a flatbuffer
// Tensor with no shape. Note that the second member of a TfLiteArray is a
// flexible array member, which is not strictly valid C++. However it is
@ -77,6 +80,71 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
TF_LITE_REMOVE_VIRTUAL_DELETE
};
#if !defined(__clang__)
// Helper function to check flatbuffer metadata correctness. This function is
// not called by default. Hence it's not linked in to the final binary code.
//
// Every metadata entry whose name starts with kOfflineMemAllocMetadata is
// inspected. The header (version, subgraph index, number of offsets) and each
// offset are reported through `error_reporter`, and the entry is rejected
// unless:
//   - the referenced buffer exists and is large enough to hold the header and
//     the announced number of offsets,
//   - the format version is 1,
//   - the subgraph index is 0,
//   - the number of offsets equals the number of tensors in subgraph 0.
//
// Returns kTfLiteError on the first failed check and kTfLiteOk otherwise,
// including when the model has no offline planning metadata at all.
TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
                                        ErrorReporter* error_reporter) {
  // Suppress compile warning for unused function
  (void)CheckOfflinePlannedOffsets;

  // Number of 32-bit words (version, subgraph index, offset count) that
  // precede the offsets in the metadata buffer.
  constexpr size_t kHeaderWords = 3;

  if (model->metadata()) {
    for (size_t i = 0; i < model->metadata()->size(); ++i) {
      auto metadata = model->metadata()->Get(i);
      // Entries without a name cannot be the offline plan; skip them instead
      // of dereferencing a null string.
      if (metadata->name() == nullptr ||
          strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
                  strlen(kOfflineMemAllocMetadata)) != 0) {
        continue;
      }

      auto* subgraphs = model->subgraphs();
      const SubGraph* subgraph = (*subgraphs)[0];
      const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
          subgraph->tensors();
      const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
          model->buffers();
      int nbr_tflite_tensors = tensors->size();

      if (buffers == nullptr || metadata->buffer() >= buffers->size()) {
        TF_LITE_REPORT_ERROR(
            error_reporter,
            "Offline planner metadata refers to a missing buffer (%d)\n",
            static_cast<int>(metadata->buffer()));
        return kTfLiteError;
      }
      auto* buffer = (*buffers)[metadata->buffer()];
      auto* array = buffer->data();
      // The header must be fully present before any of it is read.
      if (array == nullptr ||
          array->size() < kHeaderWords * sizeof(uint32_t)) {
        TF_LITE_REPORT_ERROR(
            error_reporter,
            "Offline planner metadata buffer is missing or too small\n");
        return kTfLiteError;
      }

      const uint32_t* metadata_buffer =
          reinterpret_cast<const uint32_t*>(array->data());
      int version = metadata_buffer[0];
      int subgraph_idx = metadata_buffer[1];
      const int nbr_offline_offsets = metadata_buffer[2];
      const int32_t* offline_planner_offsets =
          reinterpret_cast<const int32_t*>(&metadata_buffer[kHeaderWords]);

      TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
      TF_LITE_REPORT_ERROR(error_reporter,
                           "Offline planner metadata found, version %d, "
                           "subgraph %d, nbr offline offsets %d",
                           version, subgraph_idx, nbr_offline_offsets);

      // Refuse to walk past the end of the buffer when the announced count
      // does not match the amount of data actually stored.
      const size_t available_offsets =
          array->size() / sizeof(uint32_t) - kHeaderWords;
      if (nbr_offline_offsets < 0 ||
          static_cast<size_t>(nbr_offline_offsets) > available_offsets) {
        TF_LITE_REPORT_ERROR(
            error_reporter,
            "Nbr of offline buffer offsets (%d) exceeds metadata buffer "
            "capacity (%d)\n",
            nbr_offline_offsets, static_cast<int>(available_offsets));
        return kTfLiteError;
      }

      for (int j = 0; j < nbr_offline_offsets; ++j) {
        TF_LITE_REPORT_ERROR(
            error_reporter,
            "Offline planner tensor index %d, offline offset: %d", j,
            offline_planner_offsets[j]);
      }

      if (version != 1) {
        TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
                             version);
        return kTfLiteError;
      }
      if (subgraph_idx != 0) {
        TF_LITE_REPORT_ERROR(error_reporter,
                             "Only 1 subgraph supported! Subgraph idx (%d)\n",
                             subgraph_idx);
        return kTfLiteError;
      }
      if (nbr_tflite_tensors != nbr_offline_offsets) {
        TF_LITE_REPORT_ERROR(error_reporter,
                             "Nbr of offline buffer offsets (%d) in metadata "
                             "not equal nbr tensors (%d)\n",
                             nbr_offline_offsets, nbr_tflite_tensors);
        return kTfLiteError;
      }
    }
  }
  return kTfLiteOk;
}
#endif
// A helper class to construct AllocationInfo array. This array contains the
// lifetime of tensors / scratch_buffer and will be used to calculate the memory
// plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
@ -94,9 +162,17 @@ class AllocationInfoBuilder {
return Allocate();
}
// Check if model contains offline planned buffer offsets.
// - If there's no metadata available, offline_planner_offsets is not set
// - If there's metadata available, offline_planner_offsets will point to the
// first offset in the metadata buffer list.
TfLiteStatus GetOfflinePlannedOffsets(const Model* model,
int32_t** offline_planner_offsets);
// Add allocation information for the tensors.
TfLiteStatus AddTensors(const SubGraph* subgraph,
TfLiteStatus AddTensors(const SubGraph* subgraph, int32_t* offline_offsets,
TfLiteTensor* runtime_tensors);
// Add allocation information for the scratch buffers.
TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles);
@ -130,6 +206,7 @@ TfLiteStatus AllocationInfoBuilder::Allocate() {
}
TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
int32_t* offline_offsets,
TfLiteTensor* runtime_tensors) {
// Set up allocation info for all tensors.
for (size_t i = 0; i < tensor_count_; ++i) {
@ -141,6 +218,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
current->last_used = -1;
current->needs_allocating = (runtime_tensors[i].data.data == nullptr) &&
(!subgraph->tensors()->Get(i)->is_variable());
if (offline_offsets) {
current->offline_offset = offline_offsets[i];
} else {
current->offline_offset = kOnlinePlannedBuffer;
}
}
for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
@ -198,6 +280,52 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
return kTfLiteOk;
}
// The tensor offsets will be encoded in the metadata:[Metadata] field of the
// Model. The following encoding applies:
//
// | Metadata component |                 Value                                |
// |    name:string     | "OfflineMemoryAllocation"                            |
// |    buffer:uint     | Index of buffer containing memory allocation data    |
//
// The buffer contents for the memory allocation is a list of 32-bit integers.
// The number of tensors, n, must be equal to the number of tensors defined in
// the model. The following encoding applies:
//
// |  Offset |                            Value                                |
// |    0    | Offline allocation format version (1 in the current format)     |
// |    1    | Subgraph index to which this allocation applies                 |
// |    2    | Number offsets following: n                                     |
// |    3    | Arena byte offset of tensor #0 or -1 to allocate at runtime     |
// |    4    | Arena byte offset of tensor #1 or -1 to allocate at runtime     |
// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
//
// Looks for a metadata entry whose name starts with kOfflineMemAllocMetadata.
// When one is found and passes validation, *offline_planner_offsets is set to
// point at the first offset (word 3) inside the model's metadata buffer; when
// none is found, *offline_planner_offsets is left untouched.
//
// Returns kTfLiteError (without modifying *offline_planner_offsets) if the
// metadata buffer is missing, too small for its header or for the announced
// number of offsets, or if that number differs from tensor_count_. Returns
// kTfLiteOk otherwise.
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
    const Model* model, int32_t** offline_planner_offsets) {
  // Number of 32-bit words (version, subgraph index, offset count) that
  // precede the offsets in the metadata buffer.
  constexpr size_t kHeaderWords = 3;

  if (model->metadata()) {
    for (size_t i = 0; i < model->metadata()->size(); ++i) {
      auto metadata = model->metadata()->Get(i);
      // Entries without a name cannot be the offline plan; skip them instead
      // of dereferencing a null string.
      if (metadata->name() == nullptr ||
          strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
                  strlen(kOfflineMemAllocMetadata)) != 0) {
        continue;
      }

      const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
          model->buffers();
      if (buffers == nullptr || metadata->buffer() >= buffers->size()) {
        TF_LITE_REPORT_ERROR(
            reporter_,
            "Offline planner metadata refers to a missing buffer (%d)\n",
            static_cast<int>(metadata->buffer()));
        return kTfLiteError;
      }
      auto* buffer = (*buffers)[metadata->buffer()];
      auto* array = buffer->data();
      // The header must be fully present before any of it is read.
      if (array == nullptr ||
          array->size() < kHeaderWords * sizeof(uint32_t)) {
        TF_LITE_REPORT_ERROR(
            reporter_,
            "Offline planner metadata buffer is missing or too small\n");
        return kTfLiteError;
      }

      const uint32_t* metadata_buffer =
          reinterpret_cast<const uint32_t*>(array->data());
      const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);

      if (tensor_count_ != nbr_tensors) {
        // The report call is variadic: convert explicitly so the arguments
        // match the %d conversions.
        TF_LITE_REPORT_ERROR(reporter_,
                             "Nbr of offline buffer offsets (%d) in metadata "
                             "not equal nbr tensors (%d)\n",
                             static_cast<int>(nbr_tensors),
                             static_cast<int>(tensor_count_));
        return kTfLiteError;
      }

      // Make sure every announced offset is actually backed by buffer data.
      const size_t available_offsets =
          array->size() / sizeof(uint32_t) - kHeaderWords;
      if (nbr_tensors > available_offsets) {
        TF_LITE_REPORT_ERROR(
            reporter_,
            "Nbr of offline buffer offsets (%d) exceeds metadata buffer "
            "capacity (%d)\n",
            static_cast<int>(nbr_tensors),
            static_cast<int>(available_offsets));
        return kTfLiteError;
      }

      // The out-parameter type is non-const for interface compatibility; the
      // data still lives inside the read-only model flatbuffer.
      *offline_planner_offsets = const_cast<int32_t*>(
          reinterpret_cast<const int32_t*>(&metadata_buffer[kHeaderWords]));
    }
  }
  return kTfLiteOk;
}
TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
internal::ScratchBufferHandle* buffer_handles) {
// Set up allocation info for buffers.
@ -210,11 +338,13 @@ TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
current->first_created = handle->node_idx;
current->last_used = handle->node_idx;
current->needs_allocating = true;
current->offline_offset = kOnlinePlannedBuffer;
}
return kTfLiteOk;
}
TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
GreedyMemoryPlanner* planner,
const AllocationInfo* allocation_info,
size_t allocation_info_size) {
// Add the tensors to our allocation plan.
@ -223,9 +353,15 @@ TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
if (current->needs_allocating) {
size_t aligned_bytes_required =
AlignSizeUp(current->bytes, kBufferAlignment);
if (current->offline_offset == kOnlinePlannedBuffer) {
TF_LITE_ENSURE_STATUS(
planner->AddBuffer(error_reporter, aligned_bytes_required,
current->first_created, current->last_used));
} else {
TF_LITE_ENSURE_STATUS(planner->AddBuffer(
error_reporter, aligned_bytes_required, current->first_created,
current->last_used, current->offline_offset));
}
}
}
return kTfLiteOk;
@ -466,7 +602,6 @@ TfLiteStatus MicroAllocator::StartModelAllocation(
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
model_is_allocating_ = true;
TF_LITE_ENSURE_STATUS(
@ -491,7 +626,7 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(context, subgraph));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, context, subgraph));
TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph));
model_is_allocating_ = false;
@ -739,7 +874,8 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
return (*subgraphs)[0];
}
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
TfLiteContext* context,
const SubGraph* subgraph) {
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@ -756,7 +892,13 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
TF_LITE_ENSURE_STATUS(
builder.Init(subgraph->tensors()->size(), scratch_buffer_count_));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, context->tensors));
int32_t* offline_planner_offsets = nullptr;
TF_LITE_ENSURE_STATUS(
builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, offline_planner_offsets,
context->tensors));
TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
const AllocationInfo* allocation_info = builder.Finish();

View File

@ -189,7 +189,8 @@ class MicroAllocator {
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena.
virtual TfLiteStatus CommitStaticMemoryPlan(TfLiteContext* context,
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
TfLiteContext* context,
const SubGraph* subgraph);
// A simple memory allocator that always allocate from the arena tail or head.

View File

@ -253,10 +253,10 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
// bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors.
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
// t1 can't reuse any memory, as n0 requires both t0 and t1.
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
// t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires
// both t1 and t2.
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
// t3 reuses the same memory from t0 as t0 is not an input to any node.
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
@ -308,4 +308,218 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
/*count=*/3);
}
// Supplies offline planning metadata in which every tensor offset is -1 and
// checks the resulting layout of a branching model.
TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) {
  constexpr int kFormatVersion = 1;
  constexpr int kSubgraphIndex = 0;
  constexpr int kTensorCount = 4;

  tflite::testing::MockOpResolver op_resolver;
  tflite::NodeAndRegistration* node_and_registration;

  // Header (version, subgraph, number of tensors) followed by one offset per
  // tensor. -1 means "allocate at runtime".
  const int32_t offline_plan[tflite::testing::kOfflinePlannerHeaderSize +
                             kTensorCount] = {kFormatVersion, kSubgraphIndex,
                                              kTensorCount,
                                              -1, -1, -1, -1};

  // The structure is identical to the one in
  // TestAllocationForModelsWithBranches
  int connection_count = 3;
  tflite::testing::NodeConnection connections[3] = {
      {
          {0},  // input
          {1}   // output
      },
      {
          {0},  // input
          {2}   // output
      },
      {
          {1, 2},  // input1, input2
          {3}      // output
      }};

  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
      kTensorCount, offline_plan, connections, connection_count);

  TfLiteContext context;
  constexpr size_t kArenaSize = 4096;
  uint8_t arena[kArenaSize];
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(arena, kArenaSize, micro_test::reporter);

  TF_LITE_MICRO_EXPECT_EQ(
      kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
                                                 &node_and_registration));
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                          allocator->FinishModelAllocation(model, &context));

  // Since all of the tensors are online planned and the model structure is
  // identical to that in TestAllocationForModelsWithBranches,
  // the offsets should be identical to that test.
  uint8_t* arena_base = context.tensors[0].data.uint8;
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - arena_base);
}
// Gives every tensor of a three-node chain an offline planned offset and
// checks that the allocator places each tensor exactly there.
TF_LITE_MICRO_TEST(OfflinePlannerBasic) {
  constexpr int kTensorCount = 4;
  tflite::testing::MockOpResolver op_resolver;
  tflite::NodeAndRegistration* node_and_registration;

  // Header: version 1, subgraph 0, number of tensors; then one arena offset
  // per tensor.
  const int32_t offline_plan[tflite::testing::kOfflinePlannerHeaderSize +
                             kTensorCount] = {1, 0, kTensorCount,
                                              0,    // t0
                                              48,   // t1
                                              0,    // t2
                                              48};  // t3

  // Tensor indices.
  constexpr int t0 = 0;
  constexpr int t1 = 1;
  constexpr int t2 = 2;
  constexpr int t3 = 3;

  // Chain: t0 -> t1 -> t2 -> t3.
  int connection_count = 3;
  tflite::testing::NodeConnection connections[3] = {
      {
          {t0},  // input
          {t1}   // output
      },
      {
          {t1},  // input
          {t2}   // output
      },
      {
          {t2},  // input
          {t3}   // output
      }};

  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
      kTensorCount, offline_plan, connections, connection_count);

  TfLiteContext context;
  constexpr size_t kArenaSize = 4096;
  uint8_t arena[kArenaSize];
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(arena, kArenaSize, micro_test::reporter);

  TF_LITE_MICRO_EXPECT_EQ(
      kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
                                                 &node_and_registration));
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                          allocator->FinishModelAllocation(model, &context));

  // Offsets are measured relative to tensor 0, which is planned at offset 0.
  uint8_t* arena_base = context.tensors[0].data.uint8;
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[2].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[3].data.uint8 - arena_base);
}
// Plans two inputs of the same node at the same offline offset and leaves the
// final tensor to the online planner, then checks the resulting layout.
TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) {
  constexpr int kTensorCount = 4;
  tflite::testing::MockOpResolver op_resolver;
  tflite::NodeAndRegistration* node_and_registration;

  const int32_t offline_plan[tflite::testing::kOfflinePlannerHeaderSize +
                             kTensorCount] = {
      // header: version, subgraph, nbr tensors
      1, 0, kTensorCount,
      // memory offsets:
      0,    // t0
      0,    // t1
      48,   // t2
      -1};  // t3

  // Tensor indices.
  constexpr int t0 = 0;
  constexpr int t1 = 1;
  constexpr int t2 = 2;
  constexpr int t3 = 3;

  int connection_count = 2;
  tflite::testing::NodeConnection connections[2] = {
      {
          {t0, t1},  // input, scratch
          {t2}       // output
      },
      {
          {t2},  // input
          {t3}   // output
      },
  };

  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
      kTensorCount, offline_plan, connections, connection_count);

  TfLiteContext context;
  constexpr size_t kArenaSize = 4096;
  uint8_t arena[kArenaSize];
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(arena, kArenaSize, micro_test::reporter);

  TF_LITE_MICRO_EXPECT_EQ(
      kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
                                                 &node_and_registration));
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                          allocator->FinishModelAllocation(model, &context));

  // Offsets are measured relative to tensor 0, which is planned at offset 0.
  uint8_t* arena_base = context.tensors[0].data.uint8;
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[1].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
}
// Mixes offline planned tensors (t0, t1, t3) with tensors left to the online
// planner (t2, t4) and checks where each of them ends up.
TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) {
  constexpr int kTensorCount = 5;
  tflite::testing::MockOpResolver op_resolver;
  tflite::NodeAndRegistration* node_and_registration;

  const int32_t offline_plan[tflite::testing::kOfflinePlannerHeaderSize +
                             kTensorCount] = {
      // header: version, subgraph, nbr tensors
      1, 0, kTensorCount,
      // memory offsets:
      0,    // t0
      48,   // t1
      -1,   // t2
      0,    // t3
      -1};  // t4

  // Tensor indices.
  constexpr int t0 = 0;
  constexpr int t1 = 1;
  constexpr int t2 = 2;
  constexpr int t3 = 3;
  constexpr int t4 = 4;

  int connection_count = 2;
  tflite::testing::NodeConnection connections[2] = {
      {
          {t0, t1},  // input, scratch
          {t2},      // output
      },
      {
          {t2},      // input
          {t3, t4},  // output1, output2
      },
  };

  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
      kTensorCount, offline_plan, connections, connection_count);

  TfLiteContext context;
  constexpr size_t kArenaSize = 4096;
  uint8_t arena[kArenaSize];
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(arena, kArenaSize, micro_test::reporter);

  TF_LITE_MICRO_EXPECT_EQ(
      kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
                                                 &node_and_registration));
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                          allocator->FinishModelAllocation(model, &context));

  // Offsets are measured relative to tensor 0, which is planned at offset 0.
  uint8_t* arena_base = context.tensors[0].data.uint8;
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[4].data.uint8 - arena_base);
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - arena_base);
}
TF_LITE_MICRO_TESTS_END

View File

@ -27,6 +27,7 @@ limitations under the License.
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"
@ -111,6 +112,33 @@ const char* AllocTypeName(TfLiteAllocationType type) {
}
} // namespace
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
auto* subgraphs = model->subgraphs();
const SubGraph* subgraph = (*subgraphs)[0];
const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
subgraph->tensors();
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
model->buffers();
TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
for (size_t i = 0; i < tensors->size(); ++i) {
const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
size_t type_size, tensor_size;
auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
auto* array = buffer->data();
int array_size = 0;
if (array) {
array_size = array->size();
}
BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
error_reporter);
TF_LITE_REPORT_ERROR(
error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
!array_size && !flatbuffer_tensor.is_variable(), tensor_size);
}
}
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter) {
printf("Interpreter has %zu tensors and %zu nodes\n",

View File

@ -20,6 +20,9 @@ limitations under the License.
#include "tensorflow/lite/micro/micro_interpreter.h"
namespace tflite {
// Helper function to print model flatbuffer data. This function is not called
// by default. Hence it's not linked in to the final binary code.
void PrintModelData(const Model* model, ErrorReporter* error_reporter);
// Prints a dump of what tensors and what nodes are in the interpreter.
void PrintInterpreterState(MicroInterpreter* interpreter);
} // namespace tflite

View File

@ -55,7 +55,7 @@ class StackAllocator : public flatbuffers::Allocator {
return *inst;
}
static constexpr size_t kStackAllocatorSize = 4096;
static constexpr size_t kStackAllocatorSize = 8192;
private:
uint8_t data_backing_[kStackAllocatorSize];
@ -101,6 +101,9 @@ class ModelBuilder {
Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
std::initializer_list<Tensor> outputs);
void AddMetadata(const char* description_string,
const int32_t* metadata_buffer_data, size_t num_elements);
// Constructs the flatbuffer model using `builder_` and return a pointer to
// it. The returned model has the same lifetime as `builder_`.
const Model* BuildModel(std::initializer_list<Tensor> inputs,
@ -123,6 +126,16 @@ class ModelBuilder {
static constexpr int kMaxTensors = 50;
flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];
static constexpr int kMaxMetadataBuffers = 10;
static constexpr int kMaxMetadatas = 10;
flatbuffers::Offset<Metadata> metadata_[kMaxMetadatas];
flatbuffers::Offset<Buffer> metadata_buffers_[kMaxMetadataBuffers];
int nbr_of_metadata_buffers_ = 0;
int next_tensor_id_ = 0;
};
@ -149,13 +162,33 @@ ModelBuilder::Node ModelBuilder::AddNode(
return next_operator_id_ - 1;
}
// Adds one metadata entry named `description_string` whose payload is the
// `num_elements` 32-bit values starting at `metadata_buffer_data`.
//
// The payload is stored as a byte vector in a Buffer of its own, and the
// Metadata record refers to it by buffer index 1 + (number of entries added
// before this one).
//
// NOTE: no capacity check is performed here; metadata_ and metadata_buffers_
// are fixed-size arrays of kMaxMetadatas and kMaxMetadataBuffers elements.
void ModelBuilder::AddMetadata(const char* description_string,
                               const int32_t* metadata_buffer_data,
                               size_t num_elements) {
  const int slot = nbr_of_metadata_buffers_;

  // Metadata record: name plus the index its buffer will get in the model.
  const auto name_offset = builder_->CreateString(description_string);
  metadata_[slot] = CreateMetadata(*builder_, name_offset, 1 + slot);

  // Payload buffer: the raw bytes of the 32-bit values.
  const size_t payload_bytes = sizeof(uint32_t) * num_elements;
  const auto payload_offset = builder_->CreateVector(
      reinterpret_cast<const uint8_t*>(metadata_buffer_data), payload_bytes);
  metadata_buffers_[slot] = tflite::CreateBuffer(*builder_, payload_offset);

  ++nbr_of_metadata_buffers_;
}
const Model* ModelBuilder::BuildModel(
std::initializer_list<ModelBuilder::Tensor> inputs,
std::initializer_list<ModelBuilder::Tensor> outputs) {
// Model schema requires an empty buffer at idx 0.
constexpr size_t kBufferSize = 1;
const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
tflite::CreateBuffer(*builder_)};
size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_;
flatbuffers::Offset<Buffer> buffers[kMaxMetadataBuffers];
buffers[0] = tflite::CreateBuffer(*builder_);
// Place the metadata buffers first in the buffer since the indices for them
// have already been set in AddMetadata()
for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) {
buffers[i] = metadata_buffers_[i - 1];
}
// TFLM only supports single subgraph.
constexpr size_t subgraphs_size = 1;
@ -166,12 +199,26 @@ const Model* ModelBuilder::BuildModel(
builder_->CreateVector(outputs.begin(), outputs.size()),
builder_->CreateVector(operators_, next_operator_id_),
builder_->CreateString("test_subgraph"))};
const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
flatbuffers::Offset<Model> model_offset;
if (ModelBuilder::nbr_of_metadata_buffers_ > 0) {
model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("teset_model"),
builder_->CreateVector(buffers, kBufferSize));
builder_->CreateVector(buffers, buffer_size), 0,
builder_->CreateVector(metadata_,
ModelBuilder::nbr_of_metadata_buffers_));
} else {
model_offset = tflite::CreateModel(
*builder_, 0,
builder_->CreateVector(operator_codes_, next_operator_code_id_),
builder_->CreateVector(subgraphs, subgraphs_size),
builder_->CreateString("teset_model"),
builder_->CreateVector(buffers, buffer_size));
}
tflite::FinishModelBuffer(*builder_, model_offset);
void* model_pointer = builder_->GetBufferPointer();
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
@ -250,6 +297,35 @@ const Model* BuildSimpleModelWithBranch() {
return model_builder.BuildModel({t0}, {t3});
}
// Builds a flatbuffer model that carries an "OfflineMemoryAllocation" metadata
// entry.
//
// number_of_tensors: how many float32 tensors of shape {2, 2, 3} to add.
// metadata_buffer:   payload of the metadata entry; it is read for
//                    number_of_tensors + kOfflinePlannerHeaderSize elements.
// node_conn:         array of `num_conns` connections; each one becomes a
//                    "mock_custom" node with the listed input/output tensors.
// num_conns:         number of elements in `node_conn`.
//
// The inputs of the first connection are the model inputs and the outputs of
// the last connection are the model outputs. Returns nullptr when there is no
// connection to take them from.
const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
                                           const int32_t* metadata_buffer,
                                           NodeConnection* node_conn,
                                           int num_conns) {
  // node_conn[0] and node_conn[num_conns - 1] are dereferenced below.
  if (node_conn == nullptr || num_conns <= 0) {
    return nullptr;
  }

  flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
  ModelBuilder model_builder(fb_builder);

  const int op_id =
      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
                               /* version= */ 0);

  for (int i = 0; i < number_of_tensors; ++i) {
    model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
  }

  for (int i = 0; i < num_conns; ++i) {
    model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output);
  }

  // The metadata payload is a fixed-size header followed by one value per
  // tensor.
  model_builder.AddMetadata(
      "OfflineMemoryAllocation", metadata_buffer,
      number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize);

  return model_builder.BuildModel(node_conn[0].input,
                                  node_conn[num_conns - 1].output);
}
const Model* BuildSimpleMockModel() {
using flatbuffers::Offset;
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@ -638,6 +714,15 @@ const Model* GetSimpleModelWithBranch() {
return model;
}
// Public entry point for the offline-planning test model: forwards every
// argument unchanged to BuildModelWithOfflinePlanning() and returns the model
// it produces.
const Model* GetModelWithOfflinePlanning(int num_tensors,
                                         const int32_t* metadata_buffer,
                                         NodeConnection* node_conn,
                                         int num_conns) {
  return BuildModelWithOfflinePlanning(num_tensors, metadata_buffer, node_conn,
                                       num_conns);
}
const Model* GetSimpleStatefulModel() {
static Model* model = nullptr;
if (!model) {

View File

@ -30,6 +30,14 @@ limitations under the License.
namespace tflite {
namespace testing {
// Number of header elements that precede the per-tensor values in the
// offline planner metadata buffer.
constexpr int kOfflinePlannerHeaderSize = 3;

// Describes one node of a test graph by the tensors it consumes and produces.
struct NodeConnection_ {
  // Indices of the tensors read by the node.
  std::initializer_list<int32_t> input;
  // Indices of the tensors written by the node.
  std::initializer_list<int32_t> output;
};
using NodeConnection = NodeConnection_;
// A simple operator that returns the median of the input with the number of
// times the kernel was invoked. The implementation below is deliberately
// complicated, just to demonstrate how kernel memory planning works.
@ -82,6 +90,12 @@ const Model* GetComplexMockModel();
// Returns a simple flatbuffer model with two branches.
const Model* GetSimpleModelWithBranch();
// Returns a simple flatbuffer model with offline planned tensors.
//
// num_tensors:     number of tensors added to the model.
// metadata_buffer: payload of the "OfflineMemoryAllocation" metadata entry;
//                  must hold num_tensors + kOfflinePlannerHeaderSize elements.
// node_conn:       array of `num_conns` connections, one per node; the first
//                  connection's inputs are the model inputs and the last
//                  connection's outputs are the model outputs.
// num_conns:       number of elements in `node_conn`.
//
// The model is rebuilt on every call.
const Model* GetModelWithOfflinePlanning(int num_tensors,
const int32_t* metadata_buffer,
NodeConnection* node_conn,
int num_conns);
// Returns a flatbuffer model with `simple_stateful_op`
const Model* GetSimpleStatefulModel();