Merge pull request #38121 from jenselofsson:offline_memory_planner
PiperOrigin-RevId: 316533499 Change-Id: Id967e853081829f4c974cf7527a628724ed0edc2
Commit: 4381963d2d
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_error_reporter.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
 #include "tensorflow/lite/micro/testing/micro_test.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/version.h"
@@ -46,6 +47,7 @@ TF_LITE_MICRO_TEST(TestInvoke) {
                          "to supported version %d.\n",
                          model->version(), TFLITE_SCHEMA_VERSION);
   }
+  PrintModelData(model, error_reporter);

   // Pull in only the operation implementations we need.
   // This relies on a complete list of all the ops needed by this graph.
@@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
   requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
   next_free += sizeof(BufferRequirements) * max_buffer_count_;

-  buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
+  buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
   next_free += sizeof(int) * max_buffer_count_;

-  buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
+  buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
   next_free += sizeof(int) * max_buffer_count_;

   buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
   current->size = size;
   current->first_time_used = first_time_used;
   current->last_time_used = last_time_used;
+  current->offline_offset = kOnlinePlannedBuffer;
   ++buffer_count_;
   need_to_calculate_offsets_ = true;
   return kTfLiteOk;
 }

+TfLiteStatus GreedyMemoryPlanner::AddBuffer(
+    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
+    int last_time_used, int offline_offset) {
+  BufferRequirements* current = &requirements_[buffer_count_];
+  if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
+      kTfLiteOk) {
+    return kTfLiteError;
+  }
+  current->offline_offset = offline_offset;
+  return kTfLiteOk;
+}
+
 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
     const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
     const int last_time_used) const {
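
Usage sketch (editor's note, not part of the commit): with the overload above, a caller
registers online-planned buffers through the four-argument AddBuffer() and offline-planned
buffers through the five-argument form. The planner instance, reporter, sizes and the
offset below are made-up illustration values.

    #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
    #include "tensorflow/lite/micro/micro_error_reporter.h"

    void PlannerUsageSketch() {
      unsigned char planner_scratch[1024];  // working memory for the planner's bookkeeping
      tflite::GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));
      tflite::MicroErrorReporter micro_error_reporter;
      tflite::ErrorReporter* error_reporter = &micro_error_reporter;

      // Online-planned buffer: the greedy algorithm chooses its offset.
      planner.AddBuffer(error_reporter, /*size=*/256,
                        /*first_time_used=*/0, /*last_time_used=*/1);
      // Offline-planned buffer: the offset (here 512) would normally come from
      // the model's "OfflineMemoryAllocation" metadata.
      planner.AddBuffer(error_reporter, /*size=*/128,
                        /*first_time_used=*/1, /*last_time_used=*/2,
                        /*offline_offset=*/512);
    }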
@@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
   ListEntry* result = nullptr;
   ListEntry* candidate_next_entry;
   if (start == nullptr) {
-    candidate_next_entry = &buffers_sorted_by_offset_[0];
+    candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
   } else {
     if (start->next_entry_index == -1) {
       return nullptr;
@@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
   // This helps find a more compact layout. Intuitively, you can think
   // about putting the large buffers in place first, and then the
   // smaller buffers can fit in the gaps, rather than fragmenting the
-  // gaps with small buffers at the beginning.
+  // gaps with small buffers at the beginning. Add offline planned offsets
+  // first in the list, since they have a predetermined offset.
+  int idx_from_tail = buffer_count_;
+  int idx_from_head = 0;
   for (int i = 0; i < buffer_count_; ++i) {
-    buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
-    buffer_ids_sorted_by_size_[i] = i;
-    buffer_offsets_[i] = -1;
+    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
+      idx_from_tail--;
+      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
+      buffer_ids_sorted_[idx_from_tail] = i;
+      buffer_offsets_[i] = -1;
+    } else {
+      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
+      buffer_ids_sorted_[idx_from_head] = i;
+      buffer_offsets_[i] = requirements_[i].offline_offset;
+      idx_from_head++;
+    }
   }

   // This sorting algorithm is naive, and may end up taking a very long time
-  // with hundreds of buffers.
-  ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
-                     buffer_count_);
+  // with hundreds of buffers. Do not sort the offline planned offsets.
+  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
+                     &buffer_ids_sorted_[idx_from_head],
+                     buffer_count_ - idx_from_head);

-  // Put the largest buffer at offset zero to start the process.
-  ListEntry* first_entry = &buffers_sorted_by_offset_[0];
-  first_entry->offset = 0;
-  first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
-  first_entry->next_entry_index = -1;
+  // Initialize the first entry to the first buffer in buffer_ids_sorted_.
+  //   - If there are no offline planned offsets, the largest buffer will be
+  //     first, and the buffers will be handled in size order.
+  //   - If offline offsets are present, these will be handled first in order
+  //     for the greedy algorithm to utilize gaps in the offline plan.
+  first_entry_index_ = 0;
   next_free_entry_ = 1;
-  buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
+  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
+  first_entry->next_entry_index = -1;  // to mark the entry as end of list
+  int buffer_id = buffer_ids_sorted_[0];
+  first_entry->requirements_index = buffer_id;
+  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
+    buffer_offsets_[buffer_id] = 0;
+  }
+  first_entry->offset = buffer_offsets_[buffer_id];

   // Work through the rest of the buffers to find a good gap to place each one.
   for (int i = 1; i < buffer_count_; ++i) {
     // The id is the order the buffer was originally added by the client.
-    const int buffer_id = buffer_ids_sorted_by_size_[i];
+    buffer_id = buffer_ids_sorted_[i];
     // Look at what size and time range the buffer needs to be active.
     BufferRequirements* wanted_requirements = &requirements_[buffer_id];
     const int wanted_size = wanted_requirements->size;
@@ -168,37 +203,43 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
     // so that it's easy to find the next buffer in memory, and so the gap.
     // The candidate_entry variable holds the buffer that we're considering
     // placing the current buffer after.
-    ListEntry* prior_entry = nullptr;
     int candidate_offset = 0;
     // Loop through the offset-ordered list of buffers, looking for gaps.
-    while (true) {
-      // Find out what the next active buffer is.
-      ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
-          prior_entry, wanted_first_time_used, wanted_last_time_used);
+    if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
+      ListEntry* prior_entry = nullptr;
+      while (true) {
+        // Find out what the next active buffer is.
+        ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
+            prior_entry, wanted_first_time_used, wanted_last_time_used);

-      if (prior_entry) {
-        BufferRequirements* candidate_requirements =
-            &requirements_[prior_entry->requirements_index];
-        const int prior_entry_offset =
-            prior_entry->offset + candidate_requirements->size;
-        if (prior_entry_offset > candidate_offset) {
-          candidate_offset = prior_entry_offset;
+        if (prior_entry) {
+          BufferRequirements* candidate_requirements =
+              &requirements_[prior_entry->requirements_index];
+          const int prior_entry_offset =
+              prior_entry->offset + candidate_requirements->size;
+          if (prior_entry_offset > candidate_offset) {
+            candidate_offset = prior_entry_offset;
+          }
         }
+        if (next_entry == nullptr) {
+          // We're at the end of the list, so we can always append the buffer
+          // here.
+          break;
+        }
+        // Find out how much space there is between us and the next buffer.
+        const int gap = next_entry->offset - candidate_offset;
+        if (gap >= wanted_size) {
+          // This entry has a big enough gap between it and the next, so
+          // use it!
+          break;
+        }
+        // The gap wasn't big enough, so move on to another candidate.
+        prior_entry = next_entry;
       }
-      if (next_entry == nullptr) {
-        // We're at the end of the list, so we can always append the buffer
-        // here.
-        break;
-      }
-      // Find out how much space there is between us and the next buffer.
-      const int gap = next_entry->offset - candidate_offset;
-      if (gap >= wanted_size) {
-        // This entry has a big enough gap between it and the next, so
-        // use it!
-        break;
-      }
-      // The gap wasn't big enough, so move on to another candidate.
-      prior_entry = next_entry;
+    } else {
+      // Offline planned offsets are to be considered constant.
+      candidate_offset = wanted_requirements->offline_offset;
     }
     // At this point, we've either found a gap (possibly at the end of the
     // list) and want to place the buffer there, or there are no other active
@@ -212,26 +253,36 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
     new_entry->requirements_index = buffer_id;
     const int new_entry_index = next_free_entry_;
     ++next_free_entry_;
-    ListEntry* current_entry = first_entry;
-    // Make sure that we insert the buffer at the correct place in the ordered
-    // list.
-    while (true) {
-      const int next_entry_index = current_entry->next_entry_index;
-      if (next_entry_index == -1) {
-        // We're at the end of the list, so just add the new entry here.
-        current_entry->next_entry_index = new_entry_index;
-        new_entry->next_entry_index = -1;
-        break;
-      }
-      ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
-      if (next_entry->offset > candidate_offset) {
-        // We're at the right spot to do an insertion and retain the sorting
-        // order, so place the new entry here.
-        new_entry->next_entry_index = current_entry->next_entry_index;
-        current_entry->next_entry_index = new_entry_index;
-        break;
-      }
-      current_entry = next_entry;
+
+    if (first_entry->offset > candidate_offset) {
+      // The new entry offset is smaller than the first entry offset =>
+      // replace the first entry
+      first_entry = new_entry;
+      first_entry->next_entry_index = first_entry_index_;
+      first_entry_index_ = new_entry_index;
+    } else {
+      ListEntry* current_entry = first_entry;
+      // Make sure that we insert the buffer at the correct place in the
+      // buffer-offset-ordered list
+      while (true) {
+        const int next_entry_index = current_entry->next_entry_index;
+        if (next_entry_index == -1) {
+          // We're at the end of the list, so just add the new entry here.
+          current_entry->next_entry_index = new_entry_index;
+          new_entry->next_entry_index = -1;
+          break;
+        }
+        // not at the end of the list -> take a look at next entry
+        ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
+        if (next_entry->offset > candidate_offset) {
+          // We're at the right spot to do an insertion and retain the sorting
+          // order, so place the new entry here.
+          new_entry->next_entry_index = current_entry->next_entry_index;
+          current_entry->next_entry_index = new_entry_index;
+          break;
+        }
+        current_entry = next_entry;
+      }
     }
   }
 }
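
Worked example (editor's note, not part of the commit): the new head-replacement branch
matters once offline offsets are in play. Suppose the offset-ordered list currently starts
with an offline-planned entry at offset 48, and the next online buffer is assigned
candidate_offset 0 because it fits in the gap below 48. Since first_entry->offset (48) is
greater than candidate_offset (0), the new entry becomes the list head: its
next_entry_index is set to the old first_entry_index_, and first_entry_index_ is updated
to the new entry's index. Before this change the head was always the entry at offset 0,
so this case could not occur.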
@@ -241,7 +292,7 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
   if (buffer_count_ == 0) {
     return 0;
   }
-  ListEntry* entry = &buffers_sorted_by_offset_[0];
+  ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
   size_t max_size = 0;
   while (entry) {
     BufferRequirements* requirements =
@@ -21,6 +21,8 @@ limitations under the License.

 namespace tflite {

+constexpr int kOnlinePlannedBuffer = -1;
+
 // A memory planner that uses a greedy algorithm to arrange buffers in memory
 // to minimize the overall arena size needed.
 //
@@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
   TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                          int first_time_used, int last_time_used) override;

+  // Record details of an offline planned buffer offset we want to place.
+  // offline_offset is the buffer offset from the start of the arena.
+  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
+                         int first_time_used, int last_time_used,
+                         int offline_offset);
+
   // Returns the high-water mark of used memory. This is the minimum size of a
   // memory arena you'd need to allocate to hold these buffers.
   size_t GetMaximumMemorySize() override;
@@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
   static size_t per_buffer_size() {
     const int per_buffer_size =
         sizeof(BufferRequirements) +  // requirements_
-        sizeof(int) +                 // buffer_sizes_sorted_by_size_
-        sizeof(int) +                 // buffer_ids_sorted_by_size_
+        sizeof(int) +                 // buffer_sizes_sorted_
+        sizeof(int) +                 // buffer_ids_sorted_
         sizeof(ListEntry) +           // buffers_sorted_by_offset_
         sizeof(int);                  // buffer_offsets_;
     return per_buffer_size;
@@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
   // Records the client-provided information about each buffer.
   struct BufferRequirements {
     int size;
+    int offline_offset;
     int first_time_used;
     int last_time_used;
   };

   // Working arrays used during the layout algorithm.
   BufferRequirements* requirements_;
-  int* buffer_sizes_sorted_by_size_;
-  int* buffer_ids_sorted_by_size_;
+  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
+  // {
+  //   offline planned buffers,
+  //   online planned buffers sorted by size
+  // }
+  int* buffer_sizes_sorted_;
+  int* buffer_ids_sorted_;
   ListEntry* buffers_sorted_by_offset_;
-  int next_free_entry_;
+  int next_free_entry_;    // Index of the next free entry of
+                           // buffers_sorted_by_offset_
+  int first_entry_index_;  // Index of the first entry (smallest offset) of
+                           // buffers_sorted_by_offset_

   // Stores the outcome of the plan, the location of each buffer in the arena.
   int* buffer_offsets_;
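
Worked example (editor's note, not part of the commit): assume five buffers are added in
id order with these requirements: id 0: 64 bytes, online; id 1: 32 bytes, offline offset
48; id 2: 128 bytes, online; id 3: 96 bytes, offline offset 0; id 4: 16 bytes, online.
After the partitioning pass and the partial ReverseSortInPlace() in
CalculateOffsetsIfNeeded(), the working arrays hold

    buffer_ids_sorted_   = {1, 3, 2, 0, 4}
    buffer_sizes_sorted_ = {32, 96, 128, 64, 16}

i.e. the offline-planned buffers keep their insertion order at the front, and only the
online-planned tail is sorted by descending size.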
@@ -39,16 +39,19 @@ namespace {
 // Used to hold information used during allocation calculations.
 struct AllocationInfo {
   size_t bytes;
+  void** output_ptr;
   int first_created;
   int last_used;
+  int32_t offline_offset;
   bool needs_allocating;
-  void** output_ptr;
 };

 // We align tensor buffers to 16-byte boundaries, since this is a common
 // requirement for SIMD extensions.
 constexpr int kBufferAlignment = 16;

+constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
+
 // Instance of a zero-length int to pass as tensor dims for a flatbuffer
 // Tensor with no shape. Note that the second member of a TfLiteArray is a
 // flexible array member, which is not strictly valid C++. However it is
@@ -77,6 +80,71 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
   TF_LITE_REMOVE_VIRTUAL_DELETE
 };

+#if !defined(__clang__)
+// Helper function to check flatbuffer metadata correctness. This function is
+// not called by default. Hence it's not linked in to the final binary code.
+TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
+                                        ErrorReporter* error_reporter) {
+  // Suppress compile warning for unused function
+  (void)CheckOfflinePlannedOffsets;
+
+  if (model->metadata()) {
+    for (size_t i = 0; i < model->metadata()->size(); ++i) {
+      auto metadata = model->metadata()->Get(i);
+      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
+                  strlen(kOfflineMemAllocMetadata)) == 0) {
+        auto* subgraphs = model->subgraphs();
+        const SubGraph* subgraph = (*subgraphs)[0];
+        const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
+            subgraph->tensors();
+        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+            model->buffers();
+        int nbr_tflite_tensors = tensors->size();
+        auto* buffer = (*buffers)[metadata->buffer()];
+        auto* array = buffer->data();
+        const uint32_t* metadata_buffer = (uint32_t*)array->data();
+        int version = metadata_buffer[0];
+        int subgraph_idx = metadata_buffer[1];
+        const int nbr_offline_offsets = metadata_buffer[2];
+        int* offline_planner_offsets = (int*)&metadata_buffer[3];
+
+        TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
+        TF_LITE_REPORT_ERROR(error_reporter,
+                             "Offline planner metadata found, version %d, "
+                             "subgraph %d, nbr offline offsets %d",
+                             version, subgraph_idx, nbr_offline_offsets);
+        for (int j = 0; j < nbr_offline_offsets; ++j) {
+          TF_LITE_REPORT_ERROR(
+              error_reporter,
+              "Offline planner tensor index %d, offline offset: %d", j,
+              offline_planner_offsets[j]);
+        }
+
+        if (version != 1) {
+          TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
+                               version);
+          return kTfLiteError;
+        }
+        if (subgraph_idx != 0) {
+          TF_LITE_REPORT_ERROR(error_reporter,
+                               "Only 1 subgraph supported! Subgraph idx (%d)\n",
+                               subgraph_idx);
+          return kTfLiteError;
+        }
+        if (nbr_tflite_tensors != nbr_offline_offsets) {
+          TF_LITE_REPORT_ERROR(error_reporter,
+                               "Nbr of offline buffer offsets (%d) in metadata "
+                               "not equal nbr tensors (%d)\n",
+                               nbr_offline_offsets, nbr_tflite_tensors);
+          return kTfLiteError;
+        }
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+#endif
+
 // A helper class to construct AllocationInfo array. This array contains the
 // lifetime of tensors / scratch_buffer and will be used to calculate the memory
 // plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
@@ -94,9 +162,17 @@ class AllocationInfoBuilder {
     return Allocate();
   }

+  // Check if model contains offline planned buffer offsets.
+  //  - If there's no metadata available, offline_planner_offsets is not set
+  //  - If there's metadata available, offline_planner_offsets will point to the
+  //    first offset in the metadata buffer list.
+  TfLiteStatus GetOfflinePlannedOffsets(const Model* model,
+                                        int32_t** offline_planner_offsets);
+
   // Add allocaiton information for the tensors.
-  TfLiteStatus AddTensors(const SubGraph* subgraph,
+  TfLiteStatus AddTensors(const SubGraph* subgraph, int32_t* offline_offsets,
                           TfLiteTensor* runtime_tensors);

   // Add allocation information for the scratch buffers.
   TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles);
@@ -130,6 +206,7 @@ TfLiteStatus AllocationInfoBuilder::Allocate() {
 }

 TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
+                                               int32_t* offline_offsets,
                                                TfLiteTensor* runtime_tensors) {
   // Set up allocation info for all tensors.
   for (size_t i = 0; i < tensor_count_; ++i) {
@@ -141,6 +218,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
     current->last_used = -1;
     current->needs_allocating = (runtime_tensors[i].data.data == nullptr) &&
                                 (!subgraph->tensors()->Get(i)->is_variable());
+    if (offline_offsets) {
+      current->offline_offset = offline_offsets[i];
+    } else {
+      current->offline_offset = kOnlinePlannedBuffer;
+    }
   }

   for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
@@ -198,6 +280,52 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
   return kTfLiteOk;
 }

+// The tensor offsets will be encoded in the metadata:[Metadata] field of the
+// Model. The following encoding applies:
+//
+// | Metadata component | Value                                              |
+// | name:string        | “OfflineMemoryAllocation”                          |
+// | buffer:uint        | Index of buffer containing memory allocation data |
+//
+// The buffer contents for the memory allocation is a list of 32-bit integers.
+// The number of tensors, n, must be equal to the number of tensors defined in
+// the model. The following encoding applies:
+//
+// | Offset  | Value                                                            |
+// | 0       | Offline allocation format version – set to 0                    |
+// | 1       | Subgraph index to which this allocation applies                  |
+// | 2       | Number of offsets following: n                                   |
+// | 3       | Arena byte offset of tensor #0 or -1 to allocate at runtime      |
+// | 4       | Arena byte offset of tensor #1 or -1 to allocate at runtime      |
+// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime  |
+TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
+    const Model* model, int32_t** offline_planner_offsets) {
+  if (model->metadata()) {
+    for (size_t i = 0; i < model->metadata()->size(); ++i) {
+      auto metadata = model->metadata()->Get(i);
+      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
+                  strlen(kOfflineMemAllocMetadata)) == 0) {
+        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+            model->buffers();
+        auto* buffer = (*buffers)[metadata->buffer()];
+        auto* array = buffer->data();
+        const uint32_t* metadata_buffer = (uint32_t*)array->data();
+        const size_t nbr_tensors = (size_t)metadata_buffer[2];
+        *offline_planner_offsets = (int32_t*)&metadata_buffer[3];
+
+        if (tensor_count_ != nbr_tensors) {
+          TF_LITE_REPORT_ERROR(reporter_,
+                               "Nbr of offline buffer offsets (%d) in metadata "
+                               "not equal nbr tensors (%d)\n",
+                               nbr_tensors, tensor_count_);
+          return kTfLiteError;
+        }
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
 TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
     internal::ScratchBufferHandle* buffer_handles) {
   // Set up allocation info for buffers.
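
Example (editor's note, not part of the commit): a metadata buffer that follows the
encoding documented above, for a hypothetical three-tensor model. The offsets are made-up
values; in the unit tests an array like this is handed to
tflite::testing::GetModelWithOfflinePlanning(), which wraps it in a model buffer and names
the metadata entry "OfflineMemoryAllocation".

    constexpr int kNumTensors = 3;
    // 3 header words (version, subgraph index, count) followed by one offset per tensor.
    const int32_t offline_metadata[3 + kNumTensors] = {
        1,            // version (the checker added above accepts version 1)
        0,            // subgraph index (only subgraph 0 is supported)
        kNumTensors,  // number of offsets that follow
        0,            // tensor 0: arena byte offset 0
        64,           // tensor 1: arena byte offset 64 (16-byte aligned)
        -1,           // tensor 2: let the greedy planner place it at runtime
    };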
@@ -210,11 +338,13 @@ TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
     current->first_created = handle->node_idx;
     current->last_used = handle->node_idx;
     current->needs_allocating = true;
+    current->offline_offset = kOnlinePlannedBuffer;
   }
   return kTfLiteOk;
 }

-TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
+TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
+                        GreedyMemoryPlanner* planner,
                         const AllocationInfo* allocation_info,
                         size_t allocation_info_size) {
   // Add the tensors to our allocation plan.
@@ -223,9 +353,15 @@ TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
     if (current->needs_allocating) {
       size_t aligned_bytes_required =
           AlignSizeUp(current->bytes, kBufferAlignment);
-      TF_LITE_ENSURE_STATUS(
-          planner->AddBuffer(error_reporter, aligned_bytes_required,
-                             current->first_created, current->last_used));
+      if (current->offline_offset == kOnlinePlannedBuffer) {
+        TF_LITE_ENSURE_STATUS(
+            planner->AddBuffer(error_reporter, aligned_bytes_required,
+                               current->first_created, current->last_used));
+      } else {
+        TF_LITE_ENSURE_STATUS(planner->AddBuffer(
+            error_reporter, aligned_bytes_required, current->first_created,
+            current->last_used, current->offline_offset));
+      }
     }
   }
   return kTfLiteOk;
@@ -466,7 +602,6 @@ TfLiteStatus MicroAllocator::StartModelAllocation(

   const SubGraph* subgraph = GetSubGraphFromModel(model);
   TFLITE_DCHECK(subgraph != nullptr);

   model_is_allocating_ = true;
-
   TF_LITE_ENSURE_STATUS(
@@ -491,7 +626,7 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
   const SubGraph* subgraph = GetSubGraphFromModel(model);
   TFLITE_DCHECK(subgraph != nullptr);

-  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(context, subgraph));
+  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, context, subgraph));
   TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph));

   model_is_allocating_ = false;
@@ -739,7 +874,8 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
   return (*subgraphs)[0];
 }

-TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
+TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
+                                                    TfLiteContext* context,
                                                     const SubGraph* subgraph) {
   // Create static memory plan
   // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@@ -756,7 +892,13 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
   AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
   TF_LITE_ENSURE_STATUS(
       builder.Init(subgraph->tensors()->size(), scratch_buffer_count_));
-  TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, context->tensors));
+
+  int32_t* offline_planner_offsets = nullptr;
+  TF_LITE_ENSURE_STATUS(
+      builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
+  TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, offline_planner_offsets,
+                                           context->tensors));
+
   TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
   const AllocationInfo* allocation_info = builder.Finish();
@@ -189,7 +189,8 @@ class MicroAllocator {

   // Commits a memory plan for all non-persistent buffer allocations in the
   // 'head' section of the memory arena.
-  virtual TfLiteStatus CommitStaticMemoryPlan(TfLiteContext* context,
+  virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
+                                              TfLiteContext* context,
                                               const SubGraph* subgraph);

 // A simple memory allocator that always allocate from the arena tail or head.
@@ -253,10 +253,10 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
   // bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors.
   TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
   // t1 can't reuse any memory, as n0 requires both t0 and t1.
-  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
   // t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires
   // both t1 and t2.
-  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
   // t3 reuses the same memory from t0 as t0 is not an input to any node.
   TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
@@ -308,4 +308,218 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
       /*count=*/3);
 }

+TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) {
+  int version = 1;
+  int subgraph = 0;
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {version, subgraph, nbr_tensors,  // header
+                                                // memory offsets:
+                                                -1, -1, -1, -1};
+
+  // The structure is identical to the one in
+  // TestAllocationForModelsWithBranches.
+  int num_conns = 3;
+  tflite::testing::NodeConnection node_list[3] = {
+      {{0}, {1}},     // node 0: input t0, output t1
+      {{0}, {2}},     // node 1: input t0, output t2
+      {{1, 2}, {3}},  // node 2: inputs t1 and t2, output t3
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  // Since all of the tensors are online planned and the model structure is
+  // identical to that in TestAllocationForModelsWithBranches,
+  // the offsets should be identical to that test.
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerBasic) {
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {1, 0, nbr_tensors,  // header
+                                                // memory offsets:
+                                                0,    // t0
+                                                48,   // t1
+                                                0,    // t2
+                                                48};  // t3
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+
+  int num_conns = 3;
+  tflite::testing::NodeConnection node_list[3] = {
+      {{t0}, {t1}},  // node 0: input t0, output t1
+      {{t1}, {t2}},  // node 1: input t1, output t2
+      {{t2}, {t3}},  // node 2: input t2, output t3
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[3].data.uint8 - start);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) {
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {
+      1, 0, nbr_tensors,  // header: version, subgraph, nbr tensors
+      // memory offsets:
+      0,    // t0
+      0,    // t1
+      48,   // t2
+      -1};  // t3
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+
+  int num_conns = 2;
+  tflite::testing::NodeConnection node_list[2] = {
+      {{t0, t1}, {t2}},  // node 0: input t0, scratch t1, output t2
+      {{t2}, {t3}},      // node 1: input t2, output t3
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) {
+  constexpr int nbr_tensors = 5;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {
+      1, 0, nbr_tensors,  // header: version, subgraph, nbr tensors
+      // memory offsets:
+      0,    // t0
+      48,   // t1
+      -1,   // t2
+      0,    // t3
+      -1};  // t4
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+  int t4 = 4;
+
+  int num_conns = 2;
+  tflite::testing::NodeConnection node_list[2] = {
+      {{t0, t1}, {t2}},  // node 0: input t0, scratch t1, output t2
+      {{t2}, {t3, t4}},  // node 1: input t2, outputs t3 and t4
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[4].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+}
+
 TF_LITE_MICRO_TESTS_END
@@ -27,6 +27,7 @@ limitations under the License.

 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
 #include "tensorflow/lite/micro/micro_allocator.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/schema/schema_generated.h"
@@ -111,6 +112,33 @@ const char* AllocTypeName(TfLiteAllocationType type) {
 }
 }  // namespace

+// Helper function to print model flatbuffer data. This function is not called
+// by default. Hence it's not linked in to the final binary code.
+void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
+  auto* subgraphs = model->subgraphs();
+  const SubGraph* subgraph = (*subgraphs)[0];
+  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
+      subgraph->tensors();
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+      model->buffers();
+  TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
+  for (size_t i = 0; i < tensors->size(); ++i) {
+    const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
+    size_t type_size, tensor_size;
+    auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
+    auto* array = buffer->data();
+    int array_size = 0;
+    if (array) {
+      array_size = array->size();
+    }
+    BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
+                           error_reporter);
+    TF_LITE_REPORT_ERROR(
+        error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
+        !array_size && !flatbuffer_tensor.is_variable(), tensor_size);
+  }
+}
+
 // Prints a dump of what tensors and what nodes are in the interpreter.
 void PrintInterpreterState(MicroInterpreter* interpreter) {
   printf("Interpreter has %zu tensors and %zu nodes\n",
@@ -20,6 +20,9 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_interpreter.h"

 namespace tflite {
+// Helper function to print model flatbuffer data. This function is not called
+// by default. Hence it's not linked in to the final binary code.
+void PrintModelData(const Model* model, ErrorReporter* error_reporter);
 // Prints a dump of what tensors and what nodes are in the interpreter.
 void PrintInterpreterState(MicroInterpreter* interpreter);
 }  // namespace tflite
@@ -55,7 +55,7 @@ class StackAllocator : public flatbuffers::Allocator {
     return *inst;
   }

-  static constexpr size_t kStackAllocatorSize = 4096;
+  static constexpr size_t kStackAllocatorSize = 8192;

  private:
  uint8_t data_backing_[kStackAllocatorSize];
@@ -101,6 +101,9 @@ class ModelBuilder {
   Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
                std::initializer_list<Tensor> outputs);

+  void AddMetadata(const char* description_string,
+                   const int32_t* metadata_buffer_data, size_t num_elements);
+
   // Constructs the flatbuffer model using `builder_` and return a pointer to
   // it. The returned model has the same lifetime as `builder_`.
   const Model* BuildModel(std::initializer_list<Tensor> inputs,
@@ -123,6 +126,16 @@ class ModelBuilder {

   static constexpr int kMaxTensors = 50;
   flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];

+  static constexpr int kMaxMetadataBuffers = 10;
+
+  static constexpr int kMaxMetadatas = 10;
+  flatbuffers::Offset<Metadata> metadata_[kMaxMetadatas];
+
+  flatbuffers::Offset<Buffer> metadata_buffers_[kMaxMetadataBuffers];
+
+  int nbr_of_metadata_buffers_ = 0;
+
   int next_tensor_id_ = 0;
 };
@@ -149,13 +162,33 @@ ModelBuilder::Node ModelBuilder::AddNode(
   return next_operator_id_ - 1;
 }

+void ModelBuilder::AddMetadata(const char* description_string,
+                               const int32_t* metadata_buffer_data,
+                               size_t num_elements) {
+  metadata_[ModelBuilder::nbr_of_metadata_buffers_] =
+      CreateMetadata(*builder_, builder_->CreateString(description_string),
+                     1 + ModelBuilder::nbr_of_metadata_buffers_);
+
+  metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer(
+      *builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data,
+                                        sizeof(uint32_t) * num_elements));
+
+  ModelBuilder::nbr_of_metadata_buffers_++;
+}
+
 const Model* ModelBuilder::BuildModel(
     std::initializer_list<ModelBuilder::Tensor> inputs,
     std::initializer_list<ModelBuilder::Tensor> outputs) {
   // Model schema requires an empty buffer at idx 0.
-  constexpr size_t kBufferSize = 1;
-  const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
-      tflite::CreateBuffer(*builder_)};
+  size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_;
+  flatbuffers::Offset<Buffer> buffers[kMaxMetadataBuffers];
+  buffers[0] = tflite::CreateBuffer(*builder_);
+
+  // Place the metadata buffers first in the buffer since the indices for them
+  // have already been set in AddMetadata()
+  for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) {
+    buffers[i] = metadata_buffers_[i - 1];
+  }
+
   // TFLM only supports single subgraph.
   constexpr size_t subgraphs_size = 1;
|
||||||
builder_->CreateVector(outputs.begin(), outputs.size()),
|
builder_->CreateVector(outputs.begin(), outputs.size()),
|
||||||
builder_->CreateVector(operators_, next_operator_id_),
|
builder_->CreateVector(operators_, next_operator_id_),
|
||||||
builder_->CreateString("test_subgraph"))};
|
builder_->CreateString("test_subgraph"))};
|
||||||
const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
|
|
||||||
*builder_, 0,
|
flatbuffers::Offset<Model> model_offset;
|
||||||
builder_->CreateVector(operator_codes_, next_operator_code_id_),
|
if (ModelBuilder::nbr_of_metadata_buffers_ > 0) {
|
||||||
builder_->CreateVector(subgraphs, subgraphs_size),
|
model_offset = tflite::CreateModel(
|
||||||
builder_->CreateString("teset_model"),
|
*builder_, 0,
|
||||||
builder_->CreateVector(buffers, kBufferSize));
|
builder_->CreateVector(operator_codes_, next_operator_code_id_),
|
||||||
|
builder_->CreateVector(subgraphs, subgraphs_size),
|
||||||
|
builder_->CreateString("teset_model"),
|
||||||
|
builder_->CreateVector(buffers, buffer_size), 0,
|
||||||
|
builder_->CreateVector(metadata_,
|
||||||
|
ModelBuilder::nbr_of_metadata_buffers_));
|
||||||
|
} else {
|
||||||
|
model_offset = tflite::CreateModel(
|
||||||
|
*builder_, 0,
|
||||||
|
builder_->CreateVector(operator_codes_, next_operator_code_id_),
|
||||||
|
builder_->CreateVector(subgraphs, subgraphs_size),
|
||||||
|
builder_->CreateString("teset_model"),
|
||||||
|
builder_->CreateVector(buffers, buffer_size));
|
||||||
|
}
|
||||||
|
|
||||||
tflite::FinishModelBuffer(*builder_, model_offset);
|
tflite::FinishModelBuffer(*builder_, model_offset);
|
||||||
void* model_pointer = builder_->GetBufferPointer();
|
void* model_pointer = builder_->GetBufferPointer();
|
||||||
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
|
const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
|
||||||
|
@ -250,6 +297,35 @@ const Model* BuildSimpleModelWithBranch() {
|
||||||
return model_builder.BuildModel({t0}, {t3});
|
return model_builder.BuildModel({t0}, {t3});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
|
||||||
|
const int32_t* metadata_buffer,
|
||||||
|
NodeConnection* node_conn,
|
||||||
|
int num_conns) {
|
||||||
|
using flatbuffers::Offset;
|
||||||
|
flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
|
||||||
|
|
||||||
|
ModelBuilder model_builder(fb_builder);
|
||||||
|
|
||||||
|
const int op_id =
|
||||||
|
model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
|
||||||
|
/* version= */ 0);
|
||||||
|
|
||||||
|
for (int i = 0; i < number_of_tensors; ++i) {
|
||||||
|
model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < num_conns; ++i) {
|
||||||
|
model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output);
|
||||||
|
}
|
||||||
|
|
||||||
|
model_builder.AddMetadata(
|
||||||
|
"OfflineMemoryAllocation", metadata_buffer,
|
||||||
|
number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize);
|
||||||
|
|
||||||
|
return model_builder.BuildModel(node_conn[0].input,
|
||||||
|
node_conn[num_conns - 1].output);
|
||||||
|
}
|
||||||
|
|
||||||
const Model* BuildSimpleMockModel() {
|
const Model* BuildSimpleMockModel() {
|
||||||
using flatbuffers::Offset;
|
using flatbuffers::Offset;
|
||||||
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
|
flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
|
||||||
|
@@ -638,6 +714,15 @@ const Model* GetSimpleModelWithBranch() {
   return model;
 }

+const Model* GetModelWithOfflinePlanning(int num_tensors,
+                                         const int32_t* metadata_buffer,
+                                         NodeConnection* node_conn,
+                                         int num_conns) {
+  const Model* model = BuildModelWithOfflinePlanning(
+      num_tensors, metadata_buffer, node_conn, num_conns);
+  return model;
+}
+
 const Model* GetSimpleStatefulModel() {
   static Model* model = nullptr;
   if (!model) {
@@ -30,6 +30,14 @@ limitations under the License.
 namespace tflite {
 namespace testing {

+constexpr int kOfflinePlannerHeaderSize = 3;
+
+struct NodeConnection_ {
+  std::initializer_list<int32_t> input;
+  std::initializer_list<int32_t> output;
+};
+typedef struct NodeConnection_ NodeConnection;
+
 // A simple operator that returns the median of the input with the number of
 // times the kernel was invoked. The implementation below is deliberately
 // complicated, just to demonstrate how kernel memory planning works.
@@ -82,6 +90,12 @@ const Model* GetComplexMockModel();
 // Returns a simple flatbuffer model with two branches.
 const Model* GetSimpleModelWithBranch();

+// Returns a simple flatbuffer model with offline planned tensors.
+const Model* GetModelWithOfflinePlanning(int num_tensors,
+                                         const int32_t* metadata_buffer,
+                                         NodeConnection* node_conn,
+                                         int num_conns);
+
 // Returns a flatbuffer model with `simple_stateful_op`
 const Model* GetSimpleStatefulModel();