TFLM: Add an interpreter API to inspect the actual number of bytes used in the arena.
This helps to choose the optimal arena size. - I've also used this tool to adjust the arena size for a few test cases. - This CL changes the GreedyMemoryPlanner by exposing the per buffer size requirement so that we can estimate if the remaining arena is enough for planning the entire buffer. PiperOrigin-RevId: 307628733 Change-Id: Id47f578a0bd0b67a3bbbd2a2ef7103d2336b17aa
This commit is contained in:
parent
e71f97fd6b
commit
298b24151e
|
@ -43,8 +43,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
|
|||
tflite::ops::micro::AllOpsResolver resolver;
|
||||
|
||||
// Create an area of memory to use for input, output, and intermediate arrays.
|
||||
// Finding the minimum value for your model may require some trial and error.
|
||||
const int tensor_arena_size = 2 * 1024;
|
||||
// `arena_used_bytes` can be used to retrieve the optimal size.
|
||||
const int tensor_arena_size = 2208 + 16 + 100 /* some reserved space */;
|
||||
uint8_t tensor_arena[tensor_arena_size];
|
||||
|
||||
// Build an interpreter to run the model with
|
||||
|
@ -53,6 +53,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
|
|||
|
||||
// Allocate memory from the tensor_arena for the model's tensors
|
||||
TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
|
||||
// At the time of writing, the hello world model uses 2208 bytes, we leave
|
||||
// 100 bytes headroom here to make the test less fragile and at the same
|
||||
// time, alert for substantial increase.
|
||||
TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2208 + 100);
|
||||
|
||||
// Obtain a pointer to the model's input tensor
|
||||
TfLiteTensor* input = interpreter.input(0);
|
||||
|
|
|
@ -41,13 +41,8 @@ void ReverseSortInPlace(int* values, int* ids, int size) {
|
|||
GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
|
||||
int scratch_buffer_size)
|
||||
: buffer_count_(0), need_to_calculate_offsets_(true) {
|
||||
const int per_buffer_size = sizeof(BufferRequirements) + // requirements_
|
||||
sizeof(int) + // buffer_sizes_sorted_by_size_
|
||||
sizeof(int) + // buffer_ids_sorted_by_size_
|
||||
sizeof(ListEntry) + // buffers_sorted_by_offset_
|
||||
sizeof(int); // buffer_offsets_;
|
||||
// Allocate the arrays we need within the scratch buffer arena.
|
||||
max_buffer_count_ = scratch_buffer_size / per_buffer_size;
|
||||
max_buffer_count_ = scratch_buffer_size / per_buffer_size();
|
||||
|
||||
unsigned char* next_free = scratch_buffer;
|
||||
requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
|
||||
|
|
|
@ -86,6 +86,17 @@ class GreedyMemoryPlanner : public MemoryPlanner {
|
|||
int next_entry_index;
|
||||
};
|
||||
|
||||
// Number of bytes required in order to plan a buffer.
|
||||
static size_t per_buffer_size() {
|
||||
const int per_buffer_size =
|
||||
sizeof(BufferRequirements) + // requirements_
|
||||
sizeof(int) + // buffer_sizes_sorted_by_size_
|
||||
sizeof(int) + // buffer_ids_sorted_by_size_
|
||||
sizeof(ListEntry) + // buffers_sorted_by_offset_
|
||||
sizeof(int); // buffer_offsets_;
|
||||
return per_buffer_size;
|
||||
}
|
||||
|
||||
private:
|
||||
// Whether a buffer is active in a given time range.
|
||||
bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
|
||||
|
|
|
@ -440,6 +440,13 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
|
|||
ErrorReporter* error_reporter)
|
||||
: model_(model), error_reporter_(error_reporter), context_(context) {
|
||||
uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
|
||||
if (aligned_arena != tensor_arena) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"%d bytes lost due to alignment. To avoid this loss, please make sure "
|
||||
"the tensor_arena is 16 bytes aligned.",
|
||||
aligned_arena - tensor_arena);
|
||||
}
|
||||
size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
|
||||
// Creates a root memory allocator managing the arena. The allocator itself
|
||||
// also locates in the arena buffer. This allocator doesn't need to be
|
||||
|
|
|
@ -64,9 +64,10 @@ typedef struct {
|
|||
// This information could change in the future version.
|
||||
// ************** .memory_allocator->GetBuffer()
|
||||
// Tensors/Scratch buffers (head)
|
||||
// **************
|
||||
// ************** .head_watermark
|
||||
// unused memory
|
||||
// ************** .memory_allocator->GetBuffer() + ->GetDataSize()
|
||||
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
|
||||
// - ->GetDataSize()
|
||||
// persistent area (tail)
|
||||
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
|
||||
class MicroAllocator {
|
||||
|
@ -88,6 +89,15 @@ class MicroAllocator {
|
|||
// called in this class.
|
||||
TfLiteStatus FinishTensorAllocation();
|
||||
|
||||
// Returns the arena usage in bytes, only available after
|
||||
// `FinishTensorAllocation`. Otherwise, it will return 0.
|
||||
size_t used_bytes() const {
|
||||
if (active_) {
|
||||
return 0;
|
||||
}
|
||||
return memory_allocator_->GetUsedBytes();
|
||||
}
|
||||
|
||||
// Run through the model to allocate nodes and registrations. We need to keep
|
||||
// them for the entire life time of the model to allow persistent tensors.
|
||||
// This method needs to be called before FinishTensorAllocation method.
|
||||
|
@ -115,6 +125,7 @@ class MicroAllocator {
|
|||
TfLiteStatus Init();
|
||||
|
||||
const Model* model_;
|
||||
// A simple memory allocator that always allocate from the arena tail.
|
||||
SimpleMemoryAllocator* memory_allocator_;
|
||||
ErrorReporter* error_reporter_;
|
||||
TfLiteContext* context_;
|
||||
|
|
|
@ -142,11 +142,15 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) {
|
|||
TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
|
||||
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
|
||||
TfLiteContext context;
|
||||
constexpr size_t arena_size = 1024;
|
||||
constexpr size_t arena_size =
|
||||
760 /* minimal arena size at the time of writing */ +
|
||||
16 /* alignment */ + 100 /* leave some headroom for future proof */;
|
||||
uint8_t arena[arena_size];
|
||||
tflite::MicroAllocator allocator(&context, model, arena, arena_size,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size);
|
||||
// Memory planning hasn't been finalized, so the used bytes is unknown.
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, allocator.used_bytes());
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation());
|
||||
// No allocation to be done afterwards.
|
||||
|
@ -170,6 +174,7 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
|
|||
context.tensors[1].data.raw);
|
||||
TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
|
||||
context.tensors[2].data.raw);
|
||||
TF_LITE_MICRO_EXPECT_LE(allocator.used_bytes(), 760 + 100);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
|
||||
|
|
|
@ -139,6 +139,14 @@ class MicroInterpreter {
|
|||
return node_and_registrations_[node_index];
|
||||
}
|
||||
|
||||
// For debugging only.
|
||||
// Returns the actual used arena in bytes. This method gives the optimal arena
|
||||
// size. It's only available after `AllocateTensors` has been called.
|
||||
// Note that normally `tensor_arena` requires 16 bytes alignment to fully
|
||||
// utilize the space. If that's not the case, the optimal arena size would be
|
||||
// arena_used_bytes() + 16.
|
||||
size_t arena_used_bytes() const { return allocator_.used_bytes(); }
|
||||
|
||||
private:
|
||||
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
|
||||
|
||||
|
|
|
@ -174,7 +174,9 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
|
|||
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
|
||||
TF_LITE_MICRO_EXPECT_NE(nullptr, model);
|
||||
tflite::MockOpResolver mock_resolver;
|
||||
constexpr size_t allocator_buffer_size = 1024;
|
||||
constexpr size_t allocator_buffer_size =
|
||||
928 /* optimal arena size at the time of writing. */ +
|
||||
16 /* alignment */ + 100 /* some headroom */;
|
||||
uint8_t allocator_buffer[allocator_buffer_size];
|
||||
|
||||
// Create a new scope so that we can test the destructor.
|
||||
|
@ -183,6 +185,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
|
|||
allocator_buffer_size,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
|
||||
TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 928 + 100);
|
||||
TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
|
||||
TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size());
|
||||
|
||||
|
@ -266,12 +269,15 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) {
|
|||
TF_LITE_MICRO_EXPECT_NE(nullptr, model);
|
||||
|
||||
tflite::MockOpResolver mock_resolver;
|
||||
constexpr size_t allocator_buffer_size = 2048;
|
||||
constexpr size_t allocator_buffer_size =
|
||||
2096 /* optimal arena size at the time of writing. */ +
|
||||
16 /* alignment */ + 100 /* some headroom */;
|
||||
uint8_t allocator_buffer[allocator_buffer_size];
|
||||
tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer,
|
||||
allocator_buffer_size,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
|
||||
TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2096 + 100);
|
||||
TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
|
||||
TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size());
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ class SimpleMemoryAllocator {
|
|||
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
|
||||
uint8_t* buffer_tail)
|
||||
: error_reporter_(error_reporter),
|
||||
buffer_head_(buffer_head),
|
||||
buffer_tail_(buffer_tail),
|
||||
head_(buffer_head),
|
||||
tail_(buffer_tail) {}
|
||||
SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
|
||||
|
@ -47,9 +49,14 @@ class SimpleMemoryAllocator {
|
|||
uint8_t* GetHead() const { return head_; }
|
||||
uint8_t* GetTail() const { return tail_; }
|
||||
size_t GetAvailableMemory() const { return tail_ - head_; }
|
||||
size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }
|
||||
|
||||
private:
|
||||
size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
|
||||
|
||||
ErrorReporter* error_reporter_;
|
||||
uint8_t* buffer_head_;
|
||||
uint8_t* buffer_tail_;
|
||||
uint8_t* head_;
|
||||
uint8_t* tail_;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue