diff --git a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc index 75dd607f75c..3d1155ef41e 100644 --- a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc +++ b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc @@ -43,8 +43,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { tflite::ops::micro::AllOpsResolver resolver; // Create an area of memory to use for input, output, and intermediate arrays. - // Finding the minimum value for your model may require some trial and error. - const int tensor_arena_size = 2 * 1024; + // `arena_used_bytes` can be used to retrieve the optimal size. + const int tensor_arena_size = 2208 + 16 + 100 /* some reserved space */; uint8_t tensor_arena[tensor_arena_size]; // Build an interpreter to run the model with @@ -53,6 +53,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Allocate memory from the tensor_arena for the model's tensors TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + // At the time of writing, the hello world model uses 2208 bytes; we leave + // 100 bytes head room here to make the test less fragile and at the same + // time, alert on a substantial increase. 
+ TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2208 + 100); // Obtain a pointer to the model's input tensor TfLiteTensor* input = interpreter.input(0); diff --git a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc index faea73e9169..c5e2d579ccd 100644 --- a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc +++ b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc @@ -41,13 +41,8 @@ void ReverseSortInPlace(int* values, int* ids, int size) { GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size) : buffer_count_(0), need_to_calculate_offsets_(true) { - const int per_buffer_size = sizeof(BufferRequirements) + // requirements_ - sizeof(int) + // buffer_sizes_sorted_by_size_ - sizeof(int) + // buffer_ids_sorted_by_size_ - sizeof(ListEntry) + // buffers_sorted_by_offset_ - sizeof(int); // buffer_offsets_; // Allocate the arrays we need within the scratch buffer arena. - max_buffer_count_ = scratch_buffer_size / per_buffer_size; + max_buffer_count_ = scratch_buffer_size / per_buffer_size(); unsigned char* next_free = scratch_buffer; requirements_ = reinterpret_cast<BufferRequirements*>(next_free); diff --git a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h index f2c77ed94f3..0cb81093596 100644 --- a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +++ b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h @@ -86,6 +86,17 @@ class GreedyMemoryPlanner : public MemoryPlanner { int next_entry_index; }; + // Number of bytes required in order to plan a buffer. 
+ static size_t per_buffer_size() { + const int per_buffer_size = + sizeof(BufferRequirements) + // requirements_ + sizeof(int) + // buffer_sizes_sorted_by_size_ + sizeof(int) + // buffer_ids_sorted_by_size_ + sizeof(ListEntry) + // buffers_sorted_by_offset_ + sizeof(int); // buffer_offsets_; + return per_buffer_size; + } + private: // Whether a buffer is active in a given time range. bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used, diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 1bbcadf110e..573ac2e0b11 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -440,6 +440,13 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, ErrorReporter* error_reporter) : model_(model), error_reporter_(error_reporter), context_(context) { uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment); + if (aligned_arena != tensor_arena) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "%d bytes lost due to alignment. To avoid this loss, please make sure " + "the tensor_arena is 16 bytes aligned.", + aligned_arena - tensor_arena); + } size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena; // Creates a root memory allocator managing the arena. The allocator itself // also locates in the arena buffer. This allocator doesn't need to be diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index e7dd4f3e34e..a846b0c63ba 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -64,9 +64,10 @@ typedef struct { // This information could change in the future version. 
// ************** .memory_allocator->GetBuffer() // Tensors/Scratch buffers (head) -// ************** +// ************** .head_watermark // unused memory -// ************** .memory_allocator->GetBuffer() + ->GetDataSize() +// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize() +// - ->GetDataSize() // persistent area (tail) // ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize() class MicroAllocator { @@ -88,6 +89,15 @@ class MicroAllocator { // called in this class. TfLiteStatus FinishTensorAllocation(); + // Returns the arena usage in bytes, only available after + // `FinishTensorAllocation`. Otherwise, it will return 0. + size_t used_bytes() const { + if (active_) { + return 0; + } + return memory_allocator_->GetUsedBytes(); + } + // Run through the model to allocate nodes and registrations. We need to keep // them for the entire life time of the model to allow persistent tensors. // This method needs to be called before FinishTensorAllocation method. @@ -115,6 +125,7 @@ class MicroAllocator { TfLiteStatus Init(); const Model* model_; + // A simple memory allocator that always allocates from the arena tail. 
SimpleMemoryAllocator* memory_allocator_; ErrorReporter* error_reporter_; TfLiteContext* context_; diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index 03aa7c0bd39..78419edbbf9 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -142,11 +142,15 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) { TF_LITE_MICRO_TEST(TestFinishTensorAllocation) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); TfLiteContext context; - constexpr size_t arena_size = 1024; + constexpr size_t arena_size = + 760 /* minimal arena size at the time of writing */ + + 16 /* alignment */ + 100 /* leave some headroom for future proofing */; uint8_t arena[arena_size]; tflite::MicroAllocator allocator(&context, model, arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size); + // Memory planning hasn't been finalized, so the number of used bytes is unknown. + TF_LITE_MICRO_EXPECT_EQ(0, allocator.used_bytes()); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation()); // No allocation to be done afterwards. @@ -170,6 +174,7 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) { context.tensors[1].data.raw); TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw, context.tensors[2].data.raw); + TF_LITE_MICRO_EXPECT_LE(allocator.used_bytes(), 760 + 100); } TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index e41f2e3dc0f..b2046128c78 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -139,6 +139,14 @@ class MicroInterpreter { return node_and_registrations_[node_index]; } + // For debugging only. + // Returns the actual used arena in bytes. This method gives the optimal arena + // size. It's only available after `AllocateTensors` has been called. 
+ // Note that normally `tensor_arena` requires 16-byte alignment to fully + // utilize the space. If it's not the case, the optimal arena size would be + // arena_used_bytes() + 16. + size_t arena_used_bytes() const { return allocator_.used_bytes(); } + private: void CorrectTensorEndianness(TfLiteTensor* tensorCorr); diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index 9517a806f3b..36e8c009b96 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -174,7 +174,9 @@ TF_LITE_MICRO_TEST(TestInterpreter) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); tflite::MockOpResolver mock_resolver; - constexpr size_t allocator_buffer_size = 1024; + constexpr size_t allocator_buffer_size = + 928 /* optimal arena size at the time of writing. */ + + 16 /* alignment */ + 100 /* some headroom */; uint8_t allocator_buffer[allocator_buffer_size]; // Create a new scope so that we can test the destructor. @@ -183,6 +185,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) { allocator_buffer_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 928 + 100); TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size()); @@ -266,12 +269,15 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) { TF_LITE_MICRO_EXPECT_NE(nullptr, model); tflite::MockOpResolver mock_resolver; - constexpr size_t allocator_buffer_size = 2048; + constexpr size_t allocator_buffer_size = + 2096 /* optimal arena size at the time of writing. 
*/ + + 16 /* alignment */ + 100 /* some headroom */; uint8_t allocator_buffer[allocator_buffer_size]; tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer, allocator_buffer_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); + TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2096 + 100); TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size()); diff --git a/tensorflow/lite/micro/simple_memory_allocator.h b/tensorflow/lite/micro/simple_memory_allocator.h index 42c7d963ff7..cf1818609f6 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.h +++ b/tensorflow/lite/micro/simple_memory_allocator.h @@ -31,6 +31,8 @@ class SimpleMemoryAllocator { SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head, uint8_t* buffer_tail) : error_reporter_(error_reporter), + buffer_head_(buffer_head), + buffer_tail_(buffer_tail), head_(buffer_head), tail_(buffer_tail) {} SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer, @@ -47,9 +49,14 @@ class SimpleMemoryAllocator { uint8_t* GetHead() const { return head_; } uint8_t* GetTail() const { return tail_; } size_t GetAvailableMemory() const { return tail_ - head_; } + size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); } private: + size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; } + ErrorReporter* error_reporter_; + uint8_t* buffer_head_; + uint8_t* buffer_tail_; uint8_t* head_; uint8_t* tail_; };