TFLM: Add an interpreter API to inspect the actual number of bytes used in the arena.

This helps to choose the optimal arena size (a usage sketch follows the notes below).

- I've also used this tool to adjust the arena size for a few test cases.
- This CL also changes the GreedyMemoryPlanner to expose the per-buffer size requirement, so that we can estimate whether the remaining arena is enough to plan all buffers.
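For context, this is roughly how the new API is meant to be used (a minimal sketch, not part of this CL; the model symbol `g_model` and the oversized starting arena are assumptions, and the AllOpsResolver include path may differ between versions):

// Hypothetical tuning workflow: run once with a generous arena, read back
// the actual usage with arena_used_bytes(), then shrink the arena to fit.
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model[];  // assumed flatbuffer model data

constexpr size_t kGenerousArenaSize = 16 * 1024;  // deliberately oversized
alignas(16) static uint8_t tensor_arena[kGenerousArenaSize];

size_t MeasureArenaUsage() {
  const tflite::Model* model = tflite::GetModel(g_model);
  tflite::MicroErrorReporter error_reporter;
  tflite::ops::micro::AllOpsResolver resolver;
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kGenerousArenaSize, &error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return 0;
  // After AllocateTensors(), this reports the optimal arena size.
  return interpreter.arena_used_bytes();
}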

PiperOrigin-RevId: 307628733
Change-Id: Id47f578a0bd0b67a3bbbd2a2ef7103d2336b17aa
Tiezhen WANG 2020-04-21 10:14:03 -07:00 committed by TensorFlower Gardener
parent e71f97fd6b
commit 298b24151e
9 changed files with 67 additions and 13 deletions

tensorflow/lite/micro/examples/hello_world/hello_world_test.cc

@@ -43,8 +43,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   tflite::ops::micro::AllOpsResolver resolver;
   // Create an area of memory to use for input, output, and intermediate arrays.
   // Finding the minimum value for your model may require some trial and error.
-  const int tensor_arena_size = 2 * 1024;
+  // `arena_used_bytes` can be used to retrieve the optimal size.
+  const int tensor_arena_size = 2208 + 16 + 100 /* some reserved space */;
   uint8_t tensor_arena[tensor_arena_size];
   // Build an interpreter to run the model with
@@ -53,6 +53,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   // Allocate memory from the tensor_arena for the model's tensors
   TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  // At the time of writing, the hello world model uses 2208 bytes. We leave
+  // 100 bytes of headroom to make the test less fragile and, at the same
+  // time, to alert on any substantial increase.
+  TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2208 + 100);
   // Obtain a pointer to the model's input tensor
   TfLiteTensor* input = interpreter.input(0);

tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc

@@ -41,13 +41,8 @@ void ReverseSortInPlace(int* values, int* ids, int size) {
 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
                                          int scratch_buffer_size)
     : buffer_count_(0), need_to_calculate_offsets_(true) {
-  const int per_buffer_size = sizeof(BufferRequirements) +  // requirements_
-                              sizeof(int) +        // buffer_sizes_sorted_by_size_
-                              sizeof(int) +        // buffer_ids_sorted_by_size_
-                              sizeof(ListEntry) +  // buffers_sorted_by_offset_
-                              sizeof(int);         // buffer_offsets_;
   // Allocate the arrays we need within the scratch buffer arena.
-  max_buffer_count_ = scratch_buffer_size / per_buffer_size;
+  max_buffer_count_ = scratch_buffer_size / per_buffer_size();
   unsigned char* next_free = scratch_buffer;
   requirements_ = reinterpret_cast<BufferRequirements*>(next_free);

tensorflow/lite/micro/memory_planner/greedy_memory_planner.h

@@ -86,6 +86,17 @@ class GreedyMemoryPlanner : public MemoryPlanner {
     int next_entry_index;
   };

+  // Number of bytes required in order to plan a buffer.
+  static size_t per_buffer_size() {
+    const int per_buffer_size =
+        sizeof(BufferRequirements) +  // requirements_
+        sizeof(int) +                 // buffer_sizes_sorted_by_size_
+        sizeof(int) +                 // buffer_ids_sorted_by_size_
+        sizeof(ListEntry) +           // buffers_sorted_by_offset_
+        sizeof(int);                  // buffer_offsets_;
+    return per_buffer_size;
+  }
+
  private:
   // Whether a buffer is active in a given time range.
   bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
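As an aside (an illustrative sketch, not from the diff; the scratch size is an arbitrary assumption): since per_buffer_size() is public and static, a caller can compute how many buffers fit in a given scratch area, which is exactly the quotient the constructor now uses, and can check ahead of time whether the remaining arena suffices to plan all buffers.

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"

constexpr int kScratchSize = 4 * 1024;  // assumed planner scratch size
// The planner's internal capacity is computed the same way:
//   max_buffer_count_ = scratch_buffer_size / per_buffer_size().
const size_t max_buffers =
    kScratchSize / tflite::GreedyMemoryPlanner::per_buffer_size();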

tensorflow/lite/micro/micro_allocator.cc

@@ -440,6 +440,13 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
                                ErrorReporter* error_reporter)
     : model_(model), error_reporter_(error_reporter), context_(context) {
   uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
+  if (aligned_arena != tensor_arena) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter_,
+        "%d bytes lost due to alignment. To avoid this loss, please make sure "
+        "the tensor_arena is 16-byte aligned.",
+        aligned_arena - tensor_arena);
+  }
   size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
   // Creates a root memory allocator managing the arena. The allocator itself
   // also lives in the arena buffer. This allocator doesn't need to be
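A side note (a sketch under the assumption that kBufferAlignment is 16, as the error message states): the warning above never fires if the arena is declared with an alignment specifier.

// Declaring the arena 16-byte aligned makes AlignPointerUp() a no-op, so no
// bytes are lost at the start of the buffer.
constexpr size_t kArenaSize = 2208 + 100;  // measured usage + headroom
alignas(16) static uint8_t tensor_arena[kArenaSize];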

tensorflow/lite/micro/micro_allocator.h

@@ -64,9 +64,10 @@ typedef struct {
 // This information could change in the future version.
 // ************** .memory_allocator->GetBuffer()
 // Tensors/Scratch buffers (head)
-// **************
+// ************** .head_watermark
 // unused memory
-// ************** .memory_allocator->GetBuffer() + ->GetDataSize()
+// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
+//                                               - ->GetDataSize()
 // persistent area (tail)
 // ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
 class MicroAllocator {
@@ -88,6 +89,15 @@ class MicroAllocator {
   // called in this class.
   TfLiteStatus FinishTensorAllocation();

+  // Returns the arena usage in bytes, only available after
+  // `FinishTensorAllocation`. Otherwise, it will return 0.
+  size_t used_bytes() const {
+    if (active_) {
+      return 0;
+    }
+    return memory_allocator_->GetUsedBytes();
+  }

   // Run through the model to allocate nodes and registrations. We need to
   // keep them for the entire lifetime of the model to allow persistent
   // tensors. This method needs to be called before FinishTensorAllocation.
@@ -115,6 +125,7 @@ class MicroAllocator {
   TfLiteStatus Init();

   const Model* model_;
+  // A simple memory allocator that always allocates from the arena tail.
   SimpleMemoryAllocator* memory_allocator_;
   ErrorReporter* error_reporter_;
   TfLiteContext* context_;

tensorflow/lite/micro/micro_allocator_test.cc

@@ -142,11 +142,15 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) {
 TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
   const tflite::Model* model = tflite::testing::GetSimpleMockModel();
   TfLiteContext context;
-  constexpr size_t arena_size = 1024;
+  constexpr size_t arena_size =
+      760 /* minimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* headroom for future growth */;
   uint8_t arena[arena_size];
   tflite::MicroAllocator allocator(&context, model, arena, arena_size,
                                    micro_test::reporter);
   TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size);
+  // Memory planning hasn't been finalized, so the used byte count is unknown.
+  TF_LITE_MICRO_EXPECT_EQ(0, allocator.used_bytes());
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation());
   // No allocation to be done afterwards.
@@ -170,6 +174,7 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
                                context.tensors[1].data.raw);
   TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
                           context.tensors[2].data.raw);
+  TF_LITE_MICRO_EXPECT_LE(allocator.used_bytes(), 760 + 100);
 }

 TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {

tensorflow/lite/micro/micro_interpreter.h

@@ -139,6 +139,14 @@ class MicroInterpreter {
     return node_and_registrations_[node_index];
   }

+  // For debugging only.
+  // Returns the number of arena bytes actually used, i.e. the optimal arena
+  // size. Only available after `AllocateTensors` has been called.
+  // Note that `tensor_arena` normally requires 16-byte alignment to fully
+  // utilize the space. If it is not aligned, the optimal arena size would be
+  // arena_used_bytes() + 16.
+  size_t arena_used_bytes() const { return allocator_.used_bytes(); }
+
  private:
   void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
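To restate the alignment note numerically (an illustration; the 2208 figure is the hello world measurement quoted earlier):

// If the arena is declared alignas(16), the measured value is sufficient;
// with arbitrary alignment, budget one extra alignment step of slack.
const size_t measured = 2208;                    // from arena_used_bytes()
const size_t size_if_aligned = measured;         // alignas(16) arena
const size_t size_if_unaligned = measured + 16;  // worst-case alignment loss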

tensorflow/lite/micro/micro_interpreter_test.cc

@@ -174,7 +174,9 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
   const tflite::Model* model = tflite::testing::GetSimpleMockModel();
   TF_LITE_MICRO_EXPECT_NE(nullptr, model);
   tflite::MockOpResolver mock_resolver;
-  constexpr size_t allocator_buffer_size = 1024;
+  constexpr size_t allocator_buffer_size =
+      928 /* optimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* some headroom */;
   uint8_t allocator_buffer[allocator_buffer_size];

   // Create a new scope so that we can test the destructor.
@@ -183,6 +185,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
                                          allocator_buffer_size,
                                          micro_test::reporter);
     TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+    TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 928 + 100);
     TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
     TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size());
@@ -266,12 +269,15 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) {
   TF_LITE_MICRO_EXPECT_NE(nullptr, model);
   tflite::MockOpResolver mock_resolver;
-  constexpr size_t allocator_buffer_size = 2048;
+  constexpr size_t allocator_buffer_size =
+      2096 /* optimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* some headroom */;
   uint8_t allocator_buffer[allocator_buffer_size];
   tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer,
                                        allocator_buffer_size,
                                        micro_test::reporter);
   TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2096 + 100);
   TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
   TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size());

tensorflow/lite/micro/simple_memory_allocator.h

@@ -31,6 +31,8 @@ class SimpleMemoryAllocator {
   SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
                         uint8_t* buffer_tail)
       : error_reporter_(error_reporter),
+        buffer_head_(buffer_head),
+        buffer_tail_(buffer_tail),
         head_(buffer_head),
         tail_(buffer_tail) {}

   SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
@@ -47,9 +49,14 @@ class SimpleMemoryAllocator {
   uint8_t* GetHead() const { return head_; }
   uint8_t* GetTail() const { return tail_; }

   size_t GetAvailableMemory() const { return tail_ - head_; }
+  size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }

  private:
+  size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
+
   ErrorReporter* error_reporter_;
+  uint8_t* buffer_head_;
+  uint8_t* buffer_tail_;
   uint8_t* head_;
   uint8_t* tail_;
 };
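A worked example of the new accounting (illustrative numbers only, not from the commit): the head grows upward for tensor data, the tail grows downward for persistent allocations, and GetUsedBytes() counts everything no longer between them.

// Hypothetical 1024-byte buffer: head_ has advanced 600 bytes (tensors) and
// tail_ has retreated 100 bytes (persistent allocations).
//   GetBufferSize()      = buffer_tail_ - buffer_head_ = 1024
//   GetAvailableMemory() = tail_ - head_ = 1024 - 600 - 100 = 324
//   GetUsedBytes()       = 1024 - 324 = 700   (= 600 + 100)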