Merge pull request #38121 from jenselofsson:offline_memory_planner

PiperOrigin-RevId: 316533499 Change-Id: Id967e853081829f4c974cf7527a628724ed0edc2
2020-06-15 13:37:09 -07:00 · 2020-06-15 13:37:09 -07:00 · 4381963d2d
commit 4381963d2d
parent 52736a6adc 708ecda43e
10 changed files with 649 additions and 92 deletions
--- a/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc
+++ b/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc
@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_error_reporter.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+#include "tensorflow/lite/micro/micro_optional_debug_tools.h"
 #include "tensorflow/lite/micro/testing/micro_test.h"
 #include "tensorflow/lite/schema/schema_generated.h"
 #include "tensorflow/lite/version.h"
@ -46,6 +47,7 @@ TF_LITE_MICRO_TEST(TestInvoke) {
                         "to supported version %d.\n",
                         model->version(), TFLITE_SCHEMA_VERSION);
  }
+  PrintModelData(model, error_reporter);

  // Pull in only the operation implementations we need.
  // This relies on a complete list of all the ops needed by this graph.
--- a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
+++ b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
@ -48,10 +48,10 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
  requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
  next_free += sizeof(BufferRequirements) * max_buffer_count_;

-  buffer_sizes_sorted_by_size_ = reinterpret_cast<int*>(next_free);
+  buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

-  buffer_ids_sorted_by_size_ = reinterpret_cast<int*>(next_free);
+  buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
  next_free += sizeof(int) * max_buffer_count_;

  buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
@ -76,11 +76,24 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
  current->size = size;
  current->first_time_used = first_time_used;
  current->last_time_used = last_time_used;
+  current->offline_offset = kOnlinePlannedBuffer;
  ++buffer_count_;
  need_to_calculate_offsets_ = true;
  return kTfLiteOk;
 }

+TfLiteStatus GreedyMemoryPlanner::AddBuffer(
+    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
+    int last_time_used, int offline_offset) {
+  BufferRequirements* current = &requirements_[buffer_count_];
+  if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
+      kTfLiteOk) {
+    return kTfLiteError;
+  }
+  current->offline_offset = offline_offset;
+  return kTfLiteOk;
+}
+
 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
    const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
    const int last_time_used) const {
@ -102,7 +115,7 @@ GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
  ListEntry* result = nullptr;
  ListEntry* candidate_next_entry;
  if (start == nullptr) {
-    candidate_next_entry = &buffers_sorted_by_offset_[0];
+    candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
  } else {
    if (start->next_entry_index == -1) {
      return nullptr;
@ -134,29 +147,51 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
  // This helps find a more compact layout. Intuitively, you can think
  // about putting the large buffers in place first, and then the
  // smaller buffers can fit in the gaps, rather than fragmenting the
-  // gaps with small buffers at the beginning.
+  // gaps with small buffers at the beginning. Add offline planned offsets
+  // first in the list, since they have a predetermined offset.
+  int idx_from_tail = buffer_count_;
+  int idx_from_head = 0;
  for (int i = 0; i < buffer_count_; ++i) {
-    buffer_sizes_sorted_by_size_[i] = requirements_[i].size;
-    buffer_ids_sorted_by_size_[i] = i;
+    if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
+      idx_from_tail--;
+      buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
+      buffer_ids_sorted_[idx_from_tail] = i;
      buffer_offsets_[i] = -1;
+    } else {
+      buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
+      buffer_ids_sorted_[idx_from_head] = i;
+      buffer_offsets_[i] = requirements_[i].offline_offset;
+      idx_from_head++;
+    }
  }
-  // This sorting algorithm is naive, and may end up taking a very long time
-  // with hundreds of buffers.
-  ReverseSortInPlace(buffer_sizes_sorted_by_size_, buffer_ids_sorted_by_size_,
-                     buffer_count_);

-  // Put the largest buffer at offset zero to start the process.
-  ListEntry* first_entry = &buffers_sorted_by_offset_[0];
-  first_entry->offset = 0;
-  first_entry->requirements_index = buffer_ids_sorted_by_size_[0];
-  first_entry->next_entry_index = -1;
+  // This sorting algorithm is naive, and may end up taking a very long time
+  // with hundreds of buffers. Do not sort the offline planned offsets.
+  ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
+                     &buffer_ids_sorted_[idx_from_head],
+                     buffer_count_ - idx_from_head);
+
+  // Initialize the first entry to the first buffer in
+  // buffer_ids_sorted_.
+  //   - If there are no offline planned offsets, the largest buffer will be
+  //     first, and the buffers will be handled in size order.
+  //   - If offline offsets are present, these will be handled first in order
+  //     for the greedy algorithm to utilized gaps in the offline plan.
+  first_entry_index_ = 0;
  next_free_entry_ = 1;
-  buffer_offsets_[buffer_ids_sorted_by_size_[0]] = 0;
+  ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
+  first_entry->next_entry_index = -1;  // to mark the entry as end of list
+  int buffer_id = buffer_ids_sorted_[0];
+  first_entry->requirements_index = buffer_id;
+  if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
+    buffer_offsets_[buffer_id] = 0;
+  }
+  first_entry->offset = buffer_offsets_[buffer_id];

  // Work through the rest of the buffers to find a good gap to place each one.
  for (int i = 1; i < buffer_count_; ++i) {
    // The id is the order the buffer was originally added by the client.
-    const int buffer_id = buffer_ids_sorted_by_size_[i];
+    buffer_id = buffer_ids_sorted_[i];
    // Look at what size and time range the buffer needs to be active.
    BufferRequirements* wanted_requirements = &requirements_[buffer_id];
    const int wanted_size = wanted_requirements->size;
@ -168,9 +203,11 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
    // so that it's easy to find the next buffer in memory, and so the gap.
    // The candidate_entry variable holds the buffer that we're considering
    // placing the current buffer after.
-    ListEntry* prior_entry = nullptr;
+
    int candidate_offset = 0;
    // Loop through the offset-ordered list of buffers, looking for gaps.
+    if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
+      ListEntry* prior_entry = nullptr;
      while (true) {
        // Find out what the next active buffer is.
        ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
@ -200,6 +237,10 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
        // The gap wasn't big enough, so move on to another candidate.
        prior_entry = next_entry;
      }
+    } else {
+      // Offline planned offset are to be considered constant
+      candidate_offset = wanted_requirements->offline_offset;
+    }
    // At this point, we've either found a gap (possibly at the end of the
    // list) and want to place the buffer there, or there are no other active
    // buffers in this time range and so we can put it at offset zero.
@ -212,9 +253,17 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
    new_entry->requirements_index = buffer_id;
    const int new_entry_index = next_free_entry_;
    ++next_free_entry_;
+
+    if (first_entry->offset > candidate_offset) {
+      // The new entry offset is smaller than the first entry offset =>
+      // replace the first entry
+      first_entry = new_entry;
+      first_entry->next_entry_index = first_entry_index_;
+      first_entry_index_ = new_entry_index;
+    } else {
      ListEntry* current_entry = first_entry;
-    // Make sure that we insert the buffer at the correct place in the ordered
-    // list.
+      // Make sure that we insert the buffer at the correct place in the
+      // buffer-offset-ordered list
      while (true) {
        const int next_entry_index = current_entry->next_entry_index;
        if (next_entry_index == -1) {
@ -223,6 +272,7 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
          new_entry->next_entry_index = -1;
          break;
        }
+        // not at the end of the list -> take a look at next entry
        ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
        if (next_entry->offset > candidate_offset) {
          // We're at the right spot to do an insertion and retain the sorting
@ -235,13 +285,14 @@ void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
      }
    }
  }
+}

 size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
  CalculateOffsetsIfNeeded();
  if (buffer_count_ == 0) {
    return 0;
  }
-  ListEntry* entry = &buffers_sorted_by_offset_[0];
+  ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
  size_t max_size = 0;
  while (entry) {
    BufferRequirements* requirements =
--- a/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
+++ b/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
@ -21,6 +21,8 @@ limitations under the License.

 namespace tflite {

+constexpr int kOnlinePlannedBuffer = -1;
+
 // A memory planner that uses a greedy algorithm to arrange buffers in memory
 // to minimize the overall arena size needed.
 //
@ -59,6 +61,12 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
                         int first_time_used, int last_time_used) override;

+  // Record details of an offline planned buffer offset we want to place.
+  // offline_offset is the buffer offset from the start of the arena.
+  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
+                         int first_time_used, int last_time_used,
+                         int offline_offset);
+
  // Returns the high-water mark of used memory. This is the minimum size of a
  // memory arena you'd need to allocate to hold these buffers.
  size_t GetMaximumMemorySize() override;
@ -90,8 +98,8 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  static size_t per_buffer_size() {
    const int per_buffer_size =
        sizeof(BufferRequirements) +  // requirements_
-        sizeof(int) +                 // buffer_sizes_sorted_by_size_
-        sizeof(int) +                 // buffer_ids_sorted_by_size_
+        sizeof(int) +                 // buffer_sizes_sorted_
+        sizeof(int) +                 // buffer_ids_sorted_
        sizeof(ListEntry) +           // buffers_sorted_by_offset_
        sizeof(int);                  // buffer_offsets_;
    return per_buffer_size;
@ -121,16 +129,25 @@ class GreedyMemoryPlanner : public MemoryPlanner {
  // Records the client-provided information about each buffer.
  struct BufferRequirements {
    int size;
+    int offline_offset;
    int first_time_used;
    int last_time_used;
  };

  // Working arrays used during the layout algorithm.
  BufferRequirements* requirements_;
-  int* buffer_sizes_sorted_by_size_;
-  int* buffer_ids_sorted_by_size_;
+  // buffer_sizes_sorted_ and buffer_ids_sorted_ are sorted according to:
+  //   {
+  //     offline planned buffers,
+  //     online planned buffers sorted by size
+  //   }
+  int* buffer_sizes_sorted_;
+  int* buffer_ids_sorted_;
  ListEntry* buffers_sorted_by_offset_;
-  int next_free_entry_;
+  int next_free_entry_;    // Index of the next free entry of
+                           // buffers_sorted_by_offset_
+  int first_entry_index_;  // Index of the first entry (smallest offset) of
+                           // buffers_sorted_by_offset_

  // Stores the outcome of the plan, the location of each buffer in the arena.
  int* buffer_offsets_;
--- a/tensorflow/lite/micro/micro_allocator.cc
+++ b/tensorflow/lite/micro/micro_allocator.cc
@ -39,16 +39,19 @@ namespace {
 // Used to hold information used during allocation calculations.
 struct AllocationInfo {
  size_t bytes;
+  void** output_ptr;
  int first_created;
  int last_used;
+  int32_t offline_offset;
  bool needs_allocating;
-  void** output_ptr;
 };

 // We align tensor buffers to 16-byte boundaries, since this is a common
 // requirement for SIMD extensions.
 constexpr int kBufferAlignment = 16;

+constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
+
 // Instance of a zero-length int to pass as tensor dims for a flatbuffer
 // Tensor with no shape. Note that the second member of a TfLiteArray is a
 // flexible array member, which is not strictly valid C++. However it is
@ -77,6 +80,71 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
  TF_LITE_REMOVE_VIRTUAL_DELETE
 };

+#if !defined(__clang__)
+// Helper function to check flatbuffer metadata correctness. This function is
+// not called by default. Hence it's not linked in to the final binary code.
+TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
+                                        ErrorReporter* error_reporter) {
+  // Suppress compile warning for unused function
+  (void)CheckOfflinePlannedOffsets;
+
+  if (model->metadata()) {
+    for (size_t i = 0; i < model->metadata()->size(); ++i) {
+      auto metadata = model->metadata()->Get(i);
+      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
+                  strlen(kOfflineMemAllocMetadata)) == 0) {
+        auto* subgraphs = model->subgraphs();
+        const SubGraph* subgraph = (*subgraphs)[0];
+        const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
+            subgraph->tensors();
+        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+            model->buffers();
+        int nbr_tflite_tensors = tensors->size();
+        auto* buffer = (*buffers)[metadata->buffer()];
+        auto* array = buffer->data();
+        const uint32_t* metadata_buffer = (uint32_t*)array->data();
+        int version = metadata_buffer[0];
+        int subgraph_idx = metadata_buffer[1];
+        const int nbr_offline_offsets = metadata_buffer[2];
+        int* offline_planner_offsets = (int*)&metadata_buffer[3];
+
+        TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
+        TF_LITE_REPORT_ERROR(error_reporter,
+                             "Offline planner metadata found, version %d, "
+                             "subgraph %d, nbr offline offsets %d",
+                             version, subgraph_idx, nbr_offline_offsets);
+        for (int j = 0; j < nbr_offline_offsets; ++j) {
+          TF_LITE_REPORT_ERROR(
+              error_reporter,
+              "Offline planner tensor index %d, offline offset: %d", j,
+              offline_planner_offsets[j]);
+        }
+
+        if (version != 1) {
+          TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
+                               version);
+          return kTfLiteError;
+        }
+        if (subgraph_idx != 0) {
+          TF_LITE_REPORT_ERROR(error_reporter,
+                               "Only 1 subgraph supported! Subgraph idx (%d)\n",
+                               subgraph_idx);
+          return kTfLiteError;
+        }
+        if (nbr_tflite_tensors != nbr_offline_offsets) {
+          TF_LITE_REPORT_ERROR(error_reporter,
+                               "Nbr of offline buffer offsets (%d) in metadata "
+                               "not equal nbr tensors (%d)\n",
+                               nbr_offline_offsets, nbr_tflite_tensors);
+          return kTfLiteError;
+        }
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+#endif
+
 // A helper class to construct AllocationInfo array. This array contains the
 // lifetime of tensors / scratch_buffer and will be used to calculate the memory
 // plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
@ -94,9 +162,17 @@ class AllocationInfoBuilder {
    return Allocate();
  }

+  // Check if model contains offline planned buffer offsets.
+  //  - If there's no metadata available, offline_planner_offsets is not set
+  //  - If there's metadata available, offline_planner_offsets will point to the
+  //    first offset in the metadata buffer list.
+  TfLiteStatus GetOfflinePlannedOffsets(const Model* model,
+                                        int32_t** offline_planner_offsets);
+
  // Add allocaiton information for the tensors.
-  TfLiteStatus AddTensors(const SubGraph* subgraph,
+  TfLiteStatus AddTensors(const SubGraph* subgraph, int32_t* offline_offsets,
                          TfLiteTensor* runtime_tensors);
+
  // Add allocation information for the scratch buffers.
  TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles);

@ -130,6 +206,7 @@ TfLiteStatus AllocationInfoBuilder::Allocate() {
 }

 TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
+                                               int32_t* offline_offsets,
                                               TfLiteTensor* runtime_tensors) {
  // Set up allocation info for all tensors.
  for (size_t i = 0; i < tensor_count_; ++i) {
@ -141,6 +218,11 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
    current->last_used = -1;
    current->needs_allocating = (runtime_tensors[i].data.data == nullptr) &&
                                (!subgraph->tensors()->Get(i)->is_variable());
+    if (offline_offsets) {
+      current->offline_offset = offline_offsets[i];
+    } else {
+      current->offline_offset = kOnlinePlannedBuffer;
+    }
  }

  for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
@ -198,6 +280,52 @@ TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
  return kTfLiteOk;
 }

+// The tensor offsets will be encoded in the metadata:[Metadata] field of the
+// Model. The following encoding applies:
+//
+// | Metadata component |                 Value                                |
+// |    name:string     | “OfflineMemoryAllocation”                            |
+// |    buffer:unit     | Index of buffer containing memory allocation data    |
+//
+// The buffer contents for the memory allocation is a list of 32-bit integers.
+// The number of tensors, n, must be equal to the number of tensors defined in
+// the model. The following encoding applies:
+//
+// |  Offset |                            Value                                |
+// |    0    | Offline allocation format version – set to 0                    |
+// |    1    | Subgraph index to which this allocation applies                 |
+// |    2    | Number offsets following: n                                     |
+// |    3    | Arena byte offset of tensor #0 or -1 to allocate at runtime     |
+// |    4    | Arena byte offset of tensor #1 or -1 to allocate at runtime     |
+// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
+TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
+    const Model* model, int32_t** offline_planner_offsets) {
+  if (model->metadata()) {
+    for (size_t i = 0; i < model->metadata()->size(); ++i) {
+      auto metadata = model->metadata()->Get(i);
+      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
+                  strlen(kOfflineMemAllocMetadata)) == 0) {
+        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+            model->buffers();
+        auto* buffer = (*buffers)[metadata->buffer()];
+        auto* array = buffer->data();
+        const uint32_t* metadata_buffer = (uint32_t*)array->data();
+        const size_t nbr_tensors = (size_t)metadata_buffer[2];
+        *offline_planner_offsets = (int32_t*)&metadata_buffer[3];
+
+        if (tensor_count_ != nbr_tensors) {
+          TF_LITE_REPORT_ERROR(reporter_,
+                               "Nbr of offline buffer offsets (%d) in metadata "
+                               "not equal nbr tensors (%d)\n",
+                               nbr_tensors, tensor_count_);
+          return kTfLiteError;
+        }
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
 TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
    internal::ScratchBufferHandle* buffer_handles) {
  // Set up allocation info for buffers.
@ -210,11 +338,13 @@ TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
    current->first_created = handle->node_idx;
    current->last_used = handle->node_idx;
    current->needs_allocating = true;
+    current->offline_offset = kOnlinePlannedBuffer;
  }
  return kTfLiteOk;
 }

-TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
+TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
+                        GreedyMemoryPlanner* planner,
                        const AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Add the tensors to our allocation plan.
@ -223,9 +353,15 @@ TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
    if (current->needs_allocating) {
      size_t aligned_bytes_required =
          AlignSizeUp(current->bytes, kBufferAlignment);
+      if (current->offline_offset == kOnlinePlannedBuffer) {
        TF_LITE_ENSURE_STATUS(
            planner->AddBuffer(error_reporter, aligned_bytes_required,
                               current->first_created, current->last_used));
+      } else {
+        TF_LITE_ENSURE_STATUS(planner->AddBuffer(
+            error_reporter, aligned_bytes_required, current->first_created,
+            current->last_used, current->offline_offset));
+      }
    }
  }
  return kTfLiteOk;
@ -466,7 +602,6 @@ TfLiteStatus MicroAllocator::StartModelAllocation(

  const SubGraph* subgraph = GetSubGraphFromModel(model);
  TFLITE_DCHECK(subgraph != nullptr);
-
  model_is_allocating_ = true;

  TF_LITE_ENSURE_STATUS(
@ -491,7 +626,7 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
  const SubGraph* subgraph = GetSubGraphFromModel(model);
  TFLITE_DCHECK(subgraph != nullptr);

-  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(context, subgraph));
+  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, context, subgraph));
  TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph));

  model_is_allocating_ = false;
@ -739,7 +874,8 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
  return (*subgraphs)[0];
 }

-TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
+TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
+                                                    TfLiteContext* context,
                                                    const SubGraph* subgraph) {
  // Create static memory plan
  // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
@ -756,7 +892,13 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
    AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
    TF_LITE_ENSURE_STATUS(
        builder.Init(subgraph->tensors()->size(), scratch_buffer_count_));
-    TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, context->tensors));
+
+    int32_t* offline_planner_offsets = nullptr;
+    TF_LITE_ENSURE_STATUS(
+        builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
+    TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, offline_planner_offsets,
+                                             context->tensors));
+
    TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
    const AllocationInfo* allocation_info = builder.Finish();

--- a/tensorflow/lite/micro/micro_allocator.h
+++ b/tensorflow/lite/micro/micro_allocator.h
@ -189,7 +189,8 @@ class MicroAllocator {

  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena.
-  virtual TfLiteStatus CommitStaticMemoryPlan(TfLiteContext* context,
+  virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
+                                              TfLiteContext* context,
                                              const SubGraph* subgraph);

  // A simple memory allocator that always allocate from the arena tail or head.
--- a/tensorflow/lite/micro/micro_allocator_test.cc
+++ b/tensorflow/lite/micro/micro_allocator_test.cc
@ -253,10 +253,10 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
  // bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors.
  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
  // t1 can't reuse any memory, as n0 requires both t0 and t1.
-  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
  // t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires
  // both t1 and t2.
-  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
  // t3 reuses the same memory from t0 as t0 is not an input to any node.
  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);

@ -308,4 +308,218 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
                                                       /*count=*/3);
 }

+TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) {
+  int version = 1;
+  int subgraph = 0;
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {version, subgraph,
+                                                nbr_tensors,  // header
+                                                // memory offsets:
+                                                -1, -1, -1, -1};
+
+  // The structure is identical to the one in
+  // TestAllocationForModelsWithBranches
+  int num_conns = 3;
+  tflite::testing::NodeConnection node_list[3] = {{
+                                                      {0},  // input
+                                                      {1}   // output
+                                                  },
+                                                  {
+                                                      {0},  // input
+                                                      {2}   // output
+                                                  },
+                                                  {
+                                                      {1, 2},  // input1, input2
+                                                      {3}      // output
+                                                  }};
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  // Since all of the tensors are online planned and the model structure is
+  // identical to that in TestAllocationForModelsWithBranches,
+  // the offsets be should identical to that test.
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerBasic) {
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {1,  0, nbr_tensors,
+                                                0,    // t0
+                                                48,   // t1
+                                                0,    // t2
+                                                48};  // t3
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+
+  int num_conns = 3;
+  tflite::testing::NodeConnection node_list[3] = {{
+                                                      {t0},  // input
+                                                      {t1}   // output
+                                                  },
+                                                  {
+                                                      {t1},  // input
+                                                      {t2}   // output
+                                                  },
+                                                  {
+                                                      {t2},  // input
+                                                      {t3}   // output
+                                                  }};
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[3].data.uint8 - start);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) {
+  constexpr int nbr_tensors = 4;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {
+      1, 0, nbr_tensors,  // header: version, subgraph, nbr tensors
+      // memory offsets:
+      0,    // t0
+      0,    // t1
+      48,   // t2
+      -1};  // t3
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+
+  int num_conns = 2;
+  tflite::testing::NodeConnection node_list[2] = {
+      {
+          {t0, t1},  // input, scratch
+          {t2}       // output
+      },
+      {
+          {t2},  // input
+          {t3}   // output
+      },
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes);
+}
+
+TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) {
+  constexpr int nbr_tensors = 5;
+  tflite::testing::MockOpResolver mock_resolver;
+  tflite::NodeAndRegistration* node_and_registration;
+  const int32_t metadata_buffer[tflite::testing::kOfflinePlannerHeaderSize +
+                                nbr_tensors] = {
+      1, 0, nbr_tensors,  // header: version, subgraph, nbr tensors
+      // memory offsets:
+      0,    // t0
+      48,   // t1
+      -1,   // t2
+      0,    // t3
+      -1};  // t4
+
+  int t0 = 0;
+  int t1 = 1;
+  int t2 = 2;
+  int t3 = 3;
+  int t4 = 4;
+
+  int num_conns = 2;
+  tflite::testing::NodeConnection node_list[2] = {
+      {
+          {t0, t1},  // input, scratch
+          {t2},      // output
+      },
+      {
+          {t2},      // input
+          {t3, t4},  // output1, output2
+      },
+  };
+
+  const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
+      nbr_tensors, metadata_buffer, node_list, num_conns);
+
+  TfLiteContext context;
+  constexpr size_t arena_size = 4096;
+  uint8_t arena[arena_size];
+  tflite::MicroAllocator* allocator =
+      tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
+
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
+                                                 &node_and_registration));
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          allocator->FinishModelAllocation(model, &context));
+
+  uint8_t* start = context.tensors[0].data.uint8;
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[4].data.uint8 - start);
+  TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
+}
+
 TF_LITE_MICRO_TESTS_END
--- a/tensorflow/lite/micro/micro_optional_debug_tools.cc
+++ b/tensorflow/lite/micro/micro_optional_debug_tools.cc
@ -27,6 +27,7 @@ limitations under the License.

 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
 #include "tensorflow/lite/micro/micro_allocator.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/schema/schema_generated.h"
@ -111,6 +112,33 @@ const char* AllocTypeName(TfLiteAllocationType type) {
 }
 }  // namespace

+// Helper function to print model flatbuffer data. This function is not called
+// by default. Hence it's not linked in to the final binary code.
+void PrintModelData(const Model* model, ErrorReporter* error_reporter) {
+  auto* subgraphs = model->subgraphs();
+  const SubGraph* subgraph = (*subgraphs)[0];
+  const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
+      subgraph->tensors();
+  const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+      model->buffers();
+  TF_LITE_REPORT_ERROR(error_reporter, "==== Model info: =====");
+  for (size_t i = 0; i < tensors->size(); ++i) {
+    const tflite::Tensor& flatbuffer_tensor = *tensors->Get(i);
+    size_t type_size, tensor_size;
+    auto* buffer = (*buffers)[flatbuffer_tensor.buffer()];
+    auto* array = buffer->data();
+    int array_size = 0;
+    if (array) {
+      array_size = array->size();
+    }
+    BytesRequiredForTensor(flatbuffer_tensor, &tensor_size, &type_size,
+                           error_reporter);
+    TF_LITE_REPORT_ERROR(
+        error_reporter, "Tensor index: %d arena tensor %d size %d ", i,
+        !array_size && !flatbuffer_tensor.is_variable(), tensor_size);
+  }
+}
+
 // Prints a dump of what tensors and what nodes are in the interpreter.
 void PrintInterpreterState(MicroInterpreter* interpreter) {
  printf("Interpreter has %zu tensors and %zu nodes\n",
--- a/tensorflow/lite/micro/micro_optional_debug_tools.h
+++ b/tensorflow/lite/micro/micro_optional_debug_tools.h
@ -20,6 +20,9 @@ limitations under the License.
 #include "tensorflow/lite/micro/micro_interpreter.h"

 namespace tflite {
+// Helper function to print model flatbuffer data. This function is not called
+// by default. Hence it's not linked in to the final binary code.
+void PrintModelData(const Model* model, ErrorReporter* error_reporter);
 // Prints a dump of what tensors and what nodes are in the interpreter.
 void PrintInterpreterState(MicroInterpreter* interpreter);
 }  // namespace tflite
--- a/tensorflow/lite/micro/test_helpers.cc
+++ b/tensorflow/lite/micro/test_helpers.cc
@ -55,7 +55,7 @@ class StackAllocator : public flatbuffers::Allocator {
    return *inst;
  }

-  static constexpr size_t kStackAllocatorSize = 4096;
+  static constexpr size_t kStackAllocatorSize = 8192;

 private:
  uint8_t data_backing_[kStackAllocatorSize];
@ -101,6 +101,9 @@ class ModelBuilder {
  Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
               std::initializer_list<Tensor> outputs);

+  void AddMetadata(const char* description_string,
+                   const int32_t* metadata_buffer_data, size_t num_elements);
+
  // Constructs the flatbuffer model using `builder_` and return a pointer to
  // it. The returned model has the same lifetime as `builder_`.
  const Model* BuildModel(std::initializer_list<Tensor> inputs,
@ -123,6 +126,16 @@ class ModelBuilder {

  static constexpr int kMaxTensors = 50;
  flatbuffers::Offset<tflite::Tensor> tensors_[kMaxTensors];
+
+  static constexpr int kMaxMetadataBuffers = 10;
+
+  static constexpr int kMaxMetadatas = 10;
+  flatbuffers::Offset<Metadata> metadata_[kMaxMetadatas];
+
+  flatbuffers::Offset<Buffer> metadata_buffers_[kMaxMetadataBuffers];
+
+  int nbr_of_metadata_buffers_ = 0;
+
  int next_tensor_id_ = 0;
 };

@ -149,13 +162,33 @@ ModelBuilder::Node ModelBuilder::AddNode(
  return next_operator_id_ - 1;
 }

+void ModelBuilder::AddMetadata(const char* description_string,
+                               const int32_t* metadata_buffer_data,
+                               size_t num_elements) {
+  metadata_[ModelBuilder::nbr_of_metadata_buffers_] =
+      CreateMetadata(*builder_, builder_->CreateString(description_string),
+                     1 + ModelBuilder::nbr_of_metadata_buffers_);
+
+  metadata_buffers_[nbr_of_metadata_buffers_] = tflite::CreateBuffer(
+      *builder_, builder_->CreateVector((uint8_t*)metadata_buffer_data,
+                                        sizeof(uint32_t) * num_elements));
+
+  ModelBuilder::nbr_of_metadata_buffers_++;
+}
+
 const Model* ModelBuilder::BuildModel(
    std::initializer_list<ModelBuilder::Tensor> inputs,
    std::initializer_list<ModelBuilder::Tensor> outputs) {
  // Model schema requires an empty buffer at idx 0.
-  constexpr size_t kBufferSize = 1;
-  const flatbuffers::Offset<Buffer> buffers[kBufferSize] = {
-      tflite::CreateBuffer(*builder_)};
+  size_t buffer_size = 1 + ModelBuilder::nbr_of_metadata_buffers_;
+  flatbuffers::Offset<Buffer> buffers[kMaxMetadataBuffers];
+  buffers[0] = tflite::CreateBuffer(*builder_);
+
+  // Place the metadata buffers first in the buffer since the indices for them
+  // have already been set in AddMetadata()
+  for (int i = 1; i < ModelBuilder::nbr_of_metadata_buffers_ + 1; ++i) {
+    buffers[i] = metadata_buffers_[i - 1];
+  }

  // TFLM only supports single subgraph.
  constexpr size_t subgraphs_size = 1;
@ -166,12 +199,26 @@ const Model* ModelBuilder::BuildModel(
          builder_->CreateVector(outputs.begin(), outputs.size()),
          builder_->CreateVector(operators_, next_operator_id_),
          builder_->CreateString("test_subgraph"))};
-  const flatbuffers::Offset<Model> model_offset = tflite::CreateModel(
+
+  flatbuffers::Offset<Model> model_offset;
+  if (ModelBuilder::nbr_of_metadata_buffers_ > 0) {
+    model_offset = tflite::CreateModel(
        *builder_, 0,
        builder_->CreateVector(operator_codes_, next_operator_code_id_),
        builder_->CreateVector(subgraphs, subgraphs_size),
        builder_->CreateString("teset_model"),
-      builder_->CreateVector(buffers, kBufferSize));
+        builder_->CreateVector(buffers, buffer_size), 0,
+        builder_->CreateVector(metadata_,
+                               ModelBuilder::nbr_of_metadata_buffers_));
+  } else {
+    model_offset = tflite::CreateModel(
+        *builder_, 0,
+        builder_->CreateVector(operator_codes_, next_operator_code_id_),
+        builder_->CreateVector(subgraphs, subgraphs_size),
+        builder_->CreateString("teset_model"),
+        builder_->CreateVector(buffers, buffer_size));
+  }
+
  tflite::FinishModelBuffer(*builder_, model_offset);
  void* model_pointer = builder_->GetBufferPointer();
  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
@ -250,6 +297,35 @@ const Model* BuildSimpleModelWithBranch() {
  return model_builder.BuildModel({t0}, {t3});
 }

+const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
+                                           const int32_t* metadata_buffer,
+                                           NodeConnection* node_conn,
+                                           int num_conns) {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
+
+  ModelBuilder model_builder(fb_builder);
+
+  const int op_id =
+      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom",
+                               /* version= */ 0);
+
+  for (int i = 0; i < number_of_tensors; ++i) {
+    model_builder.AddTensor(TensorType_FLOAT32, {2, 2, 3});
+  }
+
+  for (int i = 0; i < num_conns; ++i) {
+    model_builder.AddNode(op_id, node_conn[i].input, node_conn[i].output);
+  }
+
+  model_builder.AddMetadata(
+      "OfflineMemoryAllocation", metadata_buffer,
+      number_of_tensors + tflite::testing::kOfflinePlannerHeaderSize);
+
+  return model_builder.BuildModel(node_conn[0].input,
+                                  node_conn[num_conns - 1].output);
+}
+
 const Model* BuildSimpleMockModel() {
  using flatbuffers::Offset;
  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@ -638,6 +714,15 @@ const Model* GetSimpleModelWithBranch() {
  return model;
 }

+const Model* GetModelWithOfflinePlanning(int num_tensors,
+                                         const int32_t* metadata_buffer,
+                                         NodeConnection* node_conn,
+                                         int num_conns) {
+  const Model* model = BuildModelWithOfflinePlanning(
+      num_tensors, metadata_buffer, node_conn, num_conns);
+  return model;
+}
+
 const Model* GetSimpleStatefulModel() {
  static Model* model = nullptr;
  if (!model) {
--- a/tensorflow/lite/micro/test_helpers.h
+++ b/tensorflow/lite/micro/test_helpers.h
@ -30,6 +30,14 @@ limitations under the License.
 namespace tflite {
 namespace testing {

+constexpr int kOfflinePlannerHeaderSize = 3;
+
+struct NodeConnection_ {
+  std::initializer_list<int32_t> input;
+  std::initializer_list<int32_t> output;
+};
+typedef struct NodeConnection_ NodeConnection;
+
 // A simple operator that returns the median of the input with the number of
 // times the kernel was invoked. The implementation below is deliberately
 // complicated, just to demonstrate how kernel memory planning works.
@ -82,6 +90,12 @@ const Model* GetComplexMockModel();
 // Returns a simple flatbuffer model with two branches.
 const Model* GetSimpleModelWithBranch();

+// Returns a simple flatbuffer model with offline planned tensors
+const Model* GetModelWithOfflinePlanning(int num_tensors,
+                                         const int32_t* metadata_buffer,
+                                         NodeConnection* node_conn,
+                                         int num_conns);
+
 // Returns a flatbuffer model with `simple_stateful_op`
 const Model* GetSimpleStatefulModel();