Decouple the model and TfLiteContext instances from the allocator and interpreter.

This change simplifies the interaction between the MicroInterpreter and the MicroAllocator: all allocation for a given model is now staged between MicroAllocator::StartModelAllocation() and MicroAllocator::FinishModelAllocation(). A minimal sketch of the new flow follows below.

This change prepares for two upcoming features:
1.) A multi-tenant memory arena.
2.) An easy-to-use RecordingMicroInterpreter that allows auditing of recorded memory arena allocations.
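
For reference, a minimal sketch of the new two-phase flow against the signatures introduced in this change. g_my_model_data stands in for an application-provided flatbuffer, the arena size is arbitrary, and AllOpsResolver/MicroErrorReporter are the standard helpers the unit tests below also use; treat the header paths as assumptions.

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_allocator.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

// Hypothetical model flatbuffer provided by the application.
extern const unsigned char g_my_model_data[];

TfLiteStatus AllocateMyModel() {
  static tflite::MicroErrorReporter error_reporter;
  const tflite::Model* model = tflite::GetModel(g_my_model_data);

  constexpr size_t kArenaSize = 4 * 1024;
  static uint8_t arena[kArenaSize];

  // The allocator is created without a model or TfLiteContext; both are now
  // passed per call, which is what enables sharing one arena between models.
  tflite::MicroAllocator* allocator =
      tflite::MicroAllocator::Create(arena, kArenaSize, &error_reporter);

  TfLiteContext context;
  tflite::AllOpsResolver op_resolver;
  tflite::NodeAndRegistration* node_and_registrations = nullptr;

  // Stage 1: walk the flatbuffer and allocate tensor, node, and registration
  // data for this model.
  TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
      model, &context, op_resolver, &node_and_registrations));

  // ... operator init/prepare and scratch buffer requests happen in between ...

  // Stage 2: plan and commit the non-persistent buffers and allocate
  // variable tensors.
  TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(model, &context));
  return kTfLiteOk;
}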

PiperOrigin-RevId: 315736762
Change-Id: Ia9da1f6edcd1001e3aad975c117905054f172e18
Author: Nick Kreeger, 2020-06-10 11:55:17 -07:00 (committed by TensorFlower Gardener)
parent 0fecf6f89f
commit 26ee75e596
7 changed files with 334 additions and 261 deletions


@ -434,20 +434,15 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
} // namespace internal
MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
SimpleMemoryAllocator* memory_allocator,
MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter)
: memory_allocator_(memory_allocator),
model_(model),
context_(context),
error_reporter_(error_reporter),
active_(false) {}
model_is_allocating_(false) {}
MicroAllocator::~MicroAllocator() {}
MicroAllocator* MicroAllocator::Create(TfLiteContext* context,
const Model* model,
uint8_t* tensor_arena, size_t arena_size,
MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter) {
uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
if (aligned_arena != tensor_arena) {
@ -458,112 +453,69 @@ MicroAllocator* MicroAllocator::Create(TfLiteContext* context,
aligned_arena - tensor_arena);
}
size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
return Create(context, model,
SimpleMemoryAllocator::Create(error_reporter, aligned_arena,
return Create(SimpleMemoryAllocator::Create(error_reporter, aligned_arena,
aligned_arena_size),
error_reporter);
}
MicroAllocator* MicroAllocator::Create(TfLiteContext* context,
const Model* model,
SimpleMemoryAllocator* memory_allocator,
MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(memory_allocator != nullptr);
TFLITE_DCHECK(error_reporter != nullptr);
uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
sizeof(MicroAllocator), alignof(MicroAllocator));
MicroAllocator* allocator = new (allocator_buffer)
MicroAllocator(context, model, memory_allocator, error_reporter);
if (allocator->InitGraphAndContextTensorData() != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter,
"MicroAllocator: Failed to initialize model graph.");
return nullptr;
}
MicroAllocator* allocator =
new (allocator_buffer) MicroAllocator(memory_allocator, error_reporter);
return allocator;
}
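
A side note on the factory above: Create() carves storage for the MicroAllocator object itself out of the arena tail and constructs it there with placement new, so the allocator never touches the heap. The standalone fragment below illustrates just that idiom; Planner and CreateInArenaTail are made-up names for illustration, not TFLM code.

#include <cstddef>
#include <cstdint>
#include <new>

struct Planner {
  explicit Planner(std::size_t budget) : budget_(budget) {}
  std::size_t budget_;
};

Planner* CreateInArenaTail(uint8_t* arena_tail) {
  // arena_tail is assumed to be suitably aligned for Planner; the real code
  // guarantees this by asking AllocateFromTail() for sizeof/alignof the type.
  return new (arena_tail) Planner(/*budget=*/2048);
}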
TfLiteStatus MicroAllocator::PrepareFromFlatbuffer(
TfLiteStatus MicroAllocator::StartModelAllocation(
const Model* model, TfLiteContext* context,
const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations) {
if (!active_) {
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(context != nullptr);
if (model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Model allocation started before "
"finishing previously allocated model");
return kTfLiteError;
}
TF_LITE_ENSURE_STATUS(AllocateNodeAndRegistrations(node_and_registrations));
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
model_is_allocating_ = true;
TF_LITE_ENSURE_STATUS(
InitGraphAndContextTensorData(model, context, subgraph));
TF_LITE_ENSURE_STATUS(
AllocateNodeAndRegistrations(subgraph, node_and_registrations));
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
op_resolver, *node_and_registrations));
model, subgraph, op_resolver, *node_and_registrations));
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::FinishTensorAllocation() {
if (!active_) {
TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
TfLiteContext* context) {
if (!model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Model allocation finished before "
"starting allocating model");
return kTfLiteError;
}
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
// 2. Add them into the planner (such as the GreedyMemoryPlanner).
// 3. Static memory planning using the planner.
// 4. Set tensor/buffer pointers based on the offsets from the previous step.
// Note that AllocationInfo is only needed for creating the plan. It will be
// thrown away when the child allocator (tmp_allocator) goes out of scope.
{
SimpleMemoryAllocator tmp_allocator(error_reporter_,
memory_allocator_->GetHead(),
memory_allocator_->GetTail());
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
TF_LITE_ENSURE_STATUS(
builder.Init(subgraph_->tensors()->size(), scratch_buffer_count_));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph_, context_->tensors));
TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
const AllocationInfo* allocation_info = builder.Finish();
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(subgraph, context));
TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph->tensors(), context->tensors,
memory_allocator_));
// Remaining arena size that memory planner can use for calculating offsets.
size_t remaining_arena_size = tmp_allocator.GetAvailableMemory();
uint8_t* planner_arena =
tmp_allocator.AllocateFromHead(remaining_arena_size, /*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
TF_LITE_ENSURE_STATUS(
CreatePlan(error_reporter_, &planner, allocation_info, builder.Size()));
size_t actual_available_arena_size =
memory_allocator_->GetAvailableMemory();
// Make sure we have enough arena size.
if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Arena size is too small for activation buffers. Needed %d but only "
"%d was available.",
planner.GetMaximumMemorySize(), actual_available_arena_size);
return kTfLiteError;
}
// Commit the plan.
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
memory_allocator_->GetHead(),
allocation_info, builder.Size()));
// Allocate the planned area, so the allocator knows it's used.
uint8_t* allocated_tensor_memory =
memory_allocator_->AllocateFromHead(planner.GetMaximumMemorySize(),
/*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr);
}
// Data in variables need to be kept for the next invocation so allocating
// them from the tail (persistent area).
if (AllocateVariables(subgraph_->tensors(), context_->tensors,
memory_allocator_) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate variables. Please increase arena size.");
return kTfLiteError;
}
active_ = false;
model_is_allocating_ = false;
return kTfLiteOk;
}
@ -629,50 +581,32 @@ void* MicroAllocator::GetScratchBuffer(int buffer_idx) const {
}
size_t MicroAllocator::used_bytes() const {
if (active_) {
return 0;
}
return memory_allocator_->GetUsedBytes();
}
TfLiteStatus MicroAllocator::InitGraphAndContextTensorData() {
auto* subgraphs = model_->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
return kTfLiteError;
}
subgraph_ = (*subgraphs)[0];
TF_LITE_ENSURE_STATUS(AllocateTfLiteTensorArray());
TF_LITE_ENSURE_STATUS(PopulateTfLiteTensorArrayFromFlatbuffer());
active_ = true;
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::AllocateTfLiteTensorArray() {
context_->tensors_size = subgraph_->tensors()->size();
context_->tensors =
TfLiteStatus MicroAllocator::AllocateTfLiteTensorArray(
TfLiteContext* context, const SubGraph* subgraph) {
context->tensors_size = subgraph->tensors()->size();
context->tensors =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor) * context_->tensors_size,
alignof(TfLiteTensor)));
if (context_->tensors == nullptr) {
sizeof(TfLiteTensor) * context->tensors_size, alignof(TfLiteTensor)));
if (context->tensors == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for context->tensors, %d bytes required",
sizeof(TfLiteTensor) * context_->tensors_size);
sizeof(TfLiteTensor) * context->tensors_size);
return kTfLiteError;
}
return kTfLiteOk;
}
TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer() {
TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
// Initialize tensors in context_ using the flatbuffer for quantization data.
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
TfLiteStatus status = internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, *subgraph_->tensors()->Get(i), model_->buffers(),
error_reporter_, &context_->tensors[i]);
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &context->tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
@ -683,10 +617,10 @@ TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer() {
}
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
NodeAndRegistration** node_and_registrations) {
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) {
NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * subgraph_->operators()->size(),
sizeof(NodeAndRegistration) * subgraph->operators()->size(),
alignof(NodeAndRegistration)));
if (output == nullptr) {
TF_LITE_REPORT_ERROR(
@ -699,13 +633,14 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
}
TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
TfLiteStatus status = kTfLiteOk;
auto* opcodes = model_->operator_codes();
auto* opcodes = model->operator_codes();
MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
const auto* op = subgraph_->operators()->Get(i);
for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
const auto* op = subgraph->operators()->Get(i);
const size_t index = op->opcode_index();
if (index >= opcodes->size()) {
TF_LITE_REPORT_ERROR(error_reporter_,
@ -781,16 +716,81 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
}
return kTfLiteOk;
} // namespace tflite
size_t MicroAllocator::GetTensorsCount() const {
return context_->tensors_size;
}
size_t MicroAllocator::GetOperatorsCount() const {
return subgraph_->operators()->size();
}
ErrorReporter* MicroAllocator::error_reporter() { return error_reporter_; }
TfLiteStatus MicroAllocator::InitGraphAndContextTensorData(
const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
TF_LITE_ENSURE_STATUS(AllocateTfLiteTensorArray(context, subgraph));
TF_LITE_ENSURE_STATUS(
PopulateTfLiteTensorArrayFromFlatbuffer(model, context, subgraph));
return kTfLiteOk;
}
const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
auto* subgraphs = model->subgraphs();
if (subgraphs->size() != 1) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Only 1 subgraph is currently supported.\n");
return nullptr;
}
return (*subgraphs)[0];
}
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const SubGraph* subgraph,
TfLiteContext* context) {
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
// 2. Add them into the planner (such as the GreedyMemoryPlanner).
// 3. Static memory planning using the planner.
// 4. Set tensor/buffer pointers based on the offsets from the previous step.
// Note that AllocationInfo is only needed for creating the plan. It will be
// thrown away when the child allocator (tmp_allocator) goes out of scope.
{
SimpleMemoryAllocator tmp_allocator(error_reporter_,
memory_allocator_->GetHead(),
memory_allocator_->GetTail());
AllocationInfoBuilder builder(error_reporter_, &tmp_allocator);
TF_LITE_ENSURE_STATUS(
builder.Init(subgraph->tensors()->size(), scratch_buffer_count_));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, context->tensors));
TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
const AllocationInfo* allocation_info = builder.Finish();
// Remaining arena size that memory planner can use for calculating offsets.
size_t remaining_arena_size = tmp_allocator.GetAvailableMemory();
uint8_t* planner_arena =
tmp_allocator.AllocateFromHead(remaining_arena_size, /*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
TF_LITE_ENSURE_STATUS(
CreatePlan(error_reporter_, &planner, allocation_info, builder.Size()));
size_t actual_available_arena_size =
memory_allocator_->GetAvailableMemory();
// Make sure we have enough arena size.
if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Arena size is too small for activation buffers. Needed %d but only "
"%d was available.",
planner.GetMaximumMemorySize(), actual_available_arena_size);
return kTfLiteError;
}
// Commit the plan.
TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
memory_allocator_->GetHead(),
allocation_info, builder.Size()));
// Allocate the planned area, so the allocator knows it's used.
uint8_t* allocated_tensor_memory =
memory_allocator_->AllocateFromHead(planner.GetMaximumMemorySize(),
/*alignment=*/1);
TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr);
}
return kTfLiteOk;
}
} // namespace tflite
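
The planning steps listed in the comment inside CommitStaticMemoryPlan() can also be exercised directly against the GreedyMemoryPlanner. The fragment below is a sketch under the assumption that the planner's AddBuffer/GetOffsetForBuffer interface matches its use elsewhere in TFLM; the buffer sizes and lifetimes are arbitrary.

#include <cstddef>

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"

void PlanTwoBuffers() {
  static tflite::MicroErrorReporter error_reporter;
  // Working memory for the planner's own bookkeeping (the code above hands it
  // the unused head of the arena for the same purpose).
  unsigned char planner_scratch[1024];
  tflite::GreedyMemoryPlanner planner(planner_scratch, sizeof(planner_scratch));

  // Register each buffer with its size and first/last use. Buffer 0 is live
  // for ops [0, 1]; buffer 1 is live for ops [2, 3], so their lifetimes do
  // not overlap and the planner may give them the same offset.
  planner.AddBuffer(&error_reporter, /*size=*/256, /*first_time_used=*/0,
                    /*last_time_used=*/1);
  planner.AddBuffer(&error_reporter, /*size=*/128, /*first_time_used=*/2,
                    /*last_time_used=*/3);

  int offset0 = 0;
  int offset1 = 0;
  planner.GetOffsetForBuffer(&error_reporter, /*buffer_index=*/0, &offset0);
  planner.GetOffsetForBuffer(&error_reporter, /*buffer_index=*/1, &offset1);

  // GetMaximumMemorySize() is the head size FinishModelAllocation() reserves
  // via AllocateFromHead() once the plan is committed.
  size_t plan_size = planner.GetMaximumMemorySize();
  (void)plan_size;
  (void)offset0;
  (void)offset1;
}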


@ -92,32 +92,32 @@ class MicroAllocator {
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16-byte
// aligned, otherwise some headroom will be wasted.
// TODO(b/157615197): Cleanup constructor + factory usage.
static MicroAllocator* Create(TfLiteContext* context, const Model* model,
uint8_t* tensor_arena, size_t arena_size,
static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
ErrorReporter* error_reporter);
// Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
// instance. This allocator instance will use the SimpleMemoryAllocator
// instance to manage allocations internally.
static MicroAllocator* Create(TfLiteContext* context, const Model* model,
SimpleMemoryAllocator* memory_allocator,
static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
// Run through the model flatbuffer data (loaded from the TfLiteModel
// instance) to allocate nodes and registrations. We need to keep them for the
// entire life time of the model to allow persistent tensors. This method
// needs to be called before FinishTensorAllocation method. This method also
// allocates any internal Op data that is required from the flatbuffer.
TfLiteStatus PrepareFromFlatbuffer(
// Begin allocating internal resources required for model inference.
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model.
TfLiteStatus StartModelAllocation(
const Model* model, TfLiteContext* context,
const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations);
// Runs through the model and allocates all necessary input, output and
// intermediate tensors.
// WARNING: doing any allocation after calling this method has the risk of
// corrupting tensor data so this method should be the last non-const method
// called in this class.
TfLiteStatus FinishTensorAllocation();
// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation().
TfLiteStatus FinishModelAllocation(const Model* model,
TfLiteContext* context);
// Allocates a persistent buffer which has the same lifetime as the allocator.
// The memory is immediately available and is allocated from the tail of the
@ -140,56 +140,59 @@ class MicroAllocator {
size_t used_bytes() const;
protected:
MicroAllocator(TfLiteContext* context, const Model* model,
SimpleMemoryAllocator* memory_allocator,
MicroAllocator(SimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
virtual ~MicroAllocator();
// Allocates an array in the arena to hold pointers to the tensors required
// to initialize and prepare a model. These allocations are stored and
// populated on the context.
virtual TfLiteStatus AllocateTfLiteTensorArray();
virtual TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context,
const SubGraph* subgraph);
// Populates content on the list of tensor pointers required to initialize and
// prepare a model from data in the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. quantization params) is allocated from the
// arena.
virtual TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer();
virtual TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context, const SubGraph* subgraph);
// Allocates an array in the arena to hold pointers to the node and
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
NodeAndRegistration** node_and_registrations);
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations);
// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);
// Returns the number of tensors in the model subgraph.
size_t GetTensorsCount() const;
// Returns the number of operators in the model subgraph.
size_t GetOperatorsCount() const;
ErrorReporter* error_reporter();
// Initializes the graph and allocates TfLiteContext tensor data.
TfLiteStatus InitGraphAndContextTensorData();
private:
// A simple memory allocator that always allocates from the arena tail.
// Initializes the graph and allocates TfLiteContext tensor data.
TfLiteStatus InitGraphAndContextTensorData(const Model* model,
TfLiteContext* context,
const SubGraph* subgraph);
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena.
virtual TfLiteStatus CommitStaticMemoryPlan(const SubGraph* subgraph,
TfLiteContext* context);
// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
const Model* model_;
TfLiteContext* context_;
ErrorReporter* error_reporter_;
// Indicates whether a model allocation is currently in progress.
bool active_ = false;
bool model_is_allocating_;
// In reverse order for efficiency.
// i.e. scratch_buffer_handles_[0] is the handle for the last buffer,
@ -198,8 +201,6 @@ class MicroAllocator {
// How many scratch buffers have been allocated.
size_t scratch_buffer_count_ = 0;
const SubGraph* subgraph_;
TF_LITE_REMOVE_VIRTUAL_DELETE
};


@ -57,6 +57,15 @@ void EnsureUniqueVariableTensorBuffer(TfLiteContext* context,
}
}
void VerifyRegistrationAndNodeAllocation(
NodeAndRegistration* node_and_registration, size_t count) {
for (size_t i = 0; i < count; i++) {
TF_LITE_MICRO_EXPECT_NE(nullptr, node_and_registration[i].registration);
TF_LITE_MICRO_EXPECT_NE(nullptr, node_and_registration[i].node.inputs);
TF_LITE_MICRO_EXPECT_NE(nullptr, node_and_registration[i].node.outputs);
}
}
} // namespace
} // namespace testing
} // namespace tflite
@ -146,23 +155,55 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) {
TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32);
}
TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
constexpr size_t arena_size =
760 /* minimal arena size at the time of writing */ +
16 /* alignment */ + 100 /* leave some headroom for future proof */;
tflite::testing::MockOpResolver mock_resolver;
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 1024;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
&context, model, arena, arena_size, micro_test::reporter);
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size);
// Memory planning hasn't been finalized, so the used bytes is unknown.
TF_LITE_MICRO_EXPECT_EQ(0, allocator->used_bytes());
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->StartModelAllocation(
model, &context, mock_resolver,
&node_and_registration));
}
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation());
// No allocation to be done afterwards.
TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->FinishTensorAllocation());
TF_LITE_MICRO_TEST(TestFailsWhenModelFinishesBeforeStart) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
tflite::testing::MockOpResolver mock_resolver;
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 1024;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteError,
allocator->FinishModelAllocation(model, &context));
}
TF_LITE_MICRO_TEST(TestMockModelAllocation) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
tflite::testing::MockOpResolver mock_resolver;
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 1024;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size);
// NOTE: Tensor indexes match the values in GetSimpleMockModel().
tflite::testing::VerifyMockTensor(&context.tensors[0]);
@ -183,17 +224,27 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
context.tensors[2].data.raw);
TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 760 + 100);
// SimpleMockModel has 2 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
/*count=*/2);
}
TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
const tflite::Model* model = tflite::testing::GetSimpleModelWithBranch();
TfLiteContext context;
tflite::testing::MockOpResolver mock_resolver;
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 4096;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
&context, model, arena, arena_size, micro_test::reporter);
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation());
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
uint8_t* start = context.tensors[0].data.uint8;
// Check test_helpers.cc BuildSimpleModelWithBranch for model structure.
@ -208,21 +259,29 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
// t3 reuses the same memory from t0 as t0 is not an input to any node.
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
// SimpleModelWithBranch has 3 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
/*count=*/3);
}
TF_LITE_MICRO_TEST(TestFinishComplexTensorAllocation) {
TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
const tflite::Model* model = tflite::testing::GetComplexMockModel();
TfLiteContext context;
tflite::testing::MockOpResolver mock_resolver;
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 2048;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
&context, model, arena, arena_size, micro_test::reporter);
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, mock_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation());
// No allocation to be done afterwards.
TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->FinishTensorAllocation());
TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size);
// NOTE: Tensor indexes match the values in GetComplexMockModel().
tflite::testing::VerifyMockTensor(&context.tensors[0]);
@ -243,17 +302,10 @@ TF_LITE_MICRO_TEST(TestFinishComplexTensorAllocation) {
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 1);
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 4);
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 7);
}
TF_LITE_MICRO_TEST(TestDoubleInitFails) {
const tflite::Model* model = tflite::testing::GetComplexMockModel();
TfLiteContext context;
constexpr size_t arena_size = 2048;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
&context, model, arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size);
// ComplexMockModel has 3 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
/*count=*/3);
}
TF_LITE_MICRO_TESTS_END


@ -79,8 +79,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
: model_(model),
op_resolver_(op_resolver),
error_reporter_(error_reporter),
allocator_(*MicroAllocator::Create(&context_, model, tensor_arena,
tensor_arena_size, error_reporter)),
allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
error_reporter)),
context_helper_(error_reporter_, &allocator_) {
Init();
}
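
From the application's point of view the interpreter facade is unchanged: construct a MicroInterpreter and call AllocateTensors(), which now drives StartModelAllocation() and FinishModelAllocation() on the interpreter-owned allocator. A minimal sketch, assuming the usual TFLM helpers; g_my_model_data is a hypothetical application-provided flatbuffer and the arena size is arbitrary.

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_my_model_data[];  // hypothetical model flatbuffer

TfLiteStatus RunOnce() {
  static tflite::MicroErrorReporter error_reporter;
  static tflite::AllOpsResolver op_resolver;
  const tflite::Model* model = tflite::GetModel(g_my_model_data);

  constexpr size_t kArenaSize = 8 * 1024;
  static uint8_t tensor_arena[kArenaSize];

  tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena,
                                       kArenaSize, &error_reporter);
  // AllocateTensors() now calls StartModelAllocation() and
  // FinishModelAllocation() internally (see the diff below).
  TF_LITE_ENSURE_STATUS(interpreter.AllocateTensors());
  return interpreter.Invoke();
}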
@ -127,19 +127,6 @@ void MicroInterpreter::Init() {
context_.ReportError = context_helper_.ReportOpError;
context_.recommended_num_threads = 1;
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
}
}
initialization_status_ = kTfLiteOk;
}
@ -181,8 +168,26 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
}
TfLiteStatus MicroInterpreter::AllocateTensors() {
TF_LITE_ENSURE_OK(&context_, allocator_.PrepareFromFlatbuffer(
op_resolver_, &node_and_registrations_));
if (allocator_.StartModelAllocation(model_, &context_, op_resolver_,
&node_and_registrations_) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed starting model allocation.\n");
initialization_status_ = kTfLiteError;
return kTfLiteError;
}
// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that it does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
}
}
// Only allow AllocatePersistentBuffer in Init stage.
context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
@ -237,7 +242,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
context_.RequestScratchBufferInArena = nullptr;
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
TF_LITE_ENSURE_OK(&context_, allocator_.FinishTensorAllocation());
TF_LITE_ENSURE_OK(&context_,
allocator_.FinishModelAllocation(model_, &context_));
tensors_allocated_ = true;
return kTfLiteOk;
}


@ -23,30 +23,19 @@ limitations under the License.
namespace tflite {
RecordingMicroAllocator::RecordingMicroAllocator(
TfLiteContext* context, const Model* model,
RecordingSimpleMemoryAllocator* recording_memory_allocator,
ErrorReporter* error_reporter)
: MicroAllocator(context, model, recording_memory_allocator,
error_reporter),
: MicroAllocator(recording_memory_allocator, error_reporter),
recording_memory_allocator_(recording_memory_allocator) {}
RecordingMicroAllocator* RecordingMicroAllocator::Create(
TfLiteContext* context, const Model* model,
RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter) {
TFLITE_DCHECK(context != nullptr);
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(memory_allocator != nullptr);
uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
RecordingMicroAllocator* allocator = new (allocator_buffer)
RecordingMicroAllocator(context, model, memory_allocator, error_reporter);
if (allocator->InitGraphAndContextTensorData() != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter,
"RecordingMicroAllocator: Failed to initialize model graph.");
return nullptr;
}
RecordingMicroAllocator(memory_allocator, error_reporter);
return allocator;
}
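
With the context and model removed from the constructor, the recording allocator can be driven with the same two-phase calls and then queried for its tallies. A sketch of that audit flow, modeled on the tests further below; g_my_model_data is a hypothetical flatbuffer, the arena size is arbitrary, and the header paths are assumptions.

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/recording_micro_allocator.h"
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_my_model_data[];  // hypothetical model flatbuffer

void AuditModelAllocations() {
  static tflite::MicroErrorReporter error_reporter;
  static tflite::AllOpsResolver op_resolver;
  const tflite::Model* model = tflite::GetModel(g_my_model_data);
  static uint8_t arena[8 * 1024];

  tflite::RecordingSimpleMemoryAllocator memory_allocator(&error_reporter,
                                                          arena, sizeof(arena));
  tflite::RecordingMicroAllocator* allocator =
      tflite::RecordingMicroAllocator::Create(&memory_allocator,
                                              &error_reporter);

  TfLiteContext context;
  tflite::NodeAndRegistration* node_and_registrations = nullptr;
  allocator->StartModelAllocation(model, &context, op_resolver,
                                  &node_and_registrations);
  allocator->FinishModelAllocation(model, &context);

  // Dump the per-category tallies (tensor array, quantization data, node and
  // registration array, op data) captured by the overridden hooks.
  allocator->PrintAllocations();
}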
@ -101,48 +90,52 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
allocation.requested_bytes, allocation.count);
}
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray() {
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray(
TfLiteContext* context, const SubGraph* subgraph) {
SnapshotAllocationUsage(recorded_tflite_tensor_array_data_);
TfLiteStatus status = MicroAllocator::AllocateTfLiteTensorArray();
TfLiteStatus status =
MicroAllocator::AllocateTfLiteTensorArray(context, subgraph);
RecordAllocationUsage(recorded_tflite_tensor_array_data_);
recorded_tflite_tensor_array_data_.count = GetTensorsCount();
recorded_tflite_tensor_array_data_.count = context->tensors_size;
return status;
}
TfLiteStatus
RecordingMicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer() {
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
SnapshotAllocationUsage(recorded_tflite_tensor_array_quantization_data_);
TfLiteStatus status =
MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer();
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
model, context, subgraph);
RecordAllocationUsage(recorded_tflite_tensor_array_quantization_data_);
return status;
}
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
NodeAndRegistration** node_and_registrations) {
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) {
SnapshotAllocationUsage(recorded_node_and_registration_array_data_);
TfLiteStatus status =
MicroAllocator::AllocateNodeAndRegistrations(node_and_registrations);
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
subgraph, node_and_registrations);
RecordAllocationUsage(recorded_node_and_registration_array_data_);
recorded_node_and_registration_array_data_.count = GetOperatorsCount();
recorded_node_and_registration_array_data_.count =
subgraph->operators()->size();
return status;
}
TfLiteStatus
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
SnapshotAllocationUsage(recorded_op_data_);
TfLiteStatus status =
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
op_resolver, node_and_registrations);
model, subgraph, op_resolver, node_and_registrations);
RecordAllocationUsage(recorded_op_data_);
return status;


@ -52,7 +52,6 @@ typedef struct RecordedAllocation {
class RecordingMicroAllocator : public MicroAllocator {
public:
static RecordingMicroAllocator* Create(
TfLiteContext* context, const Model* model,
RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
@ -65,11 +64,16 @@ class RecordingMicroAllocator : public MicroAllocator {
void PrintAllocations();
protected:
TfLiteStatus AllocateTfLiteTensorArray() override;
TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer() override;
TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context,
const SubGraph* subgraph) override;
TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context,
const SubGraph* subgraph) override;
TfLiteStatus AllocateNodeAndRegistrations(
const SubGraph* subgraph,
NodeAndRegistration** node_and_registrations) override;
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) override;
@ -77,8 +81,7 @@ class RecordingMicroAllocator : public MicroAllocator {
void RecordAllocationUsage(RecordedAllocation& recorded_allocation);
private:
RecordingMicroAllocator(TfLiteContext* context, const Model* model,
RecordingSimpleMemoryAllocator* memory_allocator,
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
ErrorReporter* error_reporter);
void PrintRecordedAllocation(RecordedAllocationType allocation_type,


@ -35,14 +35,22 @@ TF_LITE_MICRO_TESTS_BEGIN
TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
TfLiteContext context;
tflite::AllOpsResolver all_ops_resolver;
tflite::NodeAndRegistration* node_and_registration;
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
uint8_t arena[kTestConvArenaSize];
tflite::RecordingSimpleMemoryAllocator memory_allocator(
micro_test::reporter, arena, kTestConvArenaSize);
tflite::RecordingMicroAllocator* micro_allocator =
tflite::RecordingMicroAllocator::Create(
&context, model, &memory_allocator, micro_test::reporter);
tflite::RecordingMicroAllocator::Create(&memory_allocator,
micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
model, &context, all_ops_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_GE(
kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context));
tflite::RecordedAllocation recorded_allocation =
micro_allocator->GetRecordedAllocation(
@ -56,14 +64,22 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
TfLiteContext context;
tflite::AllOpsResolver all_ops_resolver;
tflite::NodeAndRegistration* node_and_registration;
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
uint8_t arena[kTestConvArenaSize];
tflite::RecordingSimpleMemoryAllocator memory_allocator(
micro_test::reporter, arena, kTestConvArenaSize);
tflite::RecordingMicroAllocator* micro_allocator =
tflite::RecordingMicroAllocator::Create(
&context, model, &memory_allocator, micro_test::reporter);
tflite::RecordingMicroAllocator::Create(&memory_allocator,
micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
model, &context, all_ops_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_GE(
kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context));
// Walk the model subgraph to find all tensors with quantization params and
// keep a tally.
@ -106,20 +122,22 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) {
TfLiteContext context;
tflite::AllOpsResolver all_ops_resolver;
tflite::NodeAndRegistration* node_and_registration;
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
uint8_t arena[kTestConvArenaSize];
tflite::RecordingSimpleMemoryAllocator memory_allocator(
micro_test::reporter, arena, kTestConvArenaSize);
tflite::RecordingMicroAllocator* micro_allocator =
tflite::RecordingMicroAllocator::Create(
&context, model, &memory_allocator, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
tflite::AllOpsResolver ops_resolver;
tflite::NodeAndRegistration* node_and_registrations;
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
micro_allocator->PrepareFromFlatbuffer(
ops_resolver, &node_and_registrations));
tflite::RecordingMicroAllocator* micro_allocator =
tflite::RecordingMicroAllocator::Create(&memory_allocator,
micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
model, &context, all_ops_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_GE(
kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context));
size_t num_ops = model->subgraphs()->Get(0)->operators()->size();
tflite::RecordedAllocation recorded_allocation =