Switch TF Micro to use TfLiteEvalTensor structs by default.

This change significantly modifies the way memory is used in TF Micro. Currently, large blocks of persistent memory are allocated for TfLiteTensor structs and any associated quantization data. With this change, those TfLiteTensor structs and quantization data are instead allocated from the "temp" section of the memory arena. Rather than allocating a large block of TfLiteTensor structs, a minimal TfLiteEvalTensor struct is allocated per tensor. This new struct serves as the source of truth for all buffers in the graph.

Everything still works in the kernel implementations with this change - they are just temporarily slower. All TfLiteTensor structs fetched from GetInput()/GetOutput()/etc. are now allocated on the fly through the temp allocation. Each kernel should be updated to fetch the TfLiteEvalTensor struct in its Eval() block, and quantization data should be cached in those op kernels.

This CL saves up to 50% of the arena for larger conv-based models.

PiperOrigin-RevId: 322224278
Change-Id: Id32509a75c9f68177f5bb6b850ea11907afcbb1d
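For kernel authors, the migration described above looks roughly like the sketch below. This is an illustrative sketch only, not code from this CL: the OpData layout, the Prepare()/Eval() bodies, and the use of TfLiteContext::GetEvalTensor as the fetch mechanism are assumptions made for the example, and the exact helper names in the micro kernel API may differ.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"

// Hypothetical op-specific struct: quantization values are read once from the
// (now temp-allocated) TfLiteTensor in Prepare() and cached for use in Eval().
struct OpData {
  int32_t output_zero_point;
  float output_scale;
};

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  // GetInput()/GetOutput() still work, but the TfLiteTensor structs they
  // return now live in the temp arena section, so cache what Eval() needs.
  const TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
  OpData* data = static_cast<OpData*>(node->user_data);
  data->output_zero_point = output->params.zero_point;
  data->output_scale = output->params.scale;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  // Fetch the minimal TfLiteEvalTensor structs (the source of truth for
  // buffers) instead of rebuilding full TfLiteTensor structs per invocation.
  const TfLiteEvalTensor* input =
      context->GetEvalTensor(context, node->inputs->data[0]);
  TfLiteEvalTensor* output =
      context->GetEvalTensor(context, node->outputs->data[0]);
  const OpData* data = static_cast<const OpData*>(node->user_data);
  // ... compute on input->data / output->data using the cached OpData ...
  (void)input;
  (void)output;
  (void)data;
  return kTfLiteOk;
}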
Commit: 4f6c86b163
Parent: b7fb764f9f
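The diff below reworks the MicroAllocator entry points so that a TfLiteEvalTensor array becomes the out-param that owns all runtime buffers. For orientation, the new call sequence looks like this (a sketch assembled from the updated tests further down; the arena, error reporter, model, and op resolver are placeholders):

TfLiteEvalTensor* eval_tensors = nullptr;
tflite::NodeAndRegistration* node_and_registrations = nullptr;
tflite::MicroAllocator* allocator =
    tflite::MicroAllocator::Create(arena, arena_size, error_reporter);
// Allocates TfLiteEvalTensor and NodeAndRegistration data from the arena tail.
TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
    model, op_resolver, &node_and_registrations, &eval_tensors));
// Plans non-persistent buffers into the arena head and allocates variables.
TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(model, eval_tensors));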
@@ -294,6 +294,7 @@ tflite_micro_cc_test(
"micro_allocator_test.cc",
],
deps = [
":memory_helpers",
":micro_framework",
":test_helpers",
"//tensorflow/lite/micro/testing:micro_test",

@@ -49,15 +49,14 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
// Run this test with '--copt=-DTF_LITE_STATIC_MEMORY' to get optimized memory
// runtime values:
#ifdef TF_LITE_STATIC_MEMORY
constexpr int kKeywordModelTotalSize = 18192;
constexpr int kKeywordModelTailSize = 17520;
constexpr int kKeywordModelTotalSize = 14336;
constexpr int kKeywordModelTailSize = 13664;
#else
constexpr int kKeywordModelTotalSize = 21152;
constexpr int kKeywordModelTailSize = 20480;
constexpr int kKeywordModelTotalSize = 14704;
constexpr int kKeywordModelTailSize = 14032;
#endif
constexpr int kKeywordModelHeadSize = 672;
constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728;
constexpr int kKeywordModelOpRuntimeDataSize = 148;

constexpr int kTestConvModelArenaSize = 12 * 1024;
@@ -69,14 +68,13 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
// NOTE: These values are measured on x86-64:
// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
#ifdef TF_LITE_STATIC_MEMORY
constexpr int kTestConvModelTotalSize = 10816;
constexpr int kTestConvModelTailSize = 3072;
constexpr int kTestConvModelTotalSize = 9488;
constexpr int kTestConvModelTailSize = 1744;
#else
constexpr int kTestConvModelTotalSize = 11712;
constexpr int kTestConvModelTailSize = 3968;
constexpr int kTestConvModelTotalSize = 9648;
constexpr int kTestConvModelTailSize = 1904;
#endif
constexpr int kTestConvModelHeadSize = 7744;
constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768;
constexpr int kTestConvModelOpRuntimeDataSize = 136;

struct ModelAllocationThresholds {
@@ -86,7 +84,6 @@ struct ModelAllocationThresholds {
size_t head_alloc_size = 0;
size_t tail_alloc_size = 0;
size_t tensor_variable_buffer_data_size = 0;
size_t tensor_quantization_data_size = 0;
size_t op_runtime_data_size = 0;
};

@@ -124,12 +121,12 @@ void ValidateModelAllocationThresholds(
"Tail", allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
thresholds.tail_alloc_size);
EnsureAllocatedSizeThreshold(
"TfLiteTensor",
"TfLiteEvalTensor",
allocator
.GetRecordedAllocation(
tflite::RecordedAllocationType::kTfLiteTensorArray)
tflite::RecordedAllocationType::kTfLiteEvalTensorData)
.used_bytes,
sizeof(TfLiteTensor) * thresholds.tensor_count);
sizeof(TfLiteEvalTensor) * thresholds.tensor_count);
EnsureAllocatedSizeThreshold(
"VariableBufferData",
allocator
@@ -138,12 +135,19 @@ void ValidateModelAllocationThresholds(
.used_bytes,
thresholds.tensor_variable_buffer_data_size);
EnsureAllocatedSizeThreshold(
"QuantizationData",
"PersistentTfLiteTensor",
allocator
.GetRecordedAllocation(
tflite::RecordedAllocationType::kPersistentTfLiteTensorData)
.used_bytes,
0);
EnsureAllocatedSizeThreshold(
"PersistentTfliteTensorQuantizationData",
allocator
.GetRecordedAllocation(tflite::RecordedAllocationType::
kTfLiteTensorArrayQuantizationData)
kPersistentTfLiteTensorQuantizationData)
.used_bytes,
thresholds.tensor_quantization_data_size);
0);
EnsureAllocatedSizeThreshold(
"NodeAndRegistration",
allocator
@@ -159,8 +163,7 @@ void ValidateModelAllocationThresholds(
thresholds.op_runtime_data_size);

// Ensure tail allocation recording is not missing any large chunks:
size_t tail_est_length = sizeof(TfLiteTensor) * thresholds.tensor_count +
thresholds.tensor_quantization_data_size +
size_t tail_est_length = sizeof(TfLiteEvalTensor) * thresholds.tensor_count +
thresholds.tensor_variable_buffer_data_size +
sizeof(tflite::NodeAndRegistration) *
thresholds.node_and_registration_count +
@@ -191,8 +194,6 @@ TF_LITE_MICRO_TEST(TestKeywordModelMemoryThreshold) {
thresholds.tail_alloc_size = kKeywordModelTailSize;
thresholds.tensor_variable_buffer_data_size =
kKeywordModelTfLiteTensorVariableBufferDataSize;
thresholds.tensor_quantization_data_size =
kKeywordModelTfLiteTensorQuantizationDataSize;
thresholds.op_runtime_data_size = kKeywordModelOpRuntimeDataSize;

ValidateModelAllocationThresholds(interpreter.GetMicroAllocator(),
@@ -214,8 +215,6 @@ TF_LITE_MICRO_TEST(TestConvModelMemoryThreshold) {
thresholds.total_alloc_size = kTestConvModelTotalSize;
thresholds.head_alloc_size = kTestConvModelHeadSize;
thresholds.tail_alloc_size = kTestConvModelTailSize;
thresholds.tensor_quantization_data_size =
kTestConvModelTfLiteTensorQuantizationDataSize;
thresholds.op_runtime_data_size = kTestConvModelOpRuntimeDataSize;

ValidateModelAllocationThresholds(interpreter.GetMicroAllocator(),
@@ -101,6 +101,23 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
return kTfLiteOk;
}

TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes) {
TFLITE_DCHECK(out_bytes != nullptr);

int element_count = 1;
// If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element.
if (eval_tensor->dims != nullptr) {
for (int n = 0; n < eval_tensor->dims->size; ++n) {
element_count *= eval_tensor->dims->data[n];
}
}
size_t type_size;
TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size));
*out_bytes = element_count * type_size;
return kTfLiteOk;
}

TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context,
const TfLiteTensor* input1,
const TfLiteTensor* input2,

@@ -41,6 +41,11 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
size_t* bytes, size_t* type_size,
ErrorReporter* error_reporter);

// How many bytes are used in a TfLiteEvalTensor instance. The byte length is
// returned in out_bytes.
TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor,
size_t* out_bytes);

// Deduce output dimensions from input and allocate given size.
// Useful for operators with two inputs where the largest input should equal the
// output dimension.
@@ -163,7 +163,7 @@ class AllocationInfoBuilder {
// Add allocation information for the tensors.
TfLiteStatus AddTensors(const SubGraph* subgraph,
const int32_t* offline_offsets,
TfLiteTensor* runtime_tensors);
TfLiteEvalTensor* eval_tensors);

// Add allocation information for the scratch buffers.
TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles);
@@ -199,16 +199,20 @@ TfLiteStatus AllocationInfoBuilder::Allocate() {

TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
const int32_t* offline_offsets,
TfLiteTensor* runtime_tensors) {
TfLiteEvalTensor* eval_tensors) {
TFLITE_DCHECK(eval_tensors != nullptr);

// Set up allocation info for all tensors.
for (size_t i = 0; i < tensor_count_; ++i) {
AllocationInfo* current = &info_[i];
// TfLiteTensor.uint8 field is deprecated so use .data field instead.
current->output_ptr = &(runtime_tensors[i].data.data);
current->bytes = runtime_tensors[i].bytes;
current->output_ptr = &(eval_tensors[i].data.data);

TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));

current->first_created = -1;
current->last_used = -1;
current->needs_allocating = (runtime_tensors[i].data.data == nullptr) &&
current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
(!subgraph->tensors()->Get(i)->is_variable());
if (offline_offsets) {
current->offline_offset = offline_offsets[i];
@@ -427,25 +431,19 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
return kTfLiteOk;
}

TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
// Returns a pointer to any buffer associated with the flatbuffer tensor. Can
// return nullptr if no buffer is found.
void* GetFlatbufferTensorBuffer(
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result) {
*result = {};
// Make sure the serialized type is one we know how to deal with, and convert
// it from a flatbuffer enum into a constant used by the kernel C API.
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&result->type, error_reporter));
// Make sure we remember if the serialized tensor is designated as a variable.
result->is_variable = flatbuffer_tensor.is_variable();

const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers) {
// We need to figure out where the actual contents of this tensor are stored
// in memory. We'll check to see if there's a serialized buffer (pretty much
// the same as a constant op in TensorFlow) associated with this tensor first,
// and if there is update the runtime structure to point to its location in
// memory.
// First see if there's any buffer information in the serialized tensor.
// TODO(b/160894903): Add better unit tests that validate flatbuffer values.
void* out_buffer = nullptr;
if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
@@ -453,10 +451,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
if (array->size()) {
// We've found a buffer with valid data, so update the runtime tensor
// data structure to point to it.
result->data.data =
const_cast<void*>(static_cast<const void*>(array->data()));
// We set the data from a serialized buffer, so record that.
result->allocation_type = kTfLiteMmapRo;
out_buffer = const_cast<void*>(static_cast<const void*>(array->data()));
}
}
// TODO(petewarden): It's not clear in what circumstances we could have a
@@ -465,6 +460,25 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
// error condition? It would be good to tighten up the specification to make
// it less ambiguous.
}
return out_buffer;
}

TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, bool allocate_temp,
const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteTensor* result) {
TFLITE_DCHECK(result != nullptr);

*result = {};
// Make sure the serialized type is one we know how to deal with, and convert
// it from a flatbuffer enum into a constant used by the kernel C API.
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&result->type, error_reporter));
// Make sure we remember if the serialized tensor is designated as a variable.
result->is_variable = flatbuffer_tensor.is_variable();

result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);

// TODO(petewarden): Some of these paths aren't getting enough testing
// coverage, so we should figure out some tests that exercise them.
@@ -473,6 +487,9 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
// make a note that they will be allocated from memory. The actual
// allocation won't happen until later.
result->allocation_type = kTfLiteArenaRw;
} else {
// We set the data from a serialized buffer, so record that.
result->allocation_type = kTfLiteMmapRo;
}

// Figure out what the size in bytes of the buffer is and store it.
@@ -530,7 +547,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
// zero_point is stored as a int64_t.
quantization->zero_point =
allocate_temp
? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateTemp(
TfLiteIntArrayGetSizeInBytes(channels),
alignof(TfLiteIntArray)))
: reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
@@ -560,6 +577,29 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
return kTfLiteOk;
}
TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
*result = {};
// Make sure the serialized type is one we know how to deal with, and convert
// it from a flatbuffer enum into a constant used by the kernel C API.
TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
&result->type, error_reporter));

result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);

if (flatbuffer_tensor.shape() == nullptr) {
// flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
// tensor.
result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
} else {
TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
}
return kTfLiteOk;
}

} // namespace internal

MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
@@ -599,11 +639,10 @@ MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
}

TfLiteStatus MicroAllocator::StartModelAllocation(
const Model* model, TfLiteContext* context,
const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations) {
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors) {
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(context != nullptr);

if (model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
@@ -612,23 +651,19 @@ TfLiteStatus MicroAllocator::StartModelAllocation(
return kTfLiteError;
}

const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);
model_is_allocating_ = true;

TF_LITE_ENSURE_STATUS(AllocateTfLiteTensorArray(context, subgraph));
TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
TF_LITE_ENSURE_STATUS(
PopulateTfLiteTensorArrayFromFlatbuffer(model, context, subgraph));
TF_LITE_ENSURE_STATUS(
AllocateNodeAndRegistrations(subgraph, node_and_registrations));
AllocateNodeAndRegistrations(model, node_and_registrations));
TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
model, subgraph, op_resolver, *node_and_registrations));
model, op_resolver, *node_and_registrations));

return kTfLiteOk;
}

TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
TfLiteContext* context) {
TfLiteStatus MicroAllocator::FinishModelAllocation(
const Model* model, TfLiteEvalTensor* eval_tensors) {
if (!model_is_allocating_) {
TF_LITE_REPORT_ERROR(error_reporter_,
"MicroAllocator: Model allocation finished before "
@@ -639,8 +674,8 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, context, subgraph));
TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph));
TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors));
TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));

model_is_allocating_ = false;
return kTfLiteOk;
@@ -711,41 +746,13 @@ size_t MicroAllocator::used_bytes() const {
return memory_allocator_->GetUsedBytes();
}

TfLiteStatus MicroAllocator::AllocateTfLiteTensorArray(
TfLiteContext* context, const SubGraph* subgraph) {
context->tensors_size = subgraph->tensors()->size();
context->tensors =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor) * context->tensors_size, alignof(TfLiteTensor)));
if (context->tensors == nullptr) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to allocate memory for context->tensors, %d bytes required",
sizeof(TfLiteTensor) * context->tensors_size);
return kTfLiteError;
}
return kTfLiteOk;
}

TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
// Initialize tensors in context_ using the flatbuffer for quantization data.
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
TfLiteStatus status = internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, /*allocate_temp=*/false,
*subgraph->tensors()->Get(i), model->buffers(), error_reporter_,
&context->tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
return kTfLiteError;
}
}
return kTfLiteOk;
}

TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) {
const Model* model, NodeAndRegistration** node_and_registrations) {
TFLITE_DCHECK(node_and_registrations);

const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
memory_allocator_->AllocateFromTail(
sizeof(NodeAndRegistration) * subgraph->operators()->size(),
@@ -761,9 +768,14 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
}

TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations) {
TFLITE_DCHECK(model != nullptr);
TFLITE_DCHECK(node_and_registrations != nullptr);

const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

TfLiteStatus status = kTfLiteOk;
auto* opcodes = model->operator_codes();
MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
@@ -847,25 +859,40 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
return kTfLiteOk;
}

TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(const Model* model,
int tensor_index) {
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

// This value is allocated from persistent arena space. It is guaranteed to be
// around for the lifetime of the application.
TfLiteTensor* tensor =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, /*allocate_temp=*/false,
*subgraph->tensors()->Get(tensor_index), model->buffers(),
error_reporter_, tensor);
AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);

// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the persistent section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
/*allocate_temp=*/false) !=
kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to populate a persistent TfLiteTensor struct "
"from flatbuffer data!");
return nullptr;
}

if (eval_tensors != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensors structs. These structs are the source of truth, simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
}
return tensor;
}

TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(const Model* model,
int tensor_index) {
TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

@@ -875,10 +902,25 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(const Model* model,
TfLiteTensor* tensor =
reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, /*allocate_temp=*/true,
*subgraph->tensors()->Get(tensor_index), model->buffers(),
error_reporter_, tensor);

// Populate any fields from the flatbuffer, since this TfLiteTensor struct is
// allocated in the temp section of the arena, ensure that additional
// allocations also take place in that section of the arena.
if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
/*allocate_temp=*/true) != kTfLiteOk) {
TF_LITE_REPORT_ERROR(
error_reporter_,
"Failed to populate a temp TfLiteTensor struct from flatbuffer data!");
return nullptr;
}

if (eval_tensors != nullptr) {
// Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
// and not located in the flatbuffer are stored on the pre-allocated list of
// TfLiteEvalTensors structs. These structs are the source of truth, simply
// point the corresponding buffer to the new TfLiteTensor data value.
tensor->data.data = eval_tensors[tensor_index].data.data;
}
return tensor;
}
@@ -886,26 +928,79 @@ void MicroAllocator::ResetTempAllocations() {
memory_allocator_->ResetTempAllocations();
}

TfLiteStatus MicroAllocator::AllocateVariables(TfLiteContext* context,
const SubGraph* subgraph) {
for (size_t i = 0; i < context->tensors_size; ++i) {
if (subgraph->tensors()->Get(i)->is_variable()) {
context->tensors[i].data.data = memory_allocator_->AllocateFromTail(
context->tensors[i].bytes, kBufferAlignment);
// Allocation failure.
if (context->tensors[i].data.data == nullptr) {
TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors) {
TFLITE_DCHECK(eval_tensors != nullptr);

const SubGraph* subgraph = GetSubGraphFromModel(model);
TFLITE_DCHECK(subgraph != nullptr);

size_t alloc_count = subgraph->tensors()->size();
TfLiteEvalTensor* tensors =
reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
if (tensors == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate memory for context->eval_tensors, "
"%d bytes required",
sizeof(TfLiteEvalTensor) * alloc_count);
return kTfLiteError;
}

for (size_t i = 0; i < alloc_count; ++i) {
TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
error_reporter_, &tensors[i]);
if (status != kTfLiteOk) {
TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
i);
return kTfLiteError;
}
}
*eval_tensors = tensors;
return kTfLiteOk;
}

TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) {
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
auto* tensor = subgraph->tensors()->Get(i);
if (tensor->is_variable()) {
size_t buffer_size;
TF_LITE_ENSURE_STATUS(
TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));

eval_tensors[i].data.data =
memory_allocator_->AllocateFromTail(buffer_size, kBufferAlignment);

if (eval_tensors[i].data.data == nullptr) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Failed to allocate variable tensor of size %d",
context->tensors[i].bytes);
buffer_size);
return kTfLiteError;
}
}
tflite::ResetVariableTensor(&(context->tensors[i]));
}

return kTfLiteOk;
}

TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}

TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp) {
// TODO(b/160894903): This method serves as a stub to ensure quantized
// allocations in the tail can be recorded. Once all kernels have been ported
// to the new API this can be dropped.
return internal::InitializeTfLiteTensorFromFlatbuffer(
memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
model->buffers(), error_reporter_, tensor);
}

ErrorReporter* MicroAllocator::error_reporter() const {
return error_reporter_;
}
@@ -920,9 +1015,9 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
return (*subgraphs)[0];
}

TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
TfLiteContext* context,
const SubGraph* subgraph) {
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
const Model* model, const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors) {
// Create static memory plan
// 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
// 2. Add them into the planner (such as the GreedyMemoryPlanner).
@@ -942,8 +1037,8 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
const int32_t* offline_planner_offsets = nullptr;
TF_LITE_ENSURE_STATUS(
builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, offline_planner_offsets,
context->tensors));
TF_LITE_ENSURE_STATUS(
builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));

TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
const AllocationInfo* allocation_info = builder.Finish();
@@ -1,5 +1,5 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -110,32 +110,40 @@ class MicroAllocator {
// This method will run through the flatbuffer data supplied in the model to
// properly allocate tensor, node, and op registration data. This method is
// expected to be followed with a call to FinishModelAllocation() before
// resuming allocation with another model.
// resuming allocation with another model. All persistent tensor buffers are
// stored in the out-param eval_tensors. This value is allocated from the
// persistent memory arena and will be used to host runtime tensor buffers.
TfLiteStatus StartModelAllocation(
const Model* model, TfLiteContext* context,
const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations);
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration** node_and_registrations,
TfLiteEvalTensor** eval_tensors);

// Finish allocating internal resources required for model inference.
// This method will plan non-persistent buffers and commit a memory plan to
// the 'head' section of the memory arena. All variable tensor data will also
// be allocated. This method should be called after assigning model resources
// in StartModelAllocation().
// in StartModelAllocation(). The eval_tensors pointer should be the value
// passed into this class during StartModelAllocation().
TfLiteStatus FinishModelAllocation(const Model* model,
TfLiteContext* context);
TfLiteEvalTensor* eval_tensors);

// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// persistent arena memory and is only guaranteed for the lifetime of the
// application.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(const Model* model,
int tensor_index);
// application. The eval_tensors pointer should be the value passed into this
// class during StartModelAllocation() and contains the source-of-truth for
// buffers.
virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

// Allocates a TfLiteTensor struct and populates the returned value with
// properties from the model flatbuffer. This struct is allocated from
// temporary arena memory and is only guaranteed until a call is made to
// ResetTempAllocations().
// ResetTempAllocations(). The eval_tensors pointer should be the value passed
// into this class during StartModelAllocation() and contains the
// source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
TfLiteEvalTensor* eval_tensors,
int tensor_index);

// Resets all temporary allocations. This method should be called after a
@@ -168,51 +176,59 @@ class MicroAllocator {
ErrorReporter* error_reporter);
virtual ~MicroAllocator();

// Allocates an array in the arena to hold pointers to the tensors required
// to initialize and prepare a model. These allocations are stored and
// populated on the context.
// TODO(b/160894903): Remove this function when new kernel API is ready.
virtual TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context,
const SubGraph* subgraph);

// Populates content on the list of tensor pointers required to initialize and
// prepare a model from data in the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. quantization params) is allocated from the
// arena.
// TODO(b/160894903): Remove this function when new kernel API is ready.
virtual TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer(
const Model* model, TfLiteContext* context, const SubGraph* subgraph);

// Allocates an array in the arena to hold pointers to the node and
// registration pointers required to represent the inference graph of the
// model.
virtual TfLiteStatus AllocateNodeAndRegistrations(
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations);
const Model* model, NodeAndRegistration** node_and_registrations);

// Populates node and registration pointers representing the inference graph
// of the model from values inside the flatbuffer (loaded from the TfLiteModel
// instance). Persistent data (e.g. operator data) is allocated from the
// arena.
virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
const Model* model, const SubGraph* subgraph,
const MicroOpResolver& op_resolver,
const Model* model, const MicroOpResolver& op_resolver,
NodeAndRegistration* node_and_registrations);

// Allocates the list of persistent TfLiteEvalTensors that are used for the
// "eval" phase of model inference. These structs will be the source of truth
// for all tensor buffers. Allocation results are stored in the out-param
// eval_tensors.
virtual TfLiteStatus AllocateTfLiteEvalTensors(
const Model* model, TfLiteEvalTensor** eval_tensors);

// Allocates persistent tensor buffers for variable tensors in the subgraph.
virtual TfLiteStatus AllocateVariables(TfLiteContext* context,
const SubGraph* subgraph);
virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);

// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this method. It is only used to record TfLiteTensor persistent allocations.
virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

// Populates a TfLiteTensor struct with data from the model flatbuffer. Any
// quantization data is allocated from either the tail (persistent) or temp
// sections of the arena based on the allocation flag.
// TODO(b/160894903): Once all kernels have been updated to the new API drop
// this function since all allocations for quantized data will take place in
// the temp section.
virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
int tensor_index, bool allocate_temp);

ErrorReporter* error_reporter() const;

private:
// Returns the first subgraph from the model.
const SubGraph* GetSubGraphFromModel(const Model* model);

private:
// Commits a memory plan for all non-persistent buffer allocations in the
// 'head' section of the memory arena.
// 'head' section of the memory arena. The eval_tensors pointer is the list of
// pre-allocated TfLiteEvalTensor structs that will point to the buffers that
// will be allocated into the head section in this function call.
virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
TfLiteContext* context,
const SubGraph* subgraph);
const SubGraph* subgraph,
TfLiteEvalTensor* eval_tensors);

// A simple memory allocator that always allocates from the arena tail or head.
SimpleMemoryAllocator* memory_allocator_;
@@ -17,6 +17,7 @@ limitations under the License.

#include <cstdint>

#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/micro/test_helpers.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
@@ -28,7 +29,7 @@ namespace {

constexpr int kExpectedAlignment = 4;

void VerifyMockTensor(TfLiteTensor* tensor, bool is_variable = false) {
void VerifyMockTfLiteTensor(TfLiteTensor* tensor, bool is_variable = false) {
TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, tensor->type);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]);
@@ -40,7 +41,7 @@ void VerifyMockTensor(TfLiteTensor* tensor, bool is_variable = false) {
kExpectedAlignment));
}

void VerifyMockWeightTensor(TfLiteTensor* tensor) {
void VerifyMockWeightTfLiteTensor(TfLiteTensor* tensor) {
TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, tensor->type);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]);
@@ -48,12 +49,54 @@ void VerifyMockWeightTensor(TfLiteTensor* tensor) {
TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw);
}

void EnsureUniqueVariableTensorBuffer(TfLiteContext* context,
void VerifyMockTfLiteEvalTensor(TfLiteEvalTensor* tensor) {
TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, tensor->type);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]);
size_t buffer_size;
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, tflite::TfLiteEvalTensorByteLength(tensor, &buffer_size));
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(4), buffer_size);
TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw);
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(0),
(reinterpret_cast<std::uintptr_t>(tensor->data.raw) %
kExpectedAlignment));
}

void VerifyMockWeightTfLiteEvalTensor(TfLiteEvalTensor* tensor) {
TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, tensor->type);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size);
TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]);
size_t buffer_size;
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, tflite::TfLiteEvalTensorByteLength(tensor, &buffer_size));
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(1), buffer_size);
TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw);
}

void VerifyMockTensor(const Model* model, MicroAllocator* allocator,
TfLiteEvalTensor* eval_tensors, int tensor_idx,
bool is_variable = false) {
VerifyMockTfLiteTensor(allocator->AllocatePersistentTfLiteTensor(
model, eval_tensors, tensor_idx),
is_variable);
VerifyMockTfLiteEvalTensor(&eval_tensors[tensor_idx]);
}

void VerifyMockWeightTensor(const Model* model, MicroAllocator* allocator,
TfLiteEvalTensor* eval_tensors, int tensor_idx) {
VerifyMockWeightTfLiteTensor(allocator->AllocatePersistentTfLiteTensor(
model, eval_tensors, tensor_idx));
VerifyMockWeightTfLiteEvalTensor(&eval_tensors[tensor_idx]);
}

void EnsureUniqueVariableTensorBuffer(const Model* model,
TfLiteEvalTensor* eval_tensors,
const int variable_tensor_idx) {
for (size_t i = 0; i < context->tensors_size; i++) {
for (size_t i = 0; i < GetModelTensorCount(model); i++) {
if (i != static_cast<size_t>(variable_tensor_idx)) {
TF_LITE_MICRO_EXPECT_NE(context->tensors[variable_tensor_idx].data.raw,
context->tensors[i].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[variable_tensor_idx].data.raw,
eval_tensors[i].data.raw);
}
}
}
@@ -182,7 +225,7 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) {

TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 1024;
@@ -191,29 +234,31 @@ TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) {
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT(nullptr != allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteError, allocator->StartModelAllocation(
model, &context, op_resolver, &node_and_registration));
kTfLiteError,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
}

TF_LITE_MICRO_TEST(TestFailsWhenModelFinishesBeforeStart) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
constexpr size_t arena_size = 1024;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT(nullptr != allocator);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteError,
allocator->FinishModelAllocation(model, &context));
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteError, allocator->FinishModelAllocation(model, eval_tensors));
}

TF_LITE_MICRO_TEST(TestMockModelAllocation) {
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 1024;
@@ -222,32 +267,27 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) {
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT(nullptr != allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));

TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(4), context.tensors_size);
size_t model_tensor_size = tflite::testing::GetModelTensorCount(model);
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(4), model_tensor_size);

// NOTE: Tensor indexes match the values in GetSimpleMockModel().
tflite::testing::VerifyMockTensor(&context.tensors[0]);
tflite::testing::VerifyMockWeightTensor(&context.tensors[1]);
tflite::testing::VerifyMockTensor(&context.tensors[2]);
tflite::testing::VerifyMockTensor(&context.tensors[3]);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 0);
tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 1);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 2);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 3);

TF_LITE_MICRO_EXPECT_NE(context.tensors[1].data.raw,
context.tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(context.tensors[2].data.raw,
context.tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(context.tensors[1].data.raw,
context.tensors[2].data.raw);
TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
context.tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
context.tensors[1].data.raw);
TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
context.tensors[2].data.raw);
TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 760 + 100);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[1].data.raw, eval_tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[2].data.raw, eval_tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[1].data.raw, eval_tensors[2].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[0].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[1].data.raw);
TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[2].data.raw);
TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 856 + 100);

// SimpleMockModel has 2 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
@@ -256,7 +296,7 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) {

TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
const tflite::Model* model = tflite::testing::GetSimpleModelWithBranch();
TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 4096;
@@ -265,24 +305,28 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT_NE(nullptr, allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));

uint8_t* start = context.tensors[0].data.uint8;
uint8_t* start = eval_tensors[0].data.uint8;
// Check test_helpers.cc BuildSimpleModelWithBranch for model structure.
// t0 is the first tensor, so place it in offset 0.
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start);
// bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors.
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), context.tensors[0].bytes);
size_t buffer_size;
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, tflite::TfLiteEvalTensorByteLength(
&eval_tensors[0], &buffer_size));
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), buffer_size);
// t1 can't reuse any memory, as n0 requires both t0 and t1.
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[1].data.uint8 - start);
// t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires
// both t1 and t2.
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start);
// t3 reuses the same memory from t0 as t0 is not an input to any node.
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start);

// SimpleModelWithBranch has 3 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
@@ -291,7 +335,7 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {

TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
const tflite::Model* model = tflite::testing::GetComplexMockModel();
TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
tflite::NodeAndRegistration* node_and_registration;
constexpr size_t arena_size = 2048;
@@ -300,32 +344,34 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) {
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
TF_LITE_MICRO_EXPECT(nullptr != allocator);
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));

TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(10), context.tensors_size);
size_t model_tensor_size = tflite::testing::GetModelTensorCount(model);
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(10), model_tensor_size);

// NOTE: Tensor indexes match the values in GetComplexMockModel().
tflite::testing::VerifyMockTensor(&context.tensors[0]);
tflite::testing::VerifyMockTensor(&context.tensors[1],
true /* is_variable */);
tflite::testing::VerifyMockWeightTensor(&context.tensors[2]);
tflite::testing::VerifyMockTensor(&context.tensors[3]);
tflite::testing::VerifyMockTensor(&context.tensors[4],
true /* is_variable */);
tflite::testing::VerifyMockWeightTensor(&context.tensors[5]);
tflite::testing::VerifyMockTensor(&context.tensors[6]);
tflite::testing::VerifyMockTensor(&context.tensors[7],
true /* is_variable */);
tflite::testing::VerifyMockWeightTensor(&context.tensors[8]);
tflite::testing::VerifyMockTensor(&context.tensors[9]);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 0);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 1,
/*is_variable=*/true);
tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 2);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 3);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 4,
/*is_variable=*/true);
tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 5);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 6);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 7,
/*is_variable=*/true);
tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 8);
tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 9);

// Ensure that variable tensors have unique address
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 1);
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 4);
tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 7);
// // Ensure that variable tensors have unique address
tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 1);
tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 4);
tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 7);

// ComplexMockModel has 3 operators:
tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
@@ -363,27 +409,32 @@ TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) {
const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
nbr_tensors, metadata_buffer, node_list, num_conns);

TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
constexpr size_t arena_size = 4096;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);

TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));

// Since all of the tensors are online planned and the model structure is
// identical to that in TestAllocationForModelsWithBranches,
// the offsets should be identical to that test.
uint8_t* start = context.tensors[0].data.uint8;
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), context.tensors[0].bytes);
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
uint8_t* start = eval_tensors[0].data.uint8;
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start);

size_t buffer_size;
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, tflite::TfLiteEvalTensorByteLength(
&eval_tensors[0], &buffer_size));
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), buffer_size);
TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start);
}

TF_LITE_MICRO_TEST(OfflinePlannerBasic) {
@@ -419,23 +470,24 @@ TF_LITE_MICRO_TEST(OfflinePlannerBasic) {
const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
nbr_tensors, metadata_buffer, node_list, num_conns);

TfLiteContext context;
TfLiteEvalTensor* eval_tensors = nullptr;
constexpr size_t arena_size = 4096;
uint8_t arena[arena_size];
tflite::MicroAllocator* allocator =
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);

TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
&node_and_registration));
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
allocator->FinishModelAllocation(model, &context));
kTfLiteOk,
allocator->StartModelAllocation(model, op_resolver,
&node_and_registration, &eval_tensors));
TF_LITE_MICRO_EXPECT_EQ(
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));

uint8_t* start = context.tensors[0].data.uint8;
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[3].data.uint8 - start);
uint8_t* start = eval_tensors[0].data.uint8;
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[1].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[2].data.uint8 - start);
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[3].data.uint8 - start);
}

TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) {
@ -471,24 +523,25 @@ TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) {
|
||||
const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
|
||||
nbr_tensors, metadata_buffer, node_list, num_conns);
|
||||
|
||||
TfLiteContext context;
|
||||
TfLiteEvalTensor* eval_tensors = nullptr;
|
||||
constexpr size_t arena_size = 4096;
|
||||
uint8_t arena[arena_size];
|
||||
tflite::MicroAllocator* allocator =
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
|
||||
&node_and_registration));
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
|
||||
allocator->FinishModelAllocation(model, &context));
|
||||
kTfLiteOk,
|
||||
allocator->StartModelAllocation(model, op_resolver,
|
||||
&node_and_registration, &eval_tensors));
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));
|
||||
|
||||
uint8_t* start = context.tensors[0].data.uint8;
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[1].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), context.tensors[0].bytes);
|
||||
uint8_t* start = eval_tensors[0].data.uint8;
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[1].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start);
|
||||
// TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(48), context.tensors[0].bytes);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) {
|
||||
@ -526,24 +579,25 @@ TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) {
|
||||
const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
|
||||
nbr_tensors, metadata_buffer, node_list, num_conns);
|
||||
|
||||
TfLiteContext context;
|
||||
TfLiteEvalTensor* eval_tensors = nullptr;
|
||||
constexpr size_t arena_size = 4096;
|
||||
uint8_t arena[arena_size];
|
||||
tflite::MicroAllocator* allocator =
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
|
||||
&node_and_registration));
|
||||
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
|
||||
allocator->FinishModelAllocation(model, &context));
|
||||
kTfLiteOk,
|
||||
allocator->StartModelAllocation(model, op_resolver,
|
||||
&node_and_registration, &eval_tensors));
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));
|
||||
|
||||
uint8_t* start = context.tensors[0].data.uint8;
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[4].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start);
|
||||
uint8_t* start = eval_tensors[0].data.uint8;
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[1].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[2].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[4].data.uint8 - start);
|
||||
TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) {
|
||||
@ -554,14 +608,14 @@ TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) {
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(allocator, nullptr);
|
||||
|
||||
TfLiteTensor* tensor1 =
|
||||
allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/1);
|
||||
TfLiteTensor* tensor1 = allocator->AllocatePersistentTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/1);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor1->quantization.params, nullptr);
|
||||
TF_LITE_MICRO_EXPECT_FALSE(tensor1->is_variable);
|
||||
|
||||
TfLiteTensor* tensor2 =
|
||||
allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/2);
|
||||
TfLiteTensor* tensor2 = allocator->AllocatePersistentTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/2);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor2, nullptr);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor2->quantization.params, nullptr);
|
||||
TF_LITE_MICRO_EXPECT_FALSE(tensor2->is_variable);
|
||||
@ -571,7 +625,7 @@ TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) {
|
||||
TF_LITE_MICRO_EXPECT_GT(tensor1, tensor2);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) {
|
||||
TF_LITE_MICRO_TEST(TestAllocateSingleTempTfLiteTensor) {
|
||||
const tflite::Model* model = tflite::testing::GetSimpleMockModel();
|
||||
constexpr size_t arena_size = 1024;
|
||||
uint8_t arena[arena_size];
|
||||
@ -579,8 +633,8 @@ TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) {
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(allocator, nullptr);
|
||||
|
||||
TfLiteTensor* tensor1 =
|
||||
allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1);
|
||||
TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/1);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr);
|
||||
}
|
||||
|
||||
@ -592,13 +646,13 @@ TF_LITE_MICRO_TEST(TestAllocateChainOfTfLiteTensor) {
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(allocator, nullptr);
|
||||
|
||||
TfLiteTensor* tensor1 =
|
||||
allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1);
|
||||
TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/1);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr);
|
||||
|
||||
TfLiteTensor* tensor2 =
|
||||
allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/3);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr);
|
||||
TfLiteTensor* tensor2 = allocator->AllocateTempTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/2);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor2, nullptr);
|
||||
|
||||
// The address of tensor2 should be higher than the address of tensor1
|
||||
// (chained allocations):
|
||||
@ -613,15 +667,15 @@ TF_LITE_MICRO_TEST(TestAllocateTfLiteTensorWithReset) {
|
||||
tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT(allocator != nullptr);
|
||||
|
||||
TfLiteTensor* tensor1 =
|
||||
allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1);
|
||||
TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/1);
|
||||
TF_LITE_MICRO_EXPECT(tensor1 != nullptr);
|
||||
|
||||
allocator->ResetTempAllocations();
|
||||
|
||||
TfLiteTensor* tensor2 =
|
||||
allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/2);
|
||||
TF_LITE_MICRO_EXPECT(tensor1 != nullptr);
|
||||
TfLiteTensor* tensor2 = allocator->AllocateTempTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, /*tensor_index=*/2);
|
||||
TF_LITE_MICRO_EXPECT(tensor2 != nullptr);
|
||||
|
||||
// The address of tensor2 should be equal to the address of tensor1 since
|
||||
// allocations were not chained:
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/c/common.h"
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/core/api/tensor_utils.h"
|
||||
#include "tensorflow/lite/micro/memory_helpers.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
||||
#include "tensorflow/lite/micro/micro_profiler.h"
|
||||
@ -42,6 +43,10 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
|
||||
|
||||
namespace internal {
|
||||
|
||||
ContextHelper::ContextHelper(ErrorReporter* error_reporter,
|
||||
MicroAllocator* allocator, const Model* model)
|
||||
: allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
|
||||
|
||||
TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
|
||||
size_t bytes, void** ptr) {
|
||||
return reinterpret_cast<ContextHelper*>(ctx->impl_)
|
||||
@ -72,8 +77,21 @@ void ContextHelper::ReportOpError(struct TfLiteContext* context,
|
||||
|
||||
TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
int tensor_idx) {
// TODO(b/160894903): Return this value from temp allocated memory.
return &context->tensors[tensor_idx];
ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
return helper->allocator_->AllocateTempTfLiteTensor(
helper->model_, helper->eval_tensors_, tensor_idx);
}

TfLiteEvalTensor* ContextHelper::GetEvalTensor(
const struct TfLiteContext* context, int tensor_idx) {
ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
return &helper->eval_tensors_[tensor_idx];
}

void ContextHelper::SetNodeIndex(int idx) { current_node_idx_ = idx; }

void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
eval_tensors_ = eval_tensors;
}
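As a hypothetical illustration of the pattern this plumbing enables (not part of this diff), a kernel's Eval() can fetch the lightweight TfLiteEvalTensor through the context instead of a temp-allocated TfLiteTensor; the tensor indices and the OpData caching note below are illustrative only.

// Hypothetical kernel Eval() body showing GetEvalTensor usage.
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      context->GetEvalTensor(context, node->inputs->data[0]);
  TfLiteEvalTensor* output =
      context->GetEvalTensor(context, node->outputs->data[0]);
  // Operate directly on input->data.* / output->data.* buffers here;
  // quantization params would be cached in the op's data during Prepare().
  return kTfLiteOk;
}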
|
||||
|
||||
} // namespace internal
|
||||
@ -91,7 +109,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
error_reporter)),
|
||||
tensors_allocated_(false),
|
||||
initialization_status_(kTfLiteError),
|
||||
context_helper_(error_reporter_, &allocator_),
|
||||
eval_tensors_(nullptr),
|
||||
context_helper_(error_reporter_, &allocator_, model),
|
||||
input_tensor_(nullptr),
|
||||
output_tensor_(nullptr) {
|
||||
Init(profiler);
|
||||
@ -108,7 +127,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
|
||||
allocator_(*allocator),
|
||||
tensors_allocated_(false),
|
||||
initialization_status_(kTfLiteError),
|
||||
context_helper_(error_reporter_, &allocator_),
|
||||
eval_tensors_(nullptr),
|
||||
context_helper_(error_reporter_, &allocator_, model),
|
||||
input_tensor_(nullptr),
|
||||
output_tensor_(nullptr) {
|
||||
Init(profiler);
|
||||
@ -143,13 +163,14 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) {
|
||||
context_.impl_ = static_cast<void*>(&context_helper_);
|
||||
context_.ReportError = context_helper_.ReportOpError;
|
||||
context_.GetTensor = context_helper_.GetTensor;
|
||||
context_.GetEvalTensor = context_helper_.GetEvalTensor;
|
||||
context_.recommended_num_threads = 1;
|
||||
context_.profiler = profiler;
|
||||
|
||||
initialization_status_ = kTfLiteOk;
|
||||
}
|
||||
|
||||
void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) {
|
||||
void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
|
||||
int32_t tensorSize = 1;
|
||||
for (int d = 0; d < tensorCorr->dims->size; ++d)
|
||||
tensorSize *= reinterpret_cast<const int32_t*>(tensorCorr->dims->data)[d];
|
||||
@ -190,24 +211,40 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) {
|
||||
}
|
||||
|
||||
TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
if (allocator_.StartModelAllocation(model_, &context_, op_resolver_,
|
||||
&node_and_registrations_) != kTfLiteOk) {
|
||||
if (allocator_.StartModelAllocation(model_, op_resolver_,
|
||||
&node_and_registrations_,
|
||||
&eval_tensors_) != kTfLiteOk) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Failed starting model allocation.\n");
|
||||
initialization_status_ = kTfLiteError;
|
||||
return kTfLiteError;
|
||||
}
|
||||
|
||||
// Update the pointer now that TfLiteEvalTensor allocation has completed on
// the context helper.
// TODO(b/16157777): This call would not be needed if ContextHelper were
// rolled into the interpreter.
context_helper_.SetTfLiteEvalTensors(eval_tensors_);

// If the system is big endian then convert weights from the flatbuffer from
// little to big endian on startup so that this does not need to be done during
// inference.
// NOTE: This requires that the flatbuffer is held in memory which can be
// modified by this process.
if (!FLATBUFFERS_LITTLEENDIAN) {
for (size_t t = 0; t < tensors_size(); ++t) {
TfLiteTensor* thisTensor = &context_.tensors[t];
if (thisTensor->allocation_type == kTfLiteMmapRo)
CorrectTensorEndianness(thisTensor);
for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) {
if (auto* buffer =
(*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) {
// If we've found a buffer, does it have any data?
if (auto* array = buffer->data()) {
// If it has any data, is the data size larger than zero?
if (array->size()) {
// Update the endianness of the corresponding eval tensor since that
// struct holds the buffer used at inference time.
CorrectTensorEndianness(&eval_tensors_[t]);
}
}
}
}
}
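As a rough aside on what the per-tensor conversion amounts to, the element-wise byte swap looks roughly like the sketch below. This is an assumption about CorrectTensorDataEndianness-style behavior, not the exact code in this change.

// Sketch: reverse the bytes of each element in place. Requires <algorithm>.
template <class T>
void SwapElementEndianness(T* data, int32_t size) {
  for (int32_t i = 0; i < size; ++i) {
    unsigned char* bytes = reinterpret_cast<unsigned char*>(&data[i]);
    std::reverse(bytes, bytes + sizeof(T));
  }
}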
|
||||
|
||||
@ -236,8 +273,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
}
|
||||
context_helper_.SetNodeIndex(-1);
|
||||
|
||||
// Both AllocatePersistentBuffer and RequestScratchBufferInArena is available
|
||||
// in Prepare stage.
|
||||
// Both AllocatePersistentBuffer and RequestScratchBufferInArena are
|
||||
// available in Prepare stage.
|
||||
context_.RequestScratchBufferInArena =
|
||||
context_helper_.RequestScratchBufferInArena;
|
||||
for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
|
||||
@ -255,6 +292,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
return kTfLiteError;
|
||||
}
|
||||
}
|
||||
allocator_.ResetTempAllocations();
|
||||
}
|
||||
context_helper_.SetNodeIndex(-1);
|
||||
|
||||
@ -265,7 +303,9 @@ TfLiteStatus MicroInterpreter::AllocateTensors() {
|
||||
context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
|
||||
|
||||
TF_LITE_ENSURE_OK(&context_,
|
||||
allocator_.FinishModelAllocation(model_, &context_));
|
||||
allocator_.FinishModelAllocation(model_, eval_tensors_));
|
||||
TF_LITE_ENSURE_STATUS(ResetVariableTensors());
|
||||
|
||||
tensors_allocated_ = true;
|
||||
return kTfLiteOk;
|
||||
}
|
||||
@ -290,7 +330,8 @@ TfLiteStatus MicroInterpreter::Invoke() {
|
||||
if (registration->invoke) {
|
||||
TfLiteStatus invoke_status;
|
||||
#ifndef NDEBUG // Omit profiler overhead from release builds.
|
||||
// The case where profiler == nullptr is handled by ScopedOperatorProfile.
|
||||
// The case where profiler == nullptr is handled by
|
||||
// ScopedOperatorProfile.
|
||||
tflite::Profiler* profiler =
|
||||
reinterpret_cast<tflite::Profiler*>(context_.profiler);
|
||||
ScopedOperatorProfile scoped_profiler(
|
||||
@ -298,6 +339,12 @@ TfLiteStatus MicroInterpreter::Invoke() {
|
||||
#endif
|
||||
invoke_status = registration->invoke(&context_, node);
|
||||
|
||||
// All TfLiteTensor structs used in the kernel are allocated from temp
// memory in the allocator. This creates a chain of allocations in the
// temp section. The call below resets the chain of allocations to
// prepare for the next call.
allocator_.ResetTempAllocations();
|
||||
|
||||
if (invoke_status == kTfLiteError) {
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
@ -321,15 +368,17 @@ TfLiteTensor* MicroInterpreter::input(size_t index) {
|
||||
return nullptr;
|
||||
}
|
||||
if (index != 0) {
TF_LITE_REPORT_ERROR(error_reporter_,
"Input tensors not at index 0 will allocate from the "
"persistent memory arena in the future!");
return &(context_.tensors[inputs().Get(index)]);
TF_LITE_REPORT_ERROR(
error_reporter_,
"Input tensors not at index 0 are allocated from the "
"persistent memory arena. Repeat calls will cause excess "
"allocation!");
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
inputs().Get(index));
}
if (input_tensor_ == nullptr) {
// TODO(b/160894903): This API will allocate TfLiteTensor structs from
// persistent (tail) memory and cache on this pointer.
input_tensor_ = &(context_.tensors[inputs().Get(index)]);
input_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
model_, eval_tensors_, inputs().Get(index));
}
return input_tensor_;
}
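A hypothetical caller-side note (not part of this diff): since each call for a non-zero index allocates a fresh persistent TfLiteTensor from the arena tail, callers should cache the returned pointer rather than calling repeatedly.

// `interpreter` is an illustrative MicroInterpreter instance.
TfLiteTensor* second_input = interpreter.input(1);  // allocate once
// Reuse `second_input` across invocations instead of calling input(1) again.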
|
||||
@ -343,15 +392,19 @@ TfLiteTensor* MicroInterpreter::output(size_t index) {
|
||||
return nullptr;
|
||||
}
|
||||
if (index != 0) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Output tensors not at index 0 will allocate from the "
|
||||
"persistent memory arena in the future!");
|
||||
return &(context_.tensors[outputs().Get(index)]);
|
||||
TF_LITE_REPORT_ERROR(
|
||||
error_reporter_,
|
||||
"Output tensors not at index 0 are allocated from the "
|
||||
"persistent memory arena. Repeat calls will cause excess "
|
||||
"allocation!");
|
||||
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
|
||||
outputs().Get(index));
|
||||
}
|
||||
if (output_tensor_ == nullptr) {
|
||||
// TODO(b/160894903): This API will allocate TfLiteTensor structs from
|
||||
// persistent (tail) memory and cache on this pointer.
|
||||
output_tensor_ = &(context_.tensors[outputs().Get(index)]);
|
||||
output_tensor_ = allocator_.AllocatePersistentTfLiteTensor(
|
||||
model_, eval_tensors_, outputs().Get(index));
|
||||
}
|
||||
return output_tensor_;
|
||||
}
|
||||
@ -364,22 +417,26 @@ TfLiteTensor* MicroInterpreter::tensor(size_t index) {
|
||||
length);
|
||||
return nullptr;
|
||||
}
|
||||
return &context_.tensors[index];
|
||||
return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
|
||||
index);
|
||||
}
|
||||
|
||||
TfLiteStatus MicroInterpreter::ResetVariableTensors() {
|
||||
const size_t length = tensors_size();
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
TfLiteTensor* cur_tensor = tensor(i);
|
||||
if (cur_tensor->is_variable) {
|
||||
TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor);
|
||||
if (status != kTfLiteOk) {
|
||||
TF_LITE_REPORT_ERROR(error_reporter_,
|
||||
"Failed to reset variable tensor at index: %d", i);
|
||||
return status;
|
||||
for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
|
||||
auto* tensor = subgraph_->tensors()->Get(i);
|
||||
if (tensor->is_variable()) {
|
||||
size_t buffer_size;
|
||||
TF_LITE_ENSURE_STATUS(
|
||||
TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
|
||||
|
||||
int value = 0;
|
||||
if (tensor->type() == tflite::TensorType_INT8) {
|
||||
value = tensor->quantization()->zero_point()->Get(0);
|
||||
}
|
||||
memset(eval_tensors_[i].data.raw, value, buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
|
@ -35,32 +35,37 @@ namespace internal {
|
||||
// A helper class to encapsulate the implementation of APIs in Context.
|
||||
// context->impl_ points to an instance of this class.
|
||||
// Check tensorflow/lite/c/common.h for detailed descriptions.
|
||||
// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
|
||||
class ContextHelper {
|
||||
public:
|
||||
explicit ContextHelper(ErrorReporter* error_reporter,
|
||||
MicroAllocator* allocator)
|
||||
: allocator_(allocator), error_reporter_(error_reporter) {}
|
||||
MicroAllocator* allocator, const Model* model);
|
||||
|
||||
// Functions that will be assigned to function pointers on TfLiteContext:
|
||||
static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes,
|
||||
void** ptr);
|
||||
|
||||
static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
|
||||
size_t bytes,
|
||||
int* buffer_idx);
|
||||
|
||||
static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
|
||||
|
||||
static void ReportOpError(struct TfLiteContext* context, const char* format,
|
||||
...);
|
||||
|
||||
static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
|
||||
int tensor_idx);
|
||||
|
||||
void SetNodeIndex(int idx) { current_node_idx_ = idx; }
|
||||
// Sets the current node index to assist with scratch buffer allocations:
|
||||
void SetNodeIndex(int idx);
|
||||
|
||||
// Sets the pointer to a list of TfLiteEvalTensor instances.
|
||||
void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
|
||||
|
||||
private:
|
||||
MicroAllocator* allocator_;
|
||||
ErrorReporter* error_reporter_;
|
||||
const Model* model_;
|
||||
TfLiteEvalTensor* eval_tensors_;
|
||||
int current_node_idx_ = -1;
|
||||
};
|
||||
|
||||
@ -173,7 +178,7 @@ class MicroInterpreter {
|
||||
// error reporting during initialization.
|
||||
void Init(tflite::Profiler* profiler);
|
||||
|
||||
void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
|
||||
void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr);
|
||||
|
||||
template <class T>
|
||||
void CorrectTensorDataEndianness(T* data, int32_t size);
|
||||
@ -190,6 +195,7 @@ class MicroInterpreter {
|
||||
TfLiteStatus initialization_status_;
|
||||
|
||||
const SubGraph* subgraph_;
|
||||
TfLiteEvalTensor* eval_tensors_;
|
||||
internal::ContextHelper context_helper_;
|
||||
|
||||
// TODO(b/160894903): Clean these pointers up when all APIs are updated to new
|
||||
|
@ -72,7 +72,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
|
||||
|
||||
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
|
||||
|
||||
constexpr size_t allocator_buffer_size = 1000;
|
||||
constexpr size_t allocator_buffer_size = 2000;
|
||||
uint8_t allocator_buffer[allocator_buffer_size];
|
||||
|
||||
// Create a new scope so that we can test the destructor.
|
||||
@ -127,7 +127,7 @@ TF_LITE_MICRO_TEST(TestKernelMemoryPlanning) {
|
||||
|
||||
tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver();
|
||||
|
||||
constexpr size_t allocator_buffer_size = 1024;
|
||||
constexpr size_t allocator_buffer_size = 2048;
|
||||
uint8_t allocator_buffer[allocator_buffer_size];
|
||||
tflite::MicroInterpreter interpreter(model, op_resolver, allocator_buffer,
|
||||
allocator_buffer_size,
|
||||
@ -312,13 +312,7 @@ TF_LITE_MICRO_TEST(TestIncompleteInitializationAllocationsWithSmallArena) {
|
||||
static_cast<size_t>(0),
|
||||
allocator
|
||||
->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArray)
|
||||
.used_bytes);
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
static_cast<size_t>(0),
|
||||
allocator
|
||||
->GetRecordedAllocation(tflite::RecordedAllocationType::
|
||||
kTfLiteTensorArrayQuantizationData)
|
||||
tflite::RecordedAllocationType::kTfLiteEvalTensorData)
|
||||
.used_bytes);
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
static_cast<size_t>(0),
|
||||
@ -358,13 +352,13 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) {
|
||||
static_cast<size_t>(0),
|
||||
allocator
|
||||
->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArray)
|
||||
tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData)
|
||||
.used_bytes);
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
static_cast<size_t>(0),
|
||||
allocator
|
||||
->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData)
|
||||
tflite::RecordedAllocationType::kTfLiteEvalTensorData)
|
||||
.used_bytes);
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
static_cast<size_t>(0),
|
||||
@ -382,9 +376,9 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) {
|
||||
TF_LITE_MICRO_EXPECT_GT(
|
||||
allocator
|
||||
->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArray)
|
||||
tflite::RecordedAllocationType::kTfLiteEvalTensorData)
|
||||
.used_bytes,
|
||||
static_cast<size_t>(0));
|
||||
0);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_GT(
|
||||
allocator
|
||||
|
@ -18,6 +18,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||
#include "tensorflow/lite/kernels/internal/compatibility.h"
|
||||
#include "tensorflow/lite/micro/compatibility.h"
|
||||
#include "tensorflow/lite/micro/micro_allocator.h"
|
||||
#include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
|
||||
|
||||
namespace tflite {
|
||||
@ -47,10 +48,12 @@ RecordingMicroAllocator* RecordingMicroAllocator::Create(
|
||||
RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
|
||||
RecordedAllocationType allocation_type) const {
|
||||
switch (allocation_type) {
|
||||
case RecordedAllocationType::kTfLiteTensorArray:
|
||||
return recorded_tflite_tensor_array_data_;
|
||||
case RecordedAllocationType::kTfLiteTensorArrayQuantizationData:
|
||||
return recorded_tflite_tensor_array_quantization_data_;
|
||||
case RecordedAllocationType::kTfLiteEvalTensorData:
|
||||
return recorded_tflite_eval_tensor_data_;
|
||||
case RecordedAllocationType::kPersistentTfLiteTensorData:
|
||||
return recorded_persistent_tflite_tensor_data_;
|
||||
case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData:
|
||||
return recorded_persistent_tflite_tensor_quantization_data_;
|
||||
case RecordedAllocationType::kTfLiteTensorVariableBufferData:
|
||||
return recorded_tflite_tensor_variable_buffer_data_;
|
||||
case RecordedAllocationType::kNodeAndRegistrationArray:
|
||||
@ -81,11 +84,13 @@ void RecordingMicroAllocator::PrintAllocations() const {
|
||||
error_reporter(),
|
||||
"[RecordingMicroAllocator] Arena allocation tail %d bytes",
|
||||
recording_memory_allocator_->GetTailUsedBytes());
|
||||
PrintRecordedAllocation(RecordedAllocationType::kTfLiteTensorArray,
|
||||
"TfLiteTensor struct", "tensors");
|
||||
PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
|
||||
"TfLiteEvalTensor data", "allocations");
|
||||
PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,
|
||||
"Persistent TfLiteTensor data", "tensors");
|
||||
PrintRecordedAllocation(
|
||||
RecordedAllocationType::kTfLiteTensorArrayQuantizationData,
|
||||
"TfLiteTensor quantization data", "allocations");
|
||||
RecordedAllocationType::kPersistentTfLiteTensorQuantizationData,
|
||||
"Persistent TfLiteTensor quantization data", "allocations");
|
||||
PrintRecordedAllocation(
|
||||
RecordedAllocationType::kTfLiteTensorVariableBufferData,
|
||||
"TfLiteTensor variable buffer data", "allocations");
|
||||
@ -108,42 +113,12 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
|
||||
allocation.count, allocation_description);
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray(
|
||||
TfLiteContext* context, const SubGraph* subgraph) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status =
|
||||
MicroAllocator::AllocateTfLiteTensorArray(context, subgraph);
|
||||
|
||||
RecordAllocationUsage(allocations, recorded_tflite_tensor_array_data_);
|
||||
// The allocation for this recording will always be 1. This is because the
|
||||
// parent class mallocs one large allocation for the number of tensors in the
|
||||
// graph (e.g. sizeof(TfLiteTensor) * num_tensors).
|
||||
// To prevent extra overhead and potential for fragmentation, manually adjust
|
||||
// the accounting by decrementing by 1 and adding the actual number of tensors
|
||||
// used in the graph:
|
||||
recorded_tflite_tensor_array_data_.count += context->tensors_size - 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
|
||||
const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer(
|
||||
model, context, subgraph);
|
||||
|
||||
RecordAllocationUsage(allocations,
|
||||
recorded_tflite_tensor_array_quantization_data_);
|
||||
return status;
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
|
||||
const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) {
|
||||
const Model* model, NodeAndRegistration** node_and_registrations) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
|
||||
subgraph, node_and_registrations);
|
||||
model, node_and_registrations);
|
||||
|
||||
RecordAllocationUsage(allocations,
|
||||
recorded_node_and_registration_array_data_);
|
||||
@ -157,36 +132,79 @@ TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
|
||||
// the accounting by decrementing by 1 and adding the actual number of nodes
|
||||
// used in the graph:
|
||||
recorded_node_and_registration_array_data_.count +=
|
||||
subgraph->operators()->size() - 1;
|
||||
GetSubGraphFromModel(model)->operators()->size() - 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
TfLiteStatus
|
||||
RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
|
||||
const Model* model, const SubGraph* subgraph,
|
||||
const MicroOpResolver& op_resolver,
|
||||
const Model* model, const MicroOpResolver& op_resolver,
|
||||
NodeAndRegistration* node_and_registrations) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status =
|
||||
MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
|
||||
model, subgraph, op_resolver, node_and_registrations);
|
||||
model, op_resolver, node_and_registrations);
|
||||
|
||||
RecordAllocationUsage(allocations, recorded_op_data_);
|
||||
return status;
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::AllocateVariables(
|
||||
TfLiteContext* context, const SubGraph* subgraph) {
|
||||
TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
|
||||
const Model* model, TfLiteEvalTensor** eval_tensors) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status = MicroAllocator::AllocateVariables(context, subgraph);
|
||||
TfLiteStatus status =
|
||||
MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
|
||||
|
||||
RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
// The allocation for this recording will always be 1. This is because the
// parent class makes one large allocation for the number of tensors in the
// graph (i.e. sizeof(TfLiteEvalTensor) * num_tensors).
// To prevent extra overhead and potential for fragmentation, manually adjust
// the accounting by decrementing by 1 and adding the actual number of tensors
// used in the graph:
recorded_tflite_eval_tensor_data_.count +=
GetSubGraphFromModel(model)->tensors()->size() - 1;
return status;
}
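A quick worked example of the accounting above, with a hypothetical tensor count, shows how the single block is re-expressed as a per-tensor tally:

// Hypothetical: 24 tensors in the subgraph.
size_t tensors = 24;
size_t recorded_count = 1 - 1 + tensors;                      // == 24
size_t requested_bytes = sizeof(TfLiteEvalTensor) * tensors;  // one block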
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::AllocateVariables(
|
||||
const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status =
|
||||
MicroAllocator::AllocateVariables(subgraph, eval_tensors);
|
||||
|
||||
RecordAllocationUsage(allocations,
|
||||
recorded_tflite_tensor_variable_buffer_data_);
|
||||
return status;
|
||||
}
|
||||
|
||||
TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
|
||||
const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
|
||||
model, eval_tensors, tensor_index);
|
||||
|
||||
RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
|
||||
return result;
|
||||
}
|
||||
|
||||
TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
|
||||
const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
|
||||
int tensor_index, bool allocate_temp) {
|
||||
RecordedAllocation allocations = SnapshotAllocationUsage();
|
||||
|
||||
TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
|
||||
model, subgraph, tensor, tensor_index, allocate_temp);
|
||||
|
||||
RecordAllocationUsage(allocations,
|
||||
recorded_persistent_tflite_tensor_quantization_data_);
|
||||
return status;
|
||||
}
|
||||
|
||||
RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const {
|
||||
return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(),
|
||||
/*used_bytes=*/recording_memory_allocator_->GetUsedBytes(),
|
||||
|
@ -25,8 +25,9 @@ namespace tflite {
|
||||
// List of buckets currently recorded by this class. Each type keeps a list of
|
||||
// allocated information during model initialization.
|
||||
enum class RecordedAllocationType {
|
||||
kTfLiteTensorArray,
|
||||
kTfLiteTensorArrayQuantizationData,
|
||||
kTfLiteEvalTensorData,
|
||||
kPersistentTfLiteTensorData,
|
||||
kPersistentTfLiteTensorQuantizationData,
|
||||
kTfLiteTensorVariableBufferData,
|
||||
kNodeAndRegistrationArray,
|
||||
kOpData,
|
||||
@ -66,20 +67,29 @@ class RecordingMicroAllocator : public MicroAllocator {
|
||||
void PrintAllocations() const;
|
||||
|
||||
protected:
|
||||
TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context,
|
||||
const SubGraph* subgraph) override;
|
||||
TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer(
|
||||
const Model* model, TfLiteContext* context,
|
||||
const SubGraph* subgraph) override;
|
||||
TfLiteStatus AllocateNodeAndRegistrations(
|
||||
const SubGraph* subgraph,
|
||||
const Model* model,
|
||||
NodeAndRegistration** node_and_registrations) override;
|
||||
TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
|
||||
const Model* model, const SubGraph* subgraph,
|
||||
const MicroOpResolver& op_resolver,
|
||||
const Model* model, const MicroOpResolver& op_resolver,
|
||||
NodeAndRegistration* node_and_registrations) override;
|
||||
TfLiteStatus AllocateVariables(TfLiteContext* context,
|
||||
const SubGraph* subgraph) override;
|
||||
TfLiteStatus AllocateTfLiteEvalTensors(
|
||||
const Model* model, TfLiteEvalTensor** eval_tensors) override;
|
||||
TfLiteStatus AllocateVariables(const SubGraph* subgraph,
|
||||
TfLiteEvalTensor* eval_tensors) override;
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this method. It is only used to record TfLiteTensor persistent allocations.
|
||||
TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
|
||||
const Model* model, TfLiteEvalTensor* eval_tensors,
|
||||
int tensor_index) override;
|
||||
// TODO(b/160894903): Once all kernels have been updated to the new API drop
|
||||
// this function since all allocations for quantized data will take place in
|
||||
// the temp section.
|
||||
TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
|
||||
const SubGraph* subgraph,
|
||||
TfLiteTensor* tensor,
|
||||
int tensor_index,
|
||||
bool allocate_temp) override;
|
||||
|
||||
private:
|
||||
RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
|
||||
@ -95,8 +105,9 @@ class RecordingMicroAllocator : public MicroAllocator {
|
||||
|
||||
const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
|
||||
|
||||
RecordedAllocation recorded_tflite_tensor_array_data_ = {};
|
||||
RecordedAllocation recorded_tflite_tensor_array_quantization_data_ = {};
|
||||
RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
|
||||
RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
|
||||
RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {};
|
||||
RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
|
||||
RecordedAllocation recorded_node_and_registration_array_data_ = {};
|
||||
RecordedAllocation recorded_op_data_ = {};
|
||||
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||
#include "tensorflow/lite/micro/testing/test_conv_model.h"
|
||||
|
||||
#define TF_LITE_TENSOR_STRUCT_SIZE sizeof(TfLiteTensor)
|
||||
#define TF_LITE_EVAL_TENSOR_STRUCT_SIZE sizeof(TfLiteEvalTensor)
|
||||
#define TF_LITE_AFFINE_QUANTIZATION_SIZE sizeof(TfLiteAffineQuantization)
|
||||
#define NODE_AND_REGISTRATION_STRUCT_SIZE sizeof(tflite::NodeAndRegistration)
|
||||
|
||||
@ -33,8 +34,8 @@ constexpr int kTestConvArenaSize = 1024 * 12;
|
||||
|
||||
TF_LITE_MICRO_TESTS_BEGIN
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
|
||||
TfLiteContext context;
|
||||
TF_LITE_MICRO_TEST(TestRecordsTfLiteEvalTensorArrayData) {
|
||||
TfLiteEvalTensor* eval_tensors = nullptr;
|
||||
tflite::AllOpsResolver all_ops_resolver;
|
||||
tflite::NodeAndRegistration* node_and_registration;
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
@ -50,11 +51,11 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
|
||||
|
||||
TfLiteStatus status;
|
||||
status = micro_allocator->StartModelAllocation(
|
||||
model, &context, all_ops_resolver, &node_and_registration);
|
||||
model, all_ops_resolver, &node_and_registration, &eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
status = micro_allocator->FinishModelAllocation(model, &context);
|
||||
status = micro_allocator->FinishModelAllocation(model, eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
@ -62,76 +63,21 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
|
||||
|
||||
tflite::RecordedAllocation recorded_allocation =
|
||||
micro_allocator->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArray);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size);
|
||||
tflite::RecordedAllocationType::kTfLiteEvalTensorData);
|
||||
|
||||
micro_allocator->PrintAllocations();
|
||||
|
||||
size_t tensors_count = tflite::testing::GetModelTensorCount(model);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, tensors_count);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
|
||||
context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE);
|
||||
tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE);
|
||||
TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
|
||||
context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
|
||||
TfLiteContext context;
|
||||
tflite::AllOpsResolver all_ops_resolver;
|
||||
tflite::NodeAndRegistration* node_and_registration;
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
uint8_t arena[kTestConvArenaSize];
|
||||
|
||||
tflite::RecordingMicroAllocator* micro_allocator =
|
||||
tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr);
|
||||
if (micro_allocator == nullptr) return 1;
|
||||
|
||||
TfLiteStatus status;
|
||||
status = micro_allocator->StartModelAllocation(
|
||||
model, &context, all_ops_resolver, &node_and_registration);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
status = micro_allocator->FinishModelAllocation(model, &context);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
// Walk the model subgraph to find all tensors with quantization params and
|
||||
// keep a tally.
|
||||
size_t quantized_tensor_count = 0;
|
||||
size_t quantized_channel_bytes = 0;
|
||||
for (size_t i = 0; i < context.tensors_size; ++i) {
|
||||
const tflite::Tensor* cur_tensor =
|
||||
model->subgraphs()->Get(0)->tensors()->Get(i);
|
||||
const tflite::QuantizationParameters* quantization_params =
|
||||
cur_tensor->quantization();
|
||||
if (quantization_params && quantization_params->scale() &&
|
||||
quantization_params->scale()->size() > 0 &&
|
||||
quantization_params->zero_point() &&
|
||||
quantization_params->zero_point()->size() > 0) {
|
||||
quantized_tensor_count++;
|
||||
size_t num_channels = quantization_params->scale()->size();
|
||||
quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate the expected allocation bytes with subgraph quantization data:
|
||||
size_t expected_requested_bytes =
|
||||
quantized_tensor_count * TF_LITE_AFFINE_QUANTIZATION_SIZE +
|
||||
quantized_channel_bytes;
|
||||
|
||||
tflite::RecordedAllocation recorded_allocation =
|
||||
micro_allocator->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData);
|
||||
|
||||
// Each quantized tensor has 2 mallocs (quant struct, zero point dimensions):
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count,
|
||||
quantized_tensor_count * 2);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
|
||||
expected_requested_bytes);
|
||||
TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
|
||||
expected_requested_bytes);
|
||||
tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) {
|
||||
TfLiteContext context;
|
||||
TfLiteEvalTensor* eval_tensors = nullptr;
|
||||
tflite::AllOpsResolver all_ops_resolver;
|
||||
tflite::NodeAndRegistration* node_and_registration;
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
@ -145,11 +91,11 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) {
|
||||
|
||||
TfLiteStatus status;
|
||||
status = micro_allocator->StartModelAllocation(
|
||||
model, &context, all_ops_resolver, &node_and_registration);
|
||||
model, all_ops_resolver, &node_and_registration, &eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
status = micro_allocator->FinishModelAllocation(model, &context);
|
||||
status = micro_allocator->FinishModelAllocation(model, eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
@ -165,7 +111,7 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) {
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsMultiTenantAllocations) {
|
||||
TfLiteContext context;
|
||||
TfLiteEvalTensor* eval_tensors = nullptr;
|
||||
tflite::AllOpsResolver all_ops_resolver;
|
||||
tflite::NodeAndRegistration* node_and_registration;
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
@ -183,34 +129,108 @@ TF_LITE_MICRO_TEST(TestRecordsMultiTenantAllocations) {
|
||||
|
||||
// First allocation with the model in the arena:
|
||||
status = micro_allocator->StartModelAllocation(
|
||||
model, &context, all_ops_resolver, &node_and_registration);
|
||||
model, all_ops_resolver, &node_and_registration, &eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
status = micro_allocator->FinishModelAllocation(model, &context);
|
||||
status = micro_allocator->FinishModelAllocation(model, eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
// Second allocation with the same model in the arena:
|
||||
status = micro_allocator->StartModelAllocation(
|
||||
model, &context, all_ops_resolver, &node_and_registration);
|
||||
model, all_ops_resolver, &node_and_registration, &eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
status = kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context);
|
||||
status = micro_allocator->FinishModelAllocation(model, eval_tensors);
|
||||
TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk);
|
||||
if (status != kTfLiteOk) return 1;
|
||||
|
||||
size_t tensors_count = tflite::testing::GetModelTensorCount(model);
|
||||
|
||||
tflite::RecordedAllocation recorded_allocation =
|
||||
micro_allocator->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kTfLiteTensorArray);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size * 2);
|
||||
TF_LITE_MICRO_EXPECT_EQ(
|
||||
recorded_allocation.requested_bytes,
|
||||
context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2);
|
||||
TF_LITE_MICRO_EXPECT_GE(
|
||||
recorded_allocation.used_bytes,
|
||||
context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2);
|
||||
tflite::RecordedAllocationType::kTfLiteEvalTensorData);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, tensors_count * 2);
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
|
||||
tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE * 2);
|
||||
TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
|
||||
tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE * 2);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsPersistentTfLiteTensorData) {
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
uint8_t arena[kTestConvArenaSize];
|
||||
|
||||
tflite::RecordingMicroAllocator* micro_allocator =
|
||||
tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr);
|
||||
if (micro_allocator == nullptr) return 1;
|
||||
|
||||
TfLiteTensor* tensor = micro_allocator->AllocatePersistentTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, 0);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor, nullptr);
|
||||
if (tensor == nullptr) return 1;
|
||||
|
||||
tflite::RecordedAllocation recorded_allocation =
|
||||
micro_allocator->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::kPersistentTfLiteTensorData);
|
||||
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, static_cast<size_t>(1));
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
|
||||
TF_LITE_TENSOR_STRUCT_SIZE);
|
||||
TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
|
||||
TF_LITE_TENSOR_STRUCT_SIZE);
|
||||
}
|
||||
|
||||
TF_LITE_MICRO_TEST(TestRecordsPersistentTfLiteTensorQuantizationData) {
|
||||
const tflite::Model* model = tflite::GetModel(kTestConvModelData);
|
||||
uint8_t arena[kTestConvArenaSize];
|
||||
|
||||
tflite::RecordingMicroAllocator* micro_allocator =
|
||||
tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
|
||||
micro_test::reporter);
|
||||
TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr);
|
||||
if (micro_allocator == nullptr) return 1;
|
||||
|
||||
TfLiteTensor* tensor = micro_allocator->AllocatePersistentTfLiteTensor(
|
||||
model, /*eval_tensors=*/nullptr, 0);
|
||||
TF_LITE_MICRO_EXPECT_NE(tensor, nullptr);
|
||||
if (tensor == nullptr) return 1;
|
||||
|
||||
// Walk the model subgraph to find all tensors with quantization params and
|
||||
// keep a tally.
|
||||
size_t quantized_channel_bytes = 0;
|
||||
const tflite::Tensor* cur_tensor =
|
||||
model->subgraphs()->Get(0)->tensors()->Get(0);
|
||||
const tflite::QuantizationParameters* quantization_params =
|
||||
cur_tensor->quantization();
|
||||
if (quantization_params && quantization_params->scale() &&
|
||||
quantization_params->scale()->size() > 0 &&
|
||||
quantization_params->zero_point() &&
|
||||
quantization_params->zero_point()->size() > 0) {
|
||||
size_t num_channels = quantization_params->scale()->size();
|
||||
quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels);
|
||||
}
|
||||
|
||||
// Calculate the expected allocation bytes with subgraph quantization data:
|
||||
size_t expected_requested_bytes =
|
||||
TF_LITE_AFFINE_QUANTIZATION_SIZE + quantized_channel_bytes;
|
||||
|
||||
tflite::RecordedAllocation recorded_allocation =
|
||||
micro_allocator->GetRecordedAllocation(
|
||||
tflite::RecordedAllocationType::
|
||||
kPersistentTfLiteTensorQuantizationData);
|
||||
|
||||
// Each quantized tensor has 2 mallocs (quant struct, zero point dimensions):
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, static_cast<size_t>(2));
|
||||
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
|
||||
expected_requested_bytes);
|
||||
TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
|
||||
expected_requested_bytes);
|
||||
}
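To make the tally above concrete, a per-channel-quantized tensor with a hypothetical 32 channels would be recorded as two allocations totalling roughly:

// Hypothetical channel count; the real value comes from the model's
// quantization_params->scale()->size().
int num_channels = 32;
size_t expected_bytes = TF_LITE_AFFINE_QUANTIZATION_SIZE +
                        TfLiteIntArrayGetSizeInBytes(num_channels);
// Recorded count is 2: the TfLiteAffineQuantization struct plus the
// zero-point dimension array.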
|
||||
|
||||
// TODO(b/158124094): Find a way to audit OpData allocations on
|
||||
|
@ -996,5 +996,13 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t GetModelTensorCount(const Model* model) {
|
||||
auto* subgraphs = model->subgraphs();
|
||||
if (subgraphs) {
|
||||
return (*subgraphs)[0]->tensors()->size();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace testing
|
||||
} // namespace tflite
|
||||
|
@ -177,6 +177,9 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
|
||||
int* zero_points, TfLiteAffineQuantization* affine_quant,
|
||||
int quantized_dimension, bool is_variable = false);
|
||||
|
||||
// Returns the number of tensors in the default subgraph for a tflite::Model.
|
||||
size_t GetModelTensorCount(const Model* model);
|
||||
|
||||
} // namespace testing
|
||||
} // namespace tflite