From d8881eb71d3bb4e1a8a2358cad848b11d664b675 Mon Sep 17 00:00:00 2001
From: Nick Kreeger <kreeger@google.com>
Date: Thu, 11 Jun 2020 12:50:23 -0700
Subject: [PATCH] Add a memory threshold allocation test for the Keyword model.

This new test ensures that TF Micro does not regress its current arena allocations (measured on x86-64 systems) for a canonical model. As RAM-reduction changes are introduced, the expected values can be updated from the console output of this test.

Current output for the keyword model:
Testing TestKeywordModelMemoryThreshold
[RecordingMicroAllocator] Arena allocation total 21440 bytes
[RecordingMicroAllocator] Arena allocation head 672 bytes
[RecordingMicroAllocator] Arena allocation tail 20768 bytes
[RecordingMicroAllocator] 'TfLiteTensor struct allocation' used 6048 bytes (requested 6048 bytes 54 times)
[RecordingMicroAllocator] 'TfLiteTensor quantization data allocations' used 2160 bytes (requested 2160 bytes 162 times)
[RecordingMicroAllocator] 'NodeAndRegistration struct allocation' used 1200 bytes (requested 1200 bytes 15 times)
[RecordingMicroAllocator] 'Operator runtime data allocation' used 148 bytes (requested 148 bytes 13 times)
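
For reference, these numbers can be reproduced outside the test with the
RecordingMicroInterpreter introduced in this change. A minimal sketch follows;
the MicroErrorReporter setup and the includes are assumptions, the rest mirrors
the new memory_arena_threshold_test.cc:

  #include "tensorflow/lite/micro/all_ops_resolver.h"
  #include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"
  #include "tensorflow/lite/micro/micro_error_reporter.h"
  #include "tensorflow/lite/micro/recording_micro_interpreter.h"

  // Sketch: audit arena usage of the keyword model. The 22 * 1024 byte arena
  // mirrors the test and already leaves headroom for the recording overhead.
  constexpr int kArenaSize = 22 * 1024;
  uint8_t tensor_arena[kArenaSize];

  int main() {
    tflite::MicroErrorReporter error_reporter;
    tflite::AllOpsResolver op_resolver;
    tflite::RecordingMicroInterpreter interpreter(
        tflite::GetModel(g_keyword_scrambled_model_data), &op_resolver,
        tensor_arena, kArenaSize, &error_reporter);

    interpreter.AllocateTensors();
    // Logs the head/tail split and per-category byte counts shown above.
    interpreter.GetMicroAllocator().PrintAllocations();
    return 0;
  }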

PiperOrigin-RevId: 315958032
Change-Id: I226f6a01aa555970805388632559241a41ff8342
---
 tensorflow/lite/micro/BUILD                   |  14 +++
 tensorflow/lite/micro/benchmarks/BUILD        |   8 ++
 .../lite/micro/memory_arena_threshold_test.cc | 110 ++++++++++++++++++
 tensorflow/lite/micro/micro_allocator.cc      |   4 +-
 tensorflow/lite/micro/micro_allocator.h       |   2 +-
 tensorflow/lite/micro/micro_interpreter.h     |   4 +
 .../lite/micro/recording_micro_allocator.cc   |  47 +++++---
 .../lite/micro/recording_micro_allocator.h    |  17 +--
 .../micro/recording_micro_allocator_test.cc   |  12 +-
 .../lite/micro/recording_micro_interpreter.h  |  58 +++++++++
 .../recording_simple_memory_allocator.cc      |  18 +++
 .../micro/recording_simple_memory_allocator.h |   4 +
 tensorflow/lite/micro/tools/make/Makefile     |   4 +-
 13 files changed, 266 insertions(+), 36 deletions(-)
 create mode 100644 tensorflow/lite/micro/memory_arena_threshold_test.cc
 create mode 100644 tensorflow/lite/micro/recording_micro_interpreter.h

diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD
index 41f3fde803b..86ceef5a7f9 100644
--- a/tensorflow/lite/micro/BUILD
+++ b/tensorflow/lite/micro/BUILD
@@ -205,6 +205,7 @@ cc_library(
     ],
     hdrs = [
         "recording_micro_allocator.h",
+        "recording_micro_interpreter.h",
         "recording_simple_memory_allocator.h",
     ],
     build_for_embedded = True,
@@ -361,3 +362,16 @@ tflite_micro_cc_test(
         "//tensorflow/lite/micro/testing:micro_test",
     ],
 )
+
+tflite_micro_cc_test(
+    name = "memory_arena_threshold_test",
+    srcs = [
+        "memory_arena_threshold_test.cc",
+    ],
+    deps = [
+        ":op_resolvers",
+        ":recording_allocators",
+        "//tensorflow/lite/micro/benchmarks:keyword_scrambled_model_data",
+        "//tensorflow/lite/micro/testing:micro_test",
+    ],
+)
diff --git a/tensorflow/lite/micro/benchmarks/BUILD b/tensorflow/lite/micro/benchmarks/BUILD
index cdd394e8bcb..637f7dd17f5 100644
--- a/tensorflow/lite/micro/benchmarks/BUILD
+++ b/tensorflow/lite/micro/benchmarks/BUILD
@@ -1,5 +1,10 @@
 licenses(["notice"])  # Apache 2.0
 
+package_group(
+    name = "micro_top_level",
+    packages = ["//tensorflow/lite/micro"],
+)
+
 cc_library(
     name = "micro_benchmark",
     hdrs = [
@@ -47,6 +52,9 @@ cc_library(
     hdrs = [
         "keyword_scrambled_model_data.h",
     ],
+    visibility = [
+        ":micro_top_level",
+    ],
 )
 
 cc_binary(
diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc
new file mode 100644
index 00000000000..3f0bbe9ad32
--- /dev/null
+++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc
@@ -0,0 +1,110 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stdint.h>
+
+#include "tensorflow/lite/micro/all_ops_resolver.h"
+#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"
+#include "tensorflow/lite/micro/recording_micro_allocator.h"
+#include "tensorflow/lite/micro/recording_micro_interpreter.h"
+#include "tensorflow/lite/micro/testing/micro_test.h"
+
+/**
+ * Tests to ensure arena memory allocation does not regress by more than 3%.
+ */
+
+namespace {
+
+// Ensure memory doesn't expand more than 3%:
+constexpr float kAllocationThreshold = 0.03;
+const bool kIs64BitSystem = sizeof(void*) == 8;
+
+constexpr int kKeywordTensorArenaSize = 22 * 1024;
+uint8_t tensor_arena[kKeywordTensorArenaSize];
+
+constexpr int kKeywordModelTensorCount = 54;
+constexpr int kKeywordModelNodeAndRegistrationCount = 15;
+
+// NOTE: These values are measured on x86-64:
+// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
+constexpr int kKeywordModelTotalSize = 21440;
+constexpr int kKeywordModelHeadSize = 672;
+constexpr int kKeywordModelTailSize = 20768;
+constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 2160;
+constexpr int kKeywordModelOpRuntimeDataSize = 148;
+
+void EnsureAllocatedSizeThreshold(size_t actual, size_t expected) {
+  // TODO(b/158651472): Better auditing of non-64 bit systems:
+  if (kIs64BitSystem) {
+    // 64-bit systems should check floor and ceiling to catch memory savings:
+    TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold);
+  } else {
+    // Non-64 bit systems should just expect that allocations do not exceed
+    // the ceiling:
+    TF_LITE_MICRO_EXPECT_LE(actual, expected + expected * kAllocationThreshold);
+  }
+}
+
+}  // namespace
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestKeywordModelMemoryThreshold) {
+  tflite::AllOpsResolver all_ops_resolver;
+  tflite::RecordingMicroInterpreter interpreter(
+      tflite::GetModel(g_keyword_scrambled_model_data), &all_ops_resolver,
+      tensor_arena, kKeywordTensorArenaSize, micro_test::reporter);
+
+  interpreter.AllocateTensors();
+
+  const tflite::RecordingMicroAllocator& allocator =
+      interpreter.GetMicroAllocator();
+  allocator.PrintAllocations();
+
+  EnsureAllocatedSizeThreshold(
+      allocator.GetSimpleMemoryAllocator()->GetUsedBytes(),
+      kKeywordModelTotalSize);
+  EnsureAllocatedSizeThreshold(
+      allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(),
+      kKeywordModelHeadSize);
+  EnsureAllocatedSizeThreshold(
+      allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
+      kKeywordModelTailSize);
+  EnsureAllocatedSizeThreshold(
+      allocator
+          .GetRecordedAllocation(
+              tflite::RecordedAllocationType::kTfLiteTensorArray)
+          .used_bytes,
+      sizeof(TfLiteTensor) * kKeywordModelTensorCount);
+  EnsureAllocatedSizeThreshold(
+      allocator
+          .GetRecordedAllocation(tflite::RecordedAllocationType::
+                                     kTfLiteTensorArrayQuantizationData)
+          .used_bytes,
+      kKeywordModelTfLiteTensorQuantizationDataSize);
+  EnsureAllocatedSizeThreshold(
+      allocator
+          .GetRecordedAllocation(
+              tflite::RecordedAllocationType::kNodeAndRegistrationArray)
+          .used_bytes,
+      sizeof(tflite::NodeAndRegistration) *
+          kKeywordModelNodeAndRegistrationCount);
+  EnsureAllocatedSizeThreshold(
+      allocator.GetRecordedAllocation(tflite::RecordedAllocationType::kOpData)
+          .used_bytes,
+      kKeywordModelOpRuntimeDataSize);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc
index 13838ae66a6..be0cd9c436b 100644
--- a/tensorflow/lite/micro/micro_allocator.cc
+++ b/tensorflow/lite/micro/micro_allocator.cc
@@ -718,7 +718,9 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
   return kTfLiteOk;
 }
 
-ErrorReporter* MicroAllocator::error_reporter() { return error_reporter_; }
+ErrorReporter* MicroAllocator::error_reporter() const {
+  return error_reporter_;
+}
 
 TfLiteStatus MicroAllocator::InitGraphAndContextTensorData(
     const Model* model, TfLiteContext* context, const SubGraph* subgraph) {
diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h
index 7fc091196a5..de1e4c715cb 100644
--- a/tensorflow/lite/micro/micro_allocator.h
+++ b/tensorflow/lite/micro/micro_allocator.h
@@ -172,7 +172,7 @@ class MicroAllocator {
       const MicroOpResolver& op_resolver,
       NodeAndRegistration* node_and_registrations);
 
-  ErrorReporter* error_reporter();
+  ErrorReporter* error_reporter() const;
 
  private:
   // Initializes the graph and allocates TfLiteContext tensor data.
diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h
index bffaca9a292..34358f051d3 100644
--- a/tensorflow/lite/micro/micro_interpreter.h
+++ b/tensorflow/lite/micro/micro_interpreter.h
@@ -159,6 +159,10 @@ class MicroInterpreter {
   // arena_used_bytes() + 16.
   size_t arena_used_bytes() const { return allocator_.used_bytes(); }
 
+ protected:
+  const MicroAllocator& allocator() const { return allocator_; }
+  const TfLiteContext& context() const { return context_; }
+
  private:
   // TODO(b/158263161): Consider switching to Create() function to enable better
   // error reporting during initialization.
diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc
index 7cf38d1df98..f3a246747d1 100644
--- a/tensorflow/lite/micro/recording_micro_allocator.cc
+++ b/tensorflow/lite/micro/recording_micro_allocator.cc
@@ -29,18 +29,23 @@ RecordingMicroAllocator::RecordingMicroAllocator(
       recording_memory_allocator_(recording_memory_allocator) {}
 
 RecordingMicroAllocator* RecordingMicroAllocator::Create(
-    RecordingSimpleMemoryAllocator* memory_allocator,
-    ErrorReporter* error_reporter) {
-  TFLITE_DCHECK(memory_allocator != nullptr);
-  uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
+    uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) {
+  TFLITE_DCHECK(error_reporter != nullptr);
+
+  RecordingSimpleMemoryAllocator* simple_memory_allocator =
+      RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena,
+                                             arena_size);
+  TFLITE_DCHECK(simple_memory_allocator != nullptr);
+
+  uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
       sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
   RecordingMicroAllocator* allocator = new (allocator_buffer)
-      RecordingMicroAllocator(memory_allocator, error_reporter);
+      RecordingMicroAllocator(simple_memory_allocator, error_reporter);
   return allocator;
 }
 
 RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
-    RecordedAllocationType allocation_type) {
+    RecordedAllocationType allocation_type) const {
   switch (allocation_type) {
     case RecordedAllocationType::kTfLiteTensorArray:
       return recorded_tflite_tensor_array_data_;
@@ -56,7 +61,12 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
   return RecordedAllocation();
 }
 
-void RecordingMicroAllocator::PrintAllocations() {
+const RecordingSimpleMemoryAllocator*
+RecordingMicroAllocator::GetSimpleMemoryAllocator() const {
+  return recording_memory_allocator_;
+}
+
+void RecordingMicroAllocator::PrintAllocations() const {
   TF_LITE_REPORT_ERROR(
       error_reporter(),
       "[RecordingMicroAllocator] Arena allocation total %d bytes",
@@ -70,24 +80,27 @@ void RecordingMicroAllocator::PrintAllocations() {
       "[RecordingMicroAllocator] Arena allocation tail %d bytes",
       recording_memory_allocator_->GetTailUsedBytes());
   PrintRecordedAllocation(RecordedAllocationType::kTfLiteTensorArray,
-                          "TfLiteTensor struct allocation");
+                          "TfLiteTensor struct allocation", "tensors");
   PrintRecordedAllocation(
       RecordedAllocationType::kTfLiteTensorArrayQuantizationData,
-      "TfLiteTensor quantization data allocations");
+      "TfLiteTensor quantization data allocations", "allocations");
   PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray,
-                          "NodeAndRegistration struct allocation");
+                          "NodeAndRegistration struct allocation",
+                          "NodeAndRegistration structs");
   PrintRecordedAllocation(RecordedAllocationType::kOpData,
-                          "Operator runtime data allocation");
+                          "Operator runtime data allocation", "OpData structs");
 }
 
 void RecordingMicroAllocator::PrintRecordedAllocation(
-    RecordedAllocationType allocation_type, const char* allocation_name) {
+    RecordedAllocationType allocation_type, const char* allocation_name,
+    const char* allocation_description) const {
   RecordedAllocation allocation = GetRecordedAllocation(allocation_type);
-  TF_LITE_REPORT_ERROR(error_reporter(),
-                       "[RecordingMicroAllocator] '%s' used %d bytes "
-                       "(requested %d bytes %d times)",
-                       allocation_name, allocation.used_bytes,
-                       allocation.requested_bytes, allocation.count);
+  TF_LITE_REPORT_ERROR(
+      error_reporter(),
+      "[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead "
+      "(requested %d bytes for %d %s)",
+      allocation_name, allocation.used_bytes, allocation.requested_bytes,
+      allocation.count, allocation_description);
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray(
diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h
index 25ff82d8dbd..6c33331f05b 100644
--- a/tensorflow/lite/micro/recording_micro_allocator.h
+++ b/tensorflow/lite/micro/recording_micro_allocator.h
@@ -51,17 +51,19 @@ typedef struct RecordedAllocation {
 // auditing memory usage or integration testing.
 class RecordingMicroAllocator : public MicroAllocator {
  public:
-  static RecordingMicroAllocator* Create(
-      RecordingSimpleMemoryAllocator* memory_allocator,
-      ErrorReporter* error_reporter);
+  static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
+                                         size_t arena_size,
+                                         ErrorReporter* error_reporter);
 
   // Returns the recorded allocations information for a given allocation type.
   RecordedAllocation GetRecordedAllocation(
-      RecordedAllocationType allocation_type);
+      RecordedAllocationType allocation_type) const;
+
+  const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
 
   // Logs out through the ErrorReporter all allocation recordings by type
   // defined in RecordedAllocationType.
-  void PrintAllocations();
+  void PrintAllocations() const;
 
  protected:
   TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context,
@@ -85,9 +87,10 @@ class RecordingMicroAllocator : public MicroAllocator {
                           ErrorReporter* error_reporter);
 
   void PrintRecordedAllocation(RecordedAllocationType allocation_type,
-                               const char* allocation_name);
+                               const char* allocation_name,
+                               const char* allocation_description) const;
 
-  RecordingSimpleMemoryAllocator* recording_memory_allocator_;
+  const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
 
   RecordedAllocation recorded_tflite_tensor_array_data_;
   RecordedAllocation recorded_tflite_tensor_array_quantization_data_;
diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc
index b256cc9a86b..9bbe0f405d4 100644
--- a/tensorflow/lite/micro/recording_micro_allocator_test.cc
+++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc
@@ -39,11 +39,9 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) {
   tflite::NodeAndRegistration* node_and_registration;
   const tflite::Model* model = tflite::GetModel(kTestConvModelData);
   uint8_t arena[kTestConvArenaSize];
-  tflite::RecordingSimpleMemoryAllocator memory_allocator(
-      micro_test::reporter, arena, kTestConvArenaSize);
 
   tflite::RecordingMicroAllocator* micro_allocator =
-      tflite::RecordingMicroAllocator::Create(&memory_allocator,
+      tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
                                               micro_test::reporter);
   TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
   TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
@@ -68,11 +66,9 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
   tflite::NodeAndRegistration* node_and_registration;
   const tflite::Model* model = tflite::GetModel(kTestConvModelData);
   uint8_t arena[kTestConvArenaSize];
-  tflite::RecordingSimpleMemoryAllocator memory_allocator(
-      micro_test::reporter, arena, kTestConvArenaSize);
 
   tflite::RecordingMicroAllocator* micro_allocator =
-      tflite::RecordingMicroAllocator::Create(&memory_allocator,
+      tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
                                               micro_test::reporter);
   TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
   TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
@@ -126,11 +122,9 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) {
   tflite::NodeAndRegistration* node_and_registration;
   const tflite::Model* model = tflite::GetModel(kTestConvModelData);
   uint8_t arena[kTestConvArenaSize];
-  tflite::RecordingSimpleMemoryAllocator memory_allocator(
-      micro_test::reporter, arena, kTestConvArenaSize);
 
   tflite::RecordingMicroAllocator* micro_allocator =
-      tflite::RecordingMicroAllocator::Create(&memory_allocator,
+      tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize,
                                               micro_test::reporter);
   TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator);
   TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation(
diff --git a/tensorflow/lite/micro/recording_micro_interpreter.h b/tensorflow/lite/micro/recording_micro_interpreter.h
new file mode 100644
index 00000000000..dcb0b431f29
--- /dev/null
+++ b/tensorflow/lite/micro/recording_micro_interpreter.h
@@ -0,0 +1,58 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
+#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
+
+#include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/micro/recording_micro_allocator.h"
+
+namespace tflite {
+
+// Utility subclass that enables internal recordings of the MicroInterpreter.
+// This class should be used to audit and analyze memory arena usage for a given
+// model and interpreter.
+//
+// After construction and the first Invoke() or AllocateTensors() call, the
+// memory usage is recorded and available through the GetMicroAllocator()
+// function. See RecordingMicroAllocator for more details on what is currently
+// recorded from arena allocations.
+//
+// It is recommended that users increase the tensor arena size by at least 1KB
+// to ensure enough additional memory is available for internal recordings.
+class RecordingMicroInterpreter : public MicroInterpreter {
+ public:
+  RecordingMicroInterpreter(const Model* model,
+                            const MicroOpResolver* op_resolver,
+                            uint8_t* tensor_arena, size_t tensor_arena_size,
+                            ErrorReporter* error_reporter)
+      : MicroInterpreter(model, op_resolver,
+                         RecordingMicroAllocator::Create(
+                             tensor_arena, tensor_arena_size, error_reporter),
+                         error_reporter),
+        recording_micro_allocator_(
+            static_cast<const RecordingMicroAllocator&>(allocator())) {}
+
+  const RecordingMicroAllocator& GetMicroAllocator() const {
+    return recording_micro_allocator_;
+  }
+
+ private:
+  const RecordingMicroAllocator& recording_micro_allocator_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.cc b/tensorflow/lite/micro/recording_simple_memory_allocator.cc
index 934fa260e30..5e7eb5754e7 100644
--- a/tensorflow/lite/micro/recording_simple_memory_allocator.cc
+++ b/tensorflow/lite/micro/recording_simple_memory_allocator.cc
@@ -15,6 +15,10 @@ limitations under the License.
 
 #include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
 
+#include <new>
+
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+
 namespace tflite {
 
 RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
@@ -26,6 +30,20 @@ RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
 
 RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {}
 
+RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
+    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
+  TFLITE_DCHECK(error_reporter != nullptr);
+  TFLITE_DCHECK(buffer_head != nullptr);
+  RecordingSimpleMemoryAllocator tmp =
+      RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
+
+  uint8_t* allocator_buffer =
+      tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
+                           alignof(RecordingSimpleMemoryAllocator));
+  // Use the default copy constructor to populate internal states.
+  return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
+}
+
 size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const {
   return requested_bytes_;
 }
diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.h b/tensorflow/lite/micro/recording_simple_memory_allocator.h
index 77edadb35be..270d9543404 100644
--- a/tensorflow/lite/micro/recording_simple_memory_allocator.h
+++ b/tensorflow/lite/micro/recording_simple_memory_allocator.h
@@ -32,6 +32,10 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
   // functions.
   ~RecordingSimpleMemoryAllocator() override;
 
+  static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
+                                                uint8_t* buffer_head,
+                                                size_t buffer_size);
+
   // Returns the number of bytes requested from the head or tail.
   size_t GetRequestedBytes() const;
 
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index 93bda8d9139..8b6cba06a0b 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -111,13 +111,14 @@ $(wildcard tensorflow/lite/micro/memory_planner/*test.cc)
 
 # TODO(b/152645559): move all benchmarks to benchmarks directory.
 MICROLITE_BENCHMARK_SRCS := \
-$(wildcard tensorflow/lite/micro/benchmarks/*.cc)
+$(wildcard tensorflow/lite/micro/benchmarks/*benchmark.cc)
 
 MICROLITE_TEST_HDRS := \
 $(wildcard tensorflow/lite/micro/testing/*.h)
 
 MICROLITE_CC_BASE_SRCS := \
 $(wildcard tensorflow/lite/micro/*.cc) \
+$(wildcard tensorflow/lite/micro/benchmarks/*model_data.cc) \
 $(wildcard tensorflow/lite/micro/kernels/*.cc) \
 $(wildcard tensorflow/lite/micro/memory_planner/*.cc) \
 $(wildcard tensorflow/lite/micro/testing/*model.cc) \
@@ -135,6 +136,7 @@ MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_BENCHMARK_SRCS), $(MICROLITE_CC_SR
 
 MICROLITE_CC_HDRS := \
 $(wildcard tensorflow/lite/micro/*.h) \
+$(wildcard tensorflow/lite/micro/benchmarks/*model_data.h) \
 $(wildcard tensorflow/lite/micro/kernels/*.h) \
 $(wildcard tensorflow/lite/micro/memory_planner/*.h) \
 LICENSE \