Track variable tensor buffer allocation in the "recording" MicroAllocator.

The RecordingMicroAllocator class currently doesn't track variable tensor allocations. This was noticed because the measured allocations for the keyword model left ~10KB of tail space unaccounted for. This change tracks variable tensor allocations for the keyword model (the test conv model does not have any variable tensors).

Total and tail allocations creep up a bit here to hold the additional recording fields in RecordingMicroAllocator:

TestKeywordModelMemoryThreshold:
-------------------------------
[RecordingMicroAllocator] Arena allocation total 21472 bytes
[RecordingMicroAllocator] Arena allocation head 672 bytes
[RecordingMicroAllocator] Arena allocation tail 20800 bytes
[RecordingMicroAllocator] 'TfLiteTensor struct' used 6048 bytes with alignment overhead (requested 6048 bytes for 54 tensors)
[RecordingMicroAllocator] 'TfLiteTensor quantization data' used 2160 bytes with alignment overhead (requested 2160 bytes for 162 allocations)
[RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 10240 bytes with alignment overhead (requested 10240 bytes for 7 allocations)
[RecordingMicroAllocator] 'NodeAndRegistration struct' used 1200 bytes with alignment overhead (requested 1200 bytes for 15 NodeAndRegistration structs)
[RecordingMicroAllocator] 'Operator runtime data' used 148 bytes with alignment overhead (requested 148 bytes for 13 OpData structs)

TestConvModelMemoryThreshold:
-----------------------------
[RecordingMicroAllocator] Arena allocation total 12128 bytes
[RecordingMicroAllocator] Arena allocation head 7744 bytes
[RecordingMicroAllocator] Arena allocation tail 4384 bytes
[RecordingMicroAllocator] 'TfLiteTensor struct' used 1680 bytes with alignment overhead (requested 1680 bytes for 15 tensors)
[RecordingMicroAllocator] 'TfLiteTensor quantization data' used 1216 bytes with alignment overhead (requested 1216 bytes for 36 allocations)
[RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 0 bytes with alignment overhead (requested 0 bytes for 0 allocations)
[RecordingMicroAllocator] 'NodeAndRegistration struct' used 560 bytes with alignment overhead (requested 560 bytes for 7 NodeAndRegistration structs)
[RecordingMicroAllocator] 'Operator runtime data' used 136 bytes with alignment overhead (requested 136 bytes for 5 OpData structs)
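These numbers are internally consistent: for the keyword model, head (672) + tail (20800) = 21472 total, and the newly reported 10240 bytes of variable buffer data is the ~10KB of tail space that previously showed up in the total but was not attributed to any recorded category. The thresholds themselves only grow by 32 bytes per model (21440 -> 21472 and 12096 -> 12128) for the extra recording state.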
PiperOrigin-RevId: 316166016
Change-Id: I7d806f901b39e5d6a73c3baaf11d85fa7f6e17b1
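For reference, the new recording type can be queried the same way the updated threshold test does below. A minimal sketch, assuming only the GetRecordedAllocation() API, RecordedAllocationType::kTfLiteTensorVariableBufferData, and the used_bytes/requested_bytes fields shown in this change; the helper function and its name are hypothetical:

#include <cstddef>

#include "tensorflow/lite/micro/recording_micro_allocator.h"

// Hypothetical helper: returns the arena tail bytes (including alignment
// overhead) handed out for variable tensor buffers, as recorded after a
// model has been fully allocated through a RecordingMicroAllocator.
size_t VariableTensorBufferBytes(tflite::RecordingMicroAllocator& allocator) {
  const tflite::RecordedAllocation recorded = allocator.GetRecordedAllocation(
      tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData);
  // used_bytes >= requested_bytes; the difference is alignment overhead.
  return recorded.used_bytes;
}

For the keyword model above this would return 10240; PrintAllocations() emits the per-category log lines quoted in this message.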
Author: Nick Kreeger, 2020-06-12 13:25:37 -07:00 (committed by TensorFlower Gardener)
parent 3dd8cb721a
commit cd6a929e30
5 changed files with 68 additions and 33 deletions

@@ -40,9 +40,10 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kKeywordModelTotalSize = 21440;
+constexpr int kKeywordModelTotalSize = 21472;
 constexpr int kKeywordModelHeadSize = 672;
-constexpr int kKeywordModelTailSize = 20768;
+constexpr int kKeywordModelTailSize = 20800;
+constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
 constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 2160;
 constexpr int kKeywordModelOpRuntimeDataSize = 148;
@@ -54,9 +55,9 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kTestConvModelTotalSize = 12096;
+constexpr int kTestConvModelTotalSize = 12128;
 constexpr int kTestConvModelHeadSize = 7744;
-constexpr int kTestConvModelTailSize = 4352;
+constexpr int kTestConvModelTailSize = 4384;
 constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 1216;
 constexpr int kTestConvModelOpRuntimeDataSize = 136;
@@ -66,6 +67,7 @@ struct ModelAllocationThresholds {
   size_t total_alloc_size = 0;
   size_t head_alloc_size = 0;
   size_t tail_alloc_size = 0;
+  size_t tensor_variable_buffer_data_size = 0;
   size_t tensor_quantization_data_size = 0;
   size_t op_runtime_data_size = 0;
 };
@@ -102,6 +104,12 @@ void ValidateModelAllocationThresholds(
               tflite::RecordedAllocationType::kTfLiteTensorArray)
           .used_bytes,
       sizeof(TfLiteTensor) * thresholds.tensor_count);
+  EnsureAllocatedSizeThreshold(
+      allocator
+          .GetRecordedAllocation(
+              tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData)
+          .used_bytes,
+      thresholds.tensor_variable_buffer_data_size);
   EnsureAllocatedSizeThreshold(
       allocator
           .GetRecordedAllocation(tflite::RecordedAllocationType::
@@ -141,6 +149,8 @@ TF_LITE_MICRO_TEST(TestKeywordModelMemoryThreshold) {
   thresholds.total_alloc_size = kKeywordModelTotalSize;
   thresholds.head_alloc_size = kKeywordModelHeadSize;
   thresholds.tail_alloc_size = kKeywordModelTailSize;
+  thresholds.tensor_variable_buffer_data_size =
+      kKeywordModelTfLiteTensorVariableBufferDataSize;
   thresholds.tensor_quantization_data_size =
       kKeywordModelTfLiteTensorQuantizationDataSize;
   thresholds.op_runtime_data_size = kKeywordModelOpRuntimeDataSize;

@@ -77,23 +77,6 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
   TF_LITE_REMOVE_VIRTUAL_DELETE
 };
-TfLiteStatus AllocateVariables(
-    const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* flatbuffer_tensors,
-    TfLiteTensor* runtime_tensors, SimpleMemoryAllocator* allocator) {
-  for (size_t i = 0; i < flatbuffer_tensors->size(); ++i) {
-    if (flatbuffer_tensors->Get(i)->is_variable()) {
-      runtime_tensors[i].data.data = allocator->AllocateFromTail(
-          runtime_tensors[i].bytes, kBufferAlignment);
-      // Allocation failure.
-      if (runtime_tensors[i].data.data == nullptr) {
-        return kTfLiteError;
-      }
-    }
-    tflite::ResetVariableTensor(&(runtime_tensors[i]));
-  }
-  return kTfLiteOk;
-}
 // A helper class to construct AllocationInfo array. This array contains the
 // lifetime of tensors / scratch_buffer and will be used to calculate the memory
 // plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
@@ -508,9 +491,8 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model,
   const SubGraph* subgraph = GetSubGraphFromModel(model);
   TFLITE_DCHECK(subgraph != nullptr);
-  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(subgraph, context));
-  TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph->tensors(), context->tensors,
-                                          memory_allocator_));
+  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(context, subgraph));
+  TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph));
   model_is_allocating_ = false;
   return kTfLiteOk;
@@ -715,6 +697,26 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
   return kTfLiteOk;
 }
+TfLiteStatus MicroAllocator::AllocateVariables(TfLiteContext* context,
+                                               const SubGraph* subgraph) {
+  for (size_t i = 0; i < context->tensors_size; ++i) {
+    if (subgraph->tensors()->Get(i)->is_variable()) {
+      context->tensors[i].data.data = memory_allocator_->AllocateFromTail(
+          context->tensors[i].bytes, kBufferAlignment);
+      // Allocation failure.
+      if (context->tensors[i].data.data == nullptr) {
+        TF_LITE_REPORT_ERROR(error_reporter_,
+                             "Failed to allocate variable tensor of size %d",
+                             context->tensors[i].bytes);
+        return kTfLiteError;
+      }
+    }
+    tflite::ResetVariableTensor(&(context->tensors[i]));
+  }
+  return kTfLiteOk;
+}
 ErrorReporter* MicroAllocator::error_reporter() const {
   return error_reporter_;
 }
@@ -737,8 +739,8 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
   return (*subgraphs)[0];
 }
-TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const SubGraph* subgraph,
-                                                    TfLiteContext* context) {
+TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(TfLiteContext* context,
+                                                    const SubGraph* subgraph) {
   // Create static memory plan
   // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
   // 2. Add them into the planner (such as the GreedyMemoryPlanner).

@@ -172,6 +172,10 @@ class MicroAllocator {
       const MicroOpResolver& op_resolver,
       NodeAndRegistration* node_and_registrations);
+  // Allocates persistent tensor buffers for variable tensors in the subgraph.
+  virtual TfLiteStatus AllocateVariables(TfLiteContext* context,
+                                         const SubGraph* subgraph);
   ErrorReporter* error_reporter() const;
  private:
@@ -185,8 +189,8 @@ class MicroAllocator {
   // Commits a memory plan for all non-persistent buffer allocations in the
   // 'head' section of the memory arena.
-  virtual TfLiteStatus CommitStaticMemoryPlan(const SubGraph* subgraph,
-                                              TfLiteContext* context);
+  virtual TfLiteStatus CommitStaticMemoryPlan(TfLiteContext* context,
+                                              const SubGraph* subgraph);
   // A simple memory allocator that always allocate from the arena tail or head.
   SimpleMemoryAllocator* memory_allocator_;

@@ -51,6 +51,8 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
       return recorded_tflite_tensor_array_data_;
     case RecordedAllocationType::kTfLiteTensorArrayQuantizationData:
       return recorded_tflite_tensor_array_quantization_data_;
+    case RecordedAllocationType::kTfLiteTensorVariableBufferData:
+      return recorded_tflite_tensor_variable_buffer_data_;
    case RecordedAllocationType::kNodeAndRegistrationArray:
       return recorded_node_and_registration_array_data_;
     case RecordedAllocationType::kOpData:
@@ -80,15 +82,18 @@ void RecordingMicroAllocator::PrintAllocations() const {
       "[RecordingMicroAllocator] Arena allocation tail %d bytes",
       recording_memory_allocator_->GetTailUsedBytes());
   PrintRecordedAllocation(RecordedAllocationType::kTfLiteTensorArray,
-                          "TfLiteTensor struct allocation", "tensors");
+                          "TfLiteTensor struct", "tensors");
   PrintRecordedAllocation(
       RecordedAllocationType::kTfLiteTensorArrayQuantizationData,
-      "TfLiteTensor quantization data allocations", "allocations");
+      "TfLiteTensor quantization data", "allocations");
+  PrintRecordedAllocation(
+      RecordedAllocationType::kTfLiteTensorVariableBufferData,
+      "TfLiteTensor variable buffer data", "allocations");
   PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray,
-                          "NodeAndRegistration struct allocation",
+                          "NodeAndRegistration struct",
                           "NodeAndRegistration structs");
   PrintRecordedAllocation(RecordedAllocationType::kOpData,
-                          "Operator runtime data allocation", "OpData structs");
+                          "Operator runtime data", "OpData structs");
 }
 void RecordingMicroAllocator::PrintRecordedAllocation(
@@ -154,6 +159,16 @@ RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
   return status;
 }
+TfLiteStatus RecordingMicroAllocator::AllocateVariables(
+    TfLiteContext* context, const SubGraph* subgraph) {
+  SnapshotAllocationUsage(recorded_tflite_tensor_variable_buffer_data_);
+  TfLiteStatus status = MicroAllocator::AllocateVariables(context, subgraph);
+  RecordAllocationUsage(recorded_tflite_tensor_variable_buffer_data_);
+  return status;
+}
 void RecordingMicroAllocator::SnapshotAllocationUsage(
     RecordedAllocation& recorded_allocation) {
   recorded_allocation.requested_bytes =

@@ -27,8 +27,9 @@ namespace tflite {
 enum class RecordedAllocationType {
   kTfLiteTensorArray,
   kTfLiteTensorArrayQuantizationData,
+  kTfLiteTensorVariableBufferData,
   kNodeAndRegistrationArray,
-  kOpData
+  kOpData,
 };
 // Container for holding information about allocation recordings by a given
@@ -78,6 +79,8 @@ class RecordingMicroAllocator : public MicroAllocator {
       const Model* model, const SubGraph* subgraph,
       const MicroOpResolver& op_resolver,
       NodeAndRegistration* node_and_registrations) override;
+  TfLiteStatus AllocateVariables(TfLiteContext* context,
+                                 const SubGraph* subgraph) override;
   void SnapshotAllocationUsage(RecordedAllocation& recorded_allocation);
   void RecordAllocationUsage(RecordedAllocation& recorded_allocation);
@@ -94,6 +97,7 @@ class RecordingMicroAllocator : public MicroAllocator {
   RecordedAllocation recorded_tflite_tensor_array_data_;
   RecordedAllocation recorded_tflite_tensor_array_quantization_data_;
+  RecordedAllocation recorded_tflite_tensor_variable_buffer_data_;
   RecordedAllocation recorded_node_and_registration_array_data_;
   RecordedAllocation recorded_op_data_;