diff --git a/tensorflow/lite/arena_planner.cc b/tensorflow/lite/arena_planner.cc
index eb7133234c7..2df0bcfb3be 100644
--- a/tensorflow/lite/arena_planner.cc
+++ b/tensorflow/lite/arena_planner.cc
@@ -62,6 +62,21 @@ TfLiteStatus ArenaPlanner::ResetAllocations() {
   return kTfLiteOk;
 }
 
+TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
+  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
+    if (allocs_[i].node > node && allocs_[i].size > 0) {
+      TfLiteTensor& tensor = *graph_info_->tensor(i);
+      if (tensor.allocation_type == kTfLiteArenaRw) {
+        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
+        allocs_[i].reset();
+        tensor.data.raw = nullptr;
+      }
+    }
+  }
+
+  return kTfLiteOk;
+}
+
 TfLiteStatus ArenaPlanner::PlanAllocations() {
   // Invalidate any existing data.
   TF_LITE_ENSURE_STATUS(ResetAllocations());
@@ -263,7 +278,8 @@ TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
     }
     // Handle the current item.
     if (alloc_info.type == AllocationInfo::ALLOC) {
-      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
+      TF_LITE_ENSURE_STATUS(
+          CalculateTensorAllocation(alloc_info.tensor, alloc_info.node));
     } else {
       TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
     }
@@ -298,15 +314,18 @@ TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
   return kTfLiteOk;
 }
 
-TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
+TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index,
+                                                     int node_index) {
   TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
   if (tensor.allocation_type == kTfLiteArenaRw) {
-    TF_LITE_ENSURE_STATUS(arena_.Allocate(
-        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
+    TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, tensor_alignment_,
+                                          tensor.bytes, tensor_index,
+                                          node_index, &allocs_[tensor_index]));
   }
   if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
     TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
-        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
+        context_, tensor_alignment_, tensor.bytes, tensor_index, node_index,
+        &allocs_[tensor_index]));
   }
   return kTfLiteOk;
 }
@@ -326,7 +345,8 @@ TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
     TfLiteIntArray* node_temporaries = node.temporaries;
     for (int i = 0; i < node_temporaries->size; ++i) {
       int tensor_index = node_temporaries->data[i];
-      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
+      TF_LITE_ENSURE_STATUS(
+          CalculateTensorAllocation(tensor_index, node_index));
     }
   }
   return kTfLiteOk;
diff --git a/tensorflow/lite/arena_planner.h b/tensorflow/lite/arena_planner.h
index 5c08476fbb3..b1fc1df019d 100644
--- a/tensorflow/lite/arena_planner.h
+++ b/tensorflow/lite/arena_planner.h
@@ -45,11 +45,6 @@ struct AllocationInfo;
 // execution. Since dynamic tensors don't have sizes until after the
 // corresponding operation is executed, this class supports incremental
 // planning.
-//
-// TODO(b/127354079): Remove the constrain below when the issue is fixed.
-// WARNING: MemoryPlanner's behavior must be deterministic. If the first N
-// nodes are unchanged, it must produce exactly the same allocation plan for
-// the first N nodes.
 class ArenaPlanner : public MemoryPlanner {
  public:
   // Ownership of 'context' is not taken and it must remain util the
@@ -64,6 +59,7 @@ class ArenaPlanner : public MemoryPlanner {
   ArenaPlanner& operator=(const ArenaPlanner&) = delete;
 
   TfLiteStatus ResetAllocations() override;
+  TfLiteStatus ResetAllocationsAfter(int node) override;
   TfLiteStatus PlanAllocations() override;
   TfLiteStatus ExecuteAllocations(int first_node, int last_node) override;
   TfLiteStatus ReleaseNonPersistentMemory() override;
@@ -87,7 +83,7 @@ class ArenaPlanner : public MemoryPlanner {
   TfLiteStatus ResolveTensorAllocation(int tensor_index);
 
   // Register an allocation for the given tensor.
-  TfLiteStatus CalculateTensorAllocation(int tensor_index);
+  TfLiteStatus CalculateTensorAllocation(int tensor_index, int node_index);
 
   // Register a deallocation for the given tensor.
   TfLiteStatus CalculateTensorDeallocation(int tensor_index);
diff --git a/tensorflow/lite/arena_planner_test.cc b/tensorflow/lite/arena_planner_test.cc
index 9fd85a2ac92..b38673fe787 100644
--- a/tensorflow/lite/arena_planner_test.cc
+++ b/tensorflow/lite/arena_planner_test.cc
@@ -190,6 +190,10 @@ class ArenaPlannerTest : public ::testing::Test {
     CHECK(planner_->AcquireNonPersistentMemory() == kTfLiteOk);
   }
 
+  void ResetAllocationsAfter(int node) {
+    CHECK(planner_->ResetAllocationsAfter(node) == kTfLiteOk);
+  }
+
   bool HasNonPersistentMemory() {
     return planner_ && planner_->HasNonPersistentMemory();
   }
@@ -213,6 +217,11 @@ class ArenaPlannerTest : public ::testing::Test {
     return offset;
   }
 
+  // Returns true if the given tensor is unallocated.
+  bool IsUnallocated(int tensor_index) {
+    return (*graph_->tensors())[tensor_index].data.raw == nullptr;
+  }
+
   TfLiteContext context_;
   TestGraph* graph_;
   std::unique_ptr<ArenaPlanner> planner_;
@@ -330,6 +339,37 @@ TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
   EXPECT_EQ(GetOffset(3), 0);
 }
 
+TEST_F(ArenaPlannerTest, SimpleGraphWithResetAllocationsAfter) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},   // First op
+                      {{2, 0}, {4}, {5}},  // Second op, with temporary
+                      {{4}, {3}, {}}       // Third op
+                  },
+                  {3});
+  SetGraph(&graph);
+  Execute(0, 10);
+
+  // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+  EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+  EXPECT_EQ(GetOffset(3), 0);
+
+  // Reset allocations after the first node
+  ResetAllocationsAfter(0);
+
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+  EXPECT_TRUE(IsUnallocated(3));
+  EXPECT_TRUE(IsUnallocated(4));
+  EXPECT_TRUE(IsUnallocated(5));
+}
+
 TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
   TestGraph graph({0, -1, 1},
                   {
@@ -446,10 +486,6 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
       {10});
   SetGraph(&graph);
 
-  auto is_unallocated = [&](int tensor_index) {
-    return (*graph.tensors())[tensor_index].data.raw == nullptr;
-  };
-
   // The allocation plan is made at the beginning and is independent of
   // the execution steps. Here's the allocation order:
   // Op0: +0 +1 +2 +3
@@ -463,13 +499,13 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
   EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
   EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
-  EXPECT_TRUE(is_unallocated(6));
-  EXPECT_TRUE(is_unallocated(4));
-  EXPECT_TRUE(is_unallocated(5));
-  EXPECT_TRUE(is_unallocated(7));
-  EXPECT_TRUE(is_unallocated(9));
-  EXPECT_TRUE(is_unallocated(8));
-  EXPECT_TRUE(is_unallocated(10));
+  EXPECT_TRUE(IsUnallocated(6));
+  EXPECT_TRUE(IsUnallocated(4));
+  EXPECT_TRUE(IsUnallocated(5));
+  EXPECT_TRUE(IsUnallocated(7));
+  EXPECT_TRUE(IsUnallocated(9));
+  EXPECT_TRUE(IsUnallocated(8));
+  EXPECT_TRUE(IsUnallocated(10));
 
   Execute(1, 1);
   EXPECT_EQ(GetOffset(0), 0);
@@ -479,10 +515,10 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
   EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
   EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
-  EXPECT_TRUE(is_unallocated(7));
-  EXPECT_TRUE(is_unallocated(9));
-  EXPECT_TRUE(is_unallocated(8));
-  EXPECT_TRUE(is_unallocated(10));
+  EXPECT_TRUE(IsUnallocated(7));
+  EXPECT_TRUE(IsUnallocated(9));
+  EXPECT_TRUE(IsUnallocated(8));
+  EXPECT_TRUE(IsUnallocated(10));
 
   Execute(2, 2);
   EXPECT_EQ(GetOffset(0), 0);
@@ -496,9 +532,9 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   // its deallocation freed up 24 bytes due to the alignment requirements in
   // the arena. That means we can fit #7 in the same space!
   EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
-  EXPECT_TRUE(is_unallocated(9));
-  EXPECT_TRUE(is_unallocated(8));
-  EXPECT_TRUE(is_unallocated(10));
+  EXPECT_TRUE(IsUnallocated(9));
+  EXPECT_TRUE(IsUnallocated(8));
+  EXPECT_TRUE(IsUnallocated(10));
 
   Execute(3, 3);
   EXPECT_EQ(GetOffset(0), 0);
@@ -513,7 +549,7 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   // for #9, so it goes at the end.
   EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
   EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
-  EXPECT_TRUE(is_unallocated(10));
+  EXPECT_TRUE(IsUnallocated(10));
 
   Execute(4, 4);
   EXPECT_EQ(GetOffset(0), 0);
diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc
index ae6df9acf6f..0f94ca0ae3c 100644
--- a/tensorflow/lite/core/subgraph.cc
+++ b/tensorflow/lite/core/subgraph.cc
@@ -920,16 +920,13 @@ TfLiteStatus Subgraph::Invoke() {
       // This happens when an intermediate dynamic tensor is resized.
      // We don't have to prepare all the ops, but we need to recompute
       // the allocation plan.
-      //
-      // This is a workaround for b/127354079. It relies on the property that
-      // ArenaPlanner's behavior is deterministic. A better solution is being
-      // able to "Rewind" to a specific index in ArenaPlanner.
-      // TODO(b/127354079): Improve ArenaPlanner and remove this mechanism.
       if (next_execution_plan_index_to_plan_allocation_ >
           next_execution_plan_index_to_prepare_) {
-        next_execution_plan_index_to_plan_allocation_ = 0;
+        next_execution_plan_index_to_plan_allocation_ =
+            next_execution_plan_index_to_prepare_;
         if (memory_planner_) {
-          TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
+          TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter(
+              next_execution_plan_index_to_plan_allocation_ - 1));
         }
       }
     }
diff --git a/tensorflow/lite/memory_planner.h b/tensorflow/lite/memory_planner.h
index ba3cc5cabfe..e4b6aee26b1 100644
--- a/tensorflow/lite/memory_planner.h
+++ b/tensorflow/lite/memory_planner.h
@@ -21,11 +21,6 @@ namespace tflite {
 
 // A MemoryPlanner is responsible for planning and executing a number of
 // memory-related operations that are necessary in TF Lite.
-//
-// TODO(b/127354079): Remove the constrain below when the issue is fixed.
-// WARNING: MemoryPlanner's behavior must be deterministic. If the first N
-// nodes are unchanged, it must produce exactly the same allocation plan for
-// the first N nodes.
 class MemoryPlanner {
  public:
   virtual ~MemoryPlanner() {}
@@ -44,6 +39,9 @@ class MemoryPlanner {
   // ExecuteAllocations() is called.
   virtual TfLiteStatus ResetAllocations() = 0;
 
+  // Invalidates allocations made after the given node's execution.
+  virtual TfLiteStatus ResetAllocationsAfter(int node) = 0;
+
   // NOTE: The following two methods modify the data pointers for all tensors on
   // the non-persistent arena (inputs, outputs, intermediates). If the user has
   // manually set the pointers for any of these, they would need to be set
diff --git a/tensorflow/lite/simple_memory_arena.cc b/tensorflow/lite/simple_memory_arena.cc
index 4724e816396..fd192bed138 100644
--- a/tensorflow/lite/simple_memory_arena.cc
+++ b/tensorflow/lite/simple_memory_arena.cc
@@ -34,12 +34,14 @@ namespace tflite {
 
 TfLiteStatus SimpleMemoryArena::Allocate(TfLiteContext* context,
                                          size_t alignment, size_t size,
+                                         int32_t tensor, int32_t node,
                                          ArenaAlloc* new_alloc) {
   TF_LITE_ENSURE(context, alignment <= arena_alignment_);
-
+  new_alloc->tensor = tensor;
+  new_alloc->node = node;
+  new_alloc->size = size;
   if (size == 0) {
     new_alloc->offset = 0;
-    new_alloc->size = 0;
     return kTfLiteOk;
   }
 
@@ -74,7 +76,6 @@ TfLiteStatus SimpleMemoryArena::Allocate(TfLiteContext* context,
   high_water_mark_ = std::max(high_water_mark_, best_offset + size);
 
   new_alloc->offset = best_offset;
-  new_alloc->size = size;
   allocs_.insert(best_insertion_it, *new_alloc);
 
   return kTfLiteOk;
@@ -89,15 +90,14 @@ TfLiteStatus SimpleMemoryArena::Deallocate(TfLiteContext* context,
   int erased_allocs_count = 0;
   auto it = allocs_.begin();
   while (it != allocs_.end()) {
-    if (it->offset == alloc.offset) {
-      TF_LITE_ENSURE_EQ(context, it->size, alloc.size);
+    if (it->tensor == alloc.tensor) {
       erased_allocs_count++;
       it = allocs_.erase(it);
     } else {
       ++it;
     }
   }
-  TF_LITE_ENSURE_EQ(context, erased_allocs_count, 1);
+  TF_LITE_ENSURE(context, erased_allocs_count <= 1);
 
   return kTfLiteOk;
 }
diff --git a/tensorflow/lite/simple_memory_arena.h b/tensorflow/lite/simple_memory_arena.h
index 761b1cb78f6..f8127d78719 100644
--- a/tensorflow/lite/simple_memory_arena.h
+++ b/tensorflow/lite/simple_memory_arena.h
@@ -28,10 +28,19 @@ namespace tflite {
 // underlying buffer is set, the alloc can be resolved into an actual memory
 // pointer.
 struct ArenaAlloc {
-  ArenaAlloc() : offset(0), size(0) {}
+  ArenaAlloc() { reset(); }
 
   size_t offset;
   size_t size;
+  int32_t tensor;
+  int32_t node;
+
+  inline void reset() {
+    offset = 0;
+    size = 0;
+    tensor = -1;
+    node = -1;
+  }
 
   inline bool operator<(const ArenaAlloc& other) const {
     return offset < other.offset;
@@ -53,7 +62,7 @@ class SimpleMemoryArena {
         allocs_() {}
 
   TfLiteStatus Allocate(TfLiteContext* context, size_t alignment, size_t size,
-                        ArenaAlloc* new_alloc);
+                        int32_t tensor, int32_t node, ArenaAlloc* new_alloc);
 
   TfLiteStatus Deallocate(TfLiteContext* context, const ArenaAlloc& alloc);
 
diff --git a/tensorflow/lite/simple_memory_arena_test.cc b/tensorflow/lite/simple_memory_arena_test.cc
index c1ee936d900..1d56bc37598 100644
--- a/tensorflow/lite/simple_memory_arena_test.cc
+++ b/tensorflow/lite/simple_memory_arena_test.cc
@@ -29,14 +29,14 @@ TEST(SimpleMemoryArenaTest, BasicArenaOperations) {
   SimpleMemoryArena arena(64);
   ArenaAlloc allocs[6];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
-  arena.Allocate(&context, 32, 2047, &allocs[2]);
+  arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 2, 3, &allocs[2]);
   arena.Deallocate(&context, allocs[0]);
-  arena.Allocate(&context, 32, 1023, &allocs[3]);
-  arena.Allocate(&context, 32, 2047, &allocs[4]);
+  arena.Allocate(&context, 32, 1023, 3, 4, &allocs[3]);
+  arena.Allocate(&context, 32, 2047, 4, 5, &allocs[4]);
   arena.Deallocate(&context, allocs[1]);
-  arena.Allocate(&context, 32, 1023, &allocs[5]);
+  arena.Allocate(&context, 32, 1023, 5, 6, &allocs[5]);
 
   EXPECT_EQ(allocs[0].offset, 0);
   EXPECT_EQ(allocs[1].offset, 2048);
@@ -52,7 +52,7 @@ TEST(SimpleMemoryArenaTest, BasicZeroAlloc) {
   ArenaAlloc alloc;
 
   // Zero-sized allocs should have a 0 offset and size.
-  ASSERT_EQ(arena.Allocate(&context, 32, 0, &alloc), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 0, 0, 1, &alloc), kTfLiteOk);
   EXPECT_EQ(alloc.offset, 0);
   EXPECT_EQ(alloc.size, 0);
 
@@ -73,12 +73,12 @@ TEST(SimpleMemoryArenaTest, InterleavedZeroAlloc) {
   ArenaAlloc allocs[4];
 
   // Interleave some zero and non-zero-sized allocations and deallocations.
-  ASSERT_EQ(arena.Allocate(&context, 32, 2047, &allocs[0]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 0, &allocs[1]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 1023, &allocs[2]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 0, 1, 2, &allocs[1]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 1023, 2, 3, &allocs[2]), kTfLiteOk);
   ASSERT_EQ(arena.Deallocate(&context, allocs[1]), kTfLiteOk);
   ASSERT_EQ(arena.Deallocate(&context, allocs[2]), kTfLiteOk);
-  ASSERT_EQ(arena.Allocate(&context, 32, 2047, &allocs[3]), kTfLiteOk);
+  ASSERT_EQ(arena.Allocate(&context, 32, 2047, 3, 4, &allocs[3]), kTfLiteOk);
 
   // Deallocation of a zero-sized alloc should not impact the allocator offsets.
   EXPECT_EQ(allocs[0].offset, 0);
@@ -92,9 +92,9 @@ TEST(SimpleMemoryArenaTest, TestClearPlan) {
   SimpleMemoryArena arena(64);
   ArenaAlloc allocs[9];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
-  arena.Allocate(&context, 32, 2047, &allocs[2]);
+  arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 2, 3, &allocs[2]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[0].offset, 0);
@@ -104,9 +104,9 @@ TEST(SimpleMemoryArenaTest, TestClearPlan) {
   arena.ClearPlan();
 
   // Test with smaller allocs.
-  arena.Allocate(&context, 32, 1023, &allocs[3]);
-  arena.Allocate(&context, 32, 1023, &allocs[4]);
-  arena.Allocate(&context, 32, 1023, &allocs[5]);
+  arena.Allocate(&context, 32, 1023, 3, 1, &allocs[3]);
+  arena.Allocate(&context, 32, 1023, 4, 2, &allocs[4]);
+  arena.Allocate(&context, 32, 1023, 5, 3, &allocs[5]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[3].offset, 0);
@@ -116,9 +116,9 @@ TEST(SimpleMemoryArenaTest, TestClearPlan) {
   arena.ClearPlan();
 
   // Test larger allocs which should require a reallocation.
-  arena.Allocate(&context, 32, 4095, &allocs[6]);
-  arena.Allocate(&context, 32, 4095, &allocs[7]);
-  arena.Allocate(&context, 32, 4095, &allocs[8]);
+  arena.Allocate(&context, 32, 4095, 6, 1, &allocs[6]);
+  arena.Allocate(&context, 32, 4095, 7, 2, &allocs[7]);
+  arena.Allocate(&context, 32, 4095, 8, 3, &allocs[8]);
   arena.Commit(&context);
 
   EXPECT_EQ(allocs[6].offset, 0);
@@ -132,8 +132,8 @@ TEST(SimpleMemoryArenaTest, TestClearBuffer) {
   SimpleMemoryArena arena(64);
   ArenaAlloc allocs[9];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
 
   // Should be a no-op.
   ASSERT_EQ(arena.ReleaseBuffer(), kTfLiteOk);
@@ -176,8 +176,8 @@ TEST_P(BufferAndPlanClearingTest, TestClearBufferAndClearPlan) {
   SimpleMemoryArena arena(64);
   ArenaAlloc allocs[9];
 
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
 
   ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
 
@@ -195,8 +195,8 @@ TEST_P(BufferAndPlanClearingTest, TestClearBufferAndClearPlan) {
   ASSERT_NE(arena.ResolveAlloc(&context, allocs[0], &resolved_ptr), kTfLiteOk);
 
   // Re-allocate tensors & commit.
-  arena.Allocate(&context, 32, 2047, &allocs[0]);
-  arena.Allocate(&context, 32, 2047, &allocs[1]);
+  arena.Allocate(&context, 32, 2047, 0, 1, &allocs[0]);
+  arena.Allocate(&context, 32, 2047, 1, 2, &allocs[1]);
   ASSERT_EQ(arena.Commit(&context), kTfLiteOk);
 
   // Pointer-resolution now works.
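
The sketch below is illustrative only and is not part of the patch above. It shows one way the rewind flow enabled by the new ResetAllocationsAfter() might be driven once a tensor produced mid-graph has been resized; the helper name ReplanAfterResize and the assumption of an already-planned ArenaPlanner (wired to a graph as in arena_planner_test.cc) are hypothetical.

#include "tensorflow/lite/arena_planner.h"

// Hypothetical helper (sketch, not part of the patch): drop the allocations
// recorded after `last_unchanged_node`, then re-plan only the tail of the
// execution plan. Allocations made at or before `last_unchanged_node` keep
// their arena offsets.
TfLiteStatus ReplanAfterResize(tflite::ArenaPlanner* planner,
                               int last_unchanged_node, int last_node) {
  TfLiteStatus status = planner->ResetAllocationsAfter(last_unchanged_node);
  if (status != kTfLiteOk) return status;
  return planner->ExecuteAllocations(last_unchanged_node + 1, last_node);
}

This mirrors the updated Subgraph::Invoke() logic, which now rewinds to next_execution_plan_index_to_prepare_ instead of replanning from node 0. The (tensor, node) metadata added to ArenaAlloc is what makes the partial reset possible: SimpleMemoryArena::Deallocate can now match a record by tensor id rather than by (offset, size), so allocations made after the rewind point can be released individually.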