diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD
index d43defb3e80..29ee380b460 100644
--- a/tensorflow/lite/delegates/gpu/common/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/BUILD
@@ -57,6 +57,7 @@ cc_library(
         "memory_management/internal.h",
         "memory_management/min_cost_flow_assignment.h",
         "memory_management/naive_assignment.h",
+        "memory_management/types.h",
     ],
     deps = [
         ":shape",
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management.cc b/tensorflow/lite/delegates/gpu/common/memory_management.cc
index 6c7c7283c85..87ba4251aa9 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management.cc
+++ b/tensorflow/lite/delegates/gpu/common/memory_management.cc
@@ -23,12 +23,8 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
@@ -59,6 +55,19 @@ OffsetsAssignment ObjectsToOffsets(
   return result;
 }
 
+Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
+                  ObjectsAssignment<size_t>* assignment) {
+  RETURN_IF_ERROR(
+      GreedyBySizeDistPriorityAssignment(usage_records, assignment));
+  ObjectsAssignment<size_t> assignment_by_breadth;
+  if (GreedyByBreadthAssignment(usage_records, &assignment_by_breadth).ok() &&
+      TotalSize(assignment_by_breadth) < TotalSize(*assignment)) {
+    std::swap(*assignment, assignment_by_breadth);
+  }
+  return OkStatus();
+}
+
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<size_t>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment) {
@@ -66,24 +75,15 @@ Status AssignObjectsToTensors(
     case MemoryStrategy::NAIVE:
       return NaiveAssignment(usage_records, assignment);
     case MemoryStrategy::EQUALITY:
-      return EqualityAssignment(usage_records, assignment);
+      return EqualityAssignmentWithHash(usage_records, assignment);
     case MemoryStrategy::GREEDY_IN_ORDER:
       return GreedyInOrderAssignment(usage_records, assignment);
     case MemoryStrategy::GREEDY_BY_BREADTH:
       return GreedyByBreadthAssignment(usage_records, assignment);
     case MemoryStrategy::GREEDY_BY_SIZE:
       return GreedyBySizeDistPriorityAssignment(usage_records, assignment);
-    case MemoryStrategy::GREEDY_BEST: {
-      RETURN_IF_ERROR(
-          GreedyBySizeDistPriorityAssignment(usage_records, assignment));
-      ObjectsAssignment<size_t> assignment_by_breadth;
-      if (GreedyByBreadthAssignment(usage_records, &assignment_by_breadth)
-              .ok() &&
-          TotalSize(assignment_by_breadth) < TotalSize(*assignment)) {
-        std::swap(*assignment, assignment_by_breadth);
-      }
-      return OkStatus();
-    }
+    case MemoryStrategy::GREEDY_BEST:
+      return BestGreedy(usage_records, assignment);
     case MemoryStrategy::MINCOSTFLOW:
       return MinCostFlowAssignment(usage_records, assignment);
     default:
@@ -93,6 +93,7 @@ Status AssignObjectsToTensors(
   return OkStatus();
 }
 
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<BHWC>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment) {
@@ -100,7 +101,7 @@ Status AssignObjectsToTensors(
     case MemoryStrategy::NAIVE:
       return NaiveAssignment(usage_records, assignment);
     case MemoryStrategy::EQUALITY:
-      return EqualityAssignment(usage_records, assignment);
+      return EqualityAssignmentWithHash(usage_records, assignment);
     default:
       return InternalError(
           "MemoryStrategy is not supported with current tensor size type.");
@@ -108,12 +109,15 @@ Status AssignObjectsToTensors(
   return OkStatus();
 }
 
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<uint2>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment) {
   switch (strategy) {
     case MemoryStrategy::NAIVE:
       return NaiveAssignment(usage_records, assignment);
+    case MemoryStrategy::EQUALITY:
+      return EqualityAssignment(usage_records, assignment);
     case MemoryStrategy::GREEDY_IN_ORDER:
       return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
     default:
@@ -123,12 +127,15 @@ Status AssignObjectsToTensors(
   return OkStatus();
 }
 
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<uint3>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment) {
   switch (strategy) {
     case MemoryStrategy::NAIVE:
       return NaiveAssignment(usage_records, assignment);
+    case MemoryStrategy::EQUALITY:
+      return EqualityAssignment(usage_records, assignment);
     case MemoryStrategy::GREEDY_IN_ORDER:
       return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
     default:
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management.h b/tensorflow/lite/delegates/gpu/common/memory_management.h
index fb2e3f9eb01..53d7a170d90 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management.h
@@ -21,6 +21,13 @@ limitations under the License.
 #include
 
 #include "absl/memory/memory.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
@@ -30,41 +37,6 @@ namespace gpu {
 
 using TaskId = size_t;
 
-// Record, containing tensor size/shape and IDs of the first and the last task,
-// that use this tensor as input or output. For example: tensor #3 with size
-// tensor_size=65536 is first introduced in program #2 (first_task=2) and used
-// for the last time in program #7 (last_task=7).
-template <typename TensorSizeT>
-struct TensorUsageRecord {
-  TensorSizeT tensor_size;
-  TaskId first_task;
-  TaskId last_task;
-
-  TensorUsageRecord(TensorSizeT size, TaskId first, TaskId last)
-      : tensor_size(size), first_task(first), last_task(last) {}
-
-  // Default order of tensor usage records is increasing order of first_task.
-  bool operator<(const TensorUsageRecord& other) const {
-    return first_task < other.first_task;
-  }
-};
-
-// Information about assignment of tensors to shared objects
-template <typename TensorSizeT>
-struct ObjectsAssignment {
-  // shared_object_ids_[i] is ID of shared object, that tensor i will be using.
-  std::vector<size_t> object_ids;
-  // shared_object_sizes_[i] is a size of shared object with ID equal to i.
-  std::vector<TensorSizeT> object_sizes;
-};
-
-// Information about assignment of tensors to offsets for the case, when all of
-// them are going to be allocated in one continuous memory block.
-struct OffsetsAssignment {
-  std::vector<size_t> offsets;
-  size_t total_size;
-};
-
 // Converts given assignment of tensors to shared objects to the assignment of
 // the same tensors to offsets in continuous memory block.
 OffsetsAssignment ObjectsToOffsets(
@@ -105,30 +77,46 @@ enum class MemoryStrategy {
   MINCOSTFLOW,
 };
 
+// Chooses the greedy algorithm with the lowest memory consumption for the
+// given usage records and returns the corresponding shared-object assignment.
+Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
+                  ObjectsAssignment<size_t>* assignment);
+
 // Calculates the assignement of shared objects to given tensors, including
-// objects' sizes. Initial tensor sizes are given as size_t. This function is
-// intended to use with GPU buffers and one-dimensional textures.
+// objects' sizes. Below there are specializations for different types that
+// support more memory strategies.
+template <typename TensorSizeT>
+Status AssignObjectsToTensors(
+    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
+    MemoryStrategy strategy, ObjectsAssignment<TensorSizeT>* assignment) {
+  switch (strategy) {
+    case MemoryStrategy::NAIVE:
+      return NaiveAssignment(usage_records, assignment);
+    case MemoryStrategy::EQUALITY:
+      return EqualityAssignment(usage_records, assignment);
+    default:
+      return InternalError(
+          "MemoryStrategy is not supported with current tensor size type.");
+  }
+  return OkStatus();
+}
+
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<size_t>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment);
 
-// Calculates the assignement of shared objects to given tensors, including
-// objects' sizes. Initial tensor sizes are given as BHWC. This function is
-// intended to use with OpenCL textures.
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<BHWC>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment);
 
-// Calculates the assignement of shared objects to given tensors, including
-// objects' sizes. Initial tensor sizes are given as uint2. This function is
-// intended to use with OpenGL textures.
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<uint2>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment);
 
-// Calculates the assignement of shared objects to given tensors, including
-// objects' sizes. Initial tensor sizes are given as uint3. This function is
-// intended to use with OpenGL textures.
+template <>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<uint3>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment);
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h
index a5e6c3a85eb..0955393e00c 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h
@@ -20,15 +20,16 @@ limitations under the License.
 #include
 
 #include "absl/container/flat_hash_map.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
 namespace gpu {
 
+// Fast version of EqualityAssignment for hashable types.
 template <typename TensorSizeT>
-Status EqualityAssignment(
+Status EqualityAssignmentWithHash(
     const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
     ObjectsAssignment<TensorSizeT>* assignment) {
   size_t num_records = usage_records.size();
@@ -50,7 +51,7 @@ Status EqualityAssignment(
       objects_in_use.pop();
     }
 
-    TensorSizeT tensor_size = usage_records[i].tensor_size;
+    const TensorSizeT tensor_size = usage_records[i].tensor_size;
     auto pool_it = pool.find(tensor_size);
     if (pool_it == pool.end() || pool_it->second.empty()) {
       // No free shared object with size equal to tensor_size. Create a new one,
@@ -71,6 +72,46 @@ Status EqualityAssignment(
   return OkStatus();
 }
 
+// Slower version of EqualityAssignment for unhashable types.
+template <typename TensorSizeT>
+Status EqualityAssignment(
+    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
+    ObjectsAssignment<TensorSizeT>* assignment) {
+  size_t num_records = usage_records.size();
+  assignment->object_sizes.clear();
+  assignment->object_ids.assign(num_records, kNotAssigned);
+
+  // Index of operation, after execution of which the shared object can be
+  // deallocated.
+  std::vector<TaskId> dealloc_task;
+  for (size_t i = 0; i < num_records; ++i) {
+    const TensorSizeT tensor_size = usage_records[i].tensor_size;
+    size_t best_obj = kNotAssigned;
+    for (size_t obj = 0; obj < assignment->object_sizes.size(); ++obj) {
+      // Find a shared object that has the same size as the current tensor and
+      // has been deallocated before the execution of its first_task.
+      if (dealloc_task[obj] < usage_records[i].first_task &&
+          assignment->object_sizes[obj] == tensor_size) {
+        best_obj = obj;
+        break;
+      }
+    }
+    if (best_obj == kNotAssigned) {
+      // No free shared object with size equal to tensor_size. Create a new one,
+      // assign i-th tensor to it and save its last task as deallocation task.
+      assignment->object_ids[i] = assignment->object_sizes.size();
+      assignment->object_sizes.push_back(tensor_size);
+      dealloc_task.push_back(usage_records[i].last_task);
+    } else {
+      // Shared object with id best_obj has size equal to tensor_size. Reuse
+      // this object and update its deallocation task.
+      assignment->object_ids[i] = best_obj;
+      dealloc_task[best_obj] = usage_records[i].last_task;
+    }
+  }
+  return OkStatus();
+}
+
 }  // namespace gpu
 }  // namespace tflite
 
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h
index b073c505837..c139ba0fe0f 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h
@@ -19,7 +19,7 @@ limitations under the License.
 #include
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h
index ba77a83cfc8..2cb8ceee0e1 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h
index 7acf81afd29..102171f783c 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h
@@ -17,12 +17,13 @@ limitations under the License.
 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_
 
 #include
+#include
 #include
 #include
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/internal.h b/tensorflow/lite/delegates/gpu/common/memory_management/internal.h
index 35050fd2b1d..58d9571d9f9 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/internal.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/internal.h
@@ -22,7 +22,7 @@ limitations under the License.
 #include
 
 #include "absl/memory/memory.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h
index 494dbf9abb8..7e45f83c79e 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h
@@ -18,7 +18,7 @@ limitations under the License.
 
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h b/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h
index 0d637934974..94cd41ed9a5 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include
 
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management/types.h b/tensorflow/lite/delegates/gpu/common/memory_management/types.h
new file mode 100644
index 00000000000..079a14d1069
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/types.h
@@ -0,0 +1,66 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
+
+#include
+#include
+#include
+
+namespace tflite {
+namespace gpu {
+
+using TaskId = size_t;
+
+// Record, containing tensor size/shape and IDs of the first and the last task,
+// that use this tensor as input or output. For example: tensor #3 with size
+// tensor_size=65536 is first introduced in program #2 (first_task=2) and used
+// for the last time in program #7 (last_task=7).
+template <typename TensorSizeT>
+struct TensorUsageRecord {
+  TensorSizeT tensor_size;
+  TaskId first_task;
+  TaskId last_task;
+
+  TensorUsageRecord(TensorSizeT size, TaskId first, TaskId last)
+      : tensor_size(size), first_task(first), last_task(last) {}
+
+  // Default order of tensor usage records is increasing order of first_task.
+  bool operator<(const TensorUsageRecord& other) const {
+    return first_task < other.first_task;
+  }
+};
+
+// Information about assignment of tensors to shared objects
+template <typename TensorSizeT>
+struct ObjectsAssignment {
+  // shared_object_ids_[i] is ID of shared object, that tensor i will be using.
+  std::vector<size_t> object_ids;
+  // shared_object_sizes_[i] is a size of shared object with ID equal to i.
+  std::vector<TensorSizeT> object_sizes;
+};
+
+// Information about assignment of tensors to offsets for the case, when all of
+// them are going to be allocated in one continuous memory block.
+struct OffsetsAssignment {
+  std::vector<size_t> offsets;
+  size_t total_size;
+};
+
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
diff --git a/tensorflow/lite/delegates/gpu/common/memory_management_test.cc b/tensorflow/lite/delegates/gpu/common/memory_management_test.cc
index 6b915e2caed..12f5b6ebe6c 100644
--- a/tensorflow/lite/delegates/gpu/common/memory_management_test.cc
+++ b/tensorflow/lite/delegates/gpu/common/memory_management_test.cc
@@ -317,6 +317,14 @@ TEST(Model, UInt2Records) {
                           uint2(8, 2), uint2(2, 8), uint2(1, 8), uint2(2, 8),
                           uint2(4, 1)));
 
+  ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
+                                     &assignment)
+                  .ok());
+  EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 0, 3, 1, 4, 0, 5));
+  EXPECT_THAT(assignment.object_sizes,
+              ElementsAre(uint2(2, 8), uint2(2, 8), uint2(1, 12), uint2(8, 2),
+                          uint2(1, 8), uint2(4, 1)));
+
   ASSERT_TRUE(AssignObjectsToTensors(
                   usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
                  .ok());
@@ -347,6 +355,15 @@ TEST(Model, UInt3Records) {
                           uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
                           uint3(1, 2, 1), uint3(1, 1, 1), uint3(2, 2, 2)));
 
+  ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
+                                     &assignment)
+                  .ok());
+  EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 3, 4, 5, 6, 2, 4));
+  EXPECT_THAT(assignment.object_sizes,
+              ElementsAre(uint3(1, 2, 8), uint3(4, 3, 2), uint3(1, 1, 1),
+                          uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
+                          uint3(1, 2, 1)));
+
   ASSERT_TRUE(AssignObjectsToTensors(
                   usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
                  .ok());
diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.cc b/tensorflow/lite/delegates/gpu/gl/runtime.cc
index d3678864cae..14e30389cf0 100644
--- a/tensorflow/lite/delegates/gpu/gl/runtime.cc
+++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
diff --git a/tensorflow/lite/delegates/gpu/metal/inference_context.mm b/tensorflow/lite/delegates/gpu/metal/inference_context.mm
index 2bbb5e01559..fb3a51f4694 100644
--- a/tensorflow/lite/delegates/gpu/metal/inference_context.mm
+++ b/tensorflow/lite/delegates/gpu/metal/inference_context.mm
@@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/strings/substitute.h"
 
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
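
Usage sketch (not part of the patch): a minimal example of how the refactored entry points are expected to be called after this change. It assumes only the headers touched above; the function name ExampleAssignment and the record sizes and task ranges are hypothetical values chosen for illustration.

#include <cstddef>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {

Status ExampleAssignment() {
  // Hypothetical lifetimes: tensor i is alive from first_task to last_task.
  std::vector<TensorUsageRecord<size_t>> usage_records = {
      {/*tensor_size=*/1024, /*first_task=*/0, /*last_task=*/2},
      {/*tensor_size=*/2048, /*first_task=*/1, /*last_task=*/3},
      {/*tensor_size=*/1024, /*first_task=*/3, /*last_task=*/4},
  };
  ObjectsAssignment<size_t> assignment;
  // With this patch, GREEDY_BEST for size_t records routes through
  // BestGreedy(), which keeps the cheaper of the two greedy assignments.
  RETURN_IF_ERROR(AssignObjectsToTensors(
      usage_records, MemoryStrategy::GREEDY_BEST, &assignment));
  // assignment.object_ids[i] is the shared object serving tensor i;
  // assignment.object_sizes[id] is that object's size in bytes.
  return OkStatus();
}

}  // namespace gpu
}  // namespace tflite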