Create template version of AssignObjectsToTensors.

Add version of Equality memory management algorithm for unhashable types. PiperOrigin-RevId: 268586124
2019-09-11 18:19:17 -07:00 · 2019-09-11 18:19:17 -07:00 · da53d5960e
commit da53d5960e
parent 6cb5fb444c
14 changed files with 195 additions and 72 deletions
--- a/tensorflow/lite/delegates/gpu/common/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/BUILD
@ -57,6 +57,7 @@ cc_library(
        "memory_management/internal.h",
        "memory_management/min_cost_flow_assignment.h",
        "memory_management/naive_assignment.h",
        "memory_management/types.h",
    ],
    deps = [
        ":shape",
--- a/tensorflow/lite/delegates/gpu/common/memory_management.cc
+++ b/tensorflow/lite/delegates/gpu/common/memory_management.cc
@ -23,12 +23,8 @@ limitations under the License.
 #include <type_traits>
 #include <vector>
 #include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
@ -59,6 +55,19 @@ OffsetsAssignment ObjectsToOffsets(
  return result;
 }
 // Picks the cheaper of two greedy assignments for `usage_records`.
 //
 // The size-based greedy strategy runs first and writes into `assignment`; if
 // it fails, that status is returned through RETURN_IF_ERROR. The breadth-based
 // strategy is then tried as an alternative: its result replaces `assignment`
 // only when it succeeded and its TotalSize() is strictly smaller. A failure of
 // the breadth-based strategy is not reported to the caller.
 Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
                   ObjectsAssignment<size_t>* assignment) {
   RETURN_IF_ERROR(
       GreedyBySizeDistPriorityAssignment(usage_records, assignment));

   ObjectsAssignment<size_t> breadth_candidate;
   const bool breadth_succeeded =
       GreedyByBreadthAssignment(usage_records, &breadth_candidate).ok();
   if (!breadth_succeeded) {
     // Keep the size-based result; it is already stored in `assignment`.
     return OkStatus();
   }
   if (TotalSize(breadth_candidate) < TotalSize(*assignment)) {
     std::swap(*assignment, breadth_candidate);
   }
   return OkStatus();
 }
 template <>
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment) {
@ -66,24 +75,15 @@ Status AssignObjectsToTensors(
    case MemoryStrategy::NAIVE:
      return NaiveAssignment(usage_records, assignment);
    case MemoryStrategy::EQUALITY:
-      return EqualityAssignment(usage_records, assignment);
+      return EqualityAssignmentWithHash(usage_records, assignment);
    case MemoryStrategy::GREEDY_IN_ORDER:
      return GreedyInOrderAssignment(usage_records, assignment);
    case MemoryStrategy::GREEDY_BY_BREADTH:
      return GreedyByBreadthAssignment(usage_records, assignment);
    case MemoryStrategy::GREEDY_BY_SIZE:
      return GreedyBySizeDistPriorityAssignment(usage_records, assignment);
-    case MemoryStrategy::GREEDY_BEST: {
+    case MemoryStrategy::GREEDY_BEST:
-      RETURN_IF_ERROR(
+      return BestGreedy(usage_records, assignment);
          GreedyBySizeDistPriorityAssignment(usage_records, assignment));
      ObjectsAssignment<size_t> assignment_by_breadth;
      if (GreedyByBreadthAssignment(usage_records, &assignment_by_breadth)
              .ok() &&
          TotalSize(assignment_by_breadth) < TotalSize(*assignment)) {
        std::swap(*assignment, assignment_by_breadth);
      }
      return OkStatus();
    }
    case MemoryStrategy::MINCOSTFLOW:
      return MinCostFlowAssignment(usage_records, assignment);
    default:
@ -93,6 +93,7 @@ Status AssignObjectsToTensors(
  return OkStatus();
 }
 template <>
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<BHWC>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment) {
@ -100,7 +101,7 @@ Status AssignObjectsToTensors(
    case MemoryStrategy::NAIVE:
      return NaiveAssignment(usage_records, assignment);
    case MemoryStrategy::EQUALITY:
-      return EqualityAssignment(usage_records, assignment);
+      return EqualityAssignmentWithHash(usage_records, assignment);
    default:
      return InternalError(
          "MemoryStrategy is not supported with current tensor size type.");
@ -108,12 +109,15 @@ Status AssignObjectsToTensors(
  return OkStatus();
 }
 template <>
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<uint2>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment) {
  switch (strategy) {
    case MemoryStrategy::NAIVE:
      return NaiveAssignment(usage_records, assignment);
    case MemoryStrategy::EQUALITY:
      return EqualityAssignment(usage_records, assignment);
    case MemoryStrategy::GREEDY_IN_ORDER:
      return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
    default:
@ -123,12 +127,15 @@ Status AssignObjectsToTensors(
  return OkStatus();
 }
 template <>
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<uint3>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment) {
  switch (strategy) {
    case MemoryStrategy::NAIVE:
      return NaiveAssignment(usage_records, assignment);
    case MemoryStrategy::EQUALITY:
      return EqualityAssignment(usage_records, assignment);
    case MemoryStrategy::GREEDY_IN_ORDER:
      return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
    default:
--- a/tensorflow/lite/delegates/gpu/common/memory_management.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management.h
@ -21,6 +21,13 @@ limitations under the License.
 #include <vector>
 #include "absl/memory/memory.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
@ -30,41 +37,6 @@ namespace gpu {
 // Identifier of a task that uses tensors as inputs or outputs.
 using TaskId = size_t;

 // Usage interval of one tensor: its size (or shape) together with the IDs of
 // the first and the last task that touch it. Example: a tensor with
 // tensor_size=65536 that first appears in program #2 and is last used in
 // program #7 is stored as {65536, first_task=2, last_task=7}.
 template <typename TensorSizeT>
 struct TensorUsageRecord {
   TensorSizeT tensor_size;
   TaskId first_task;
   TaskId last_task;

   TensorUsageRecord(TensorSizeT size_value, TaskId first_id, TaskId last_id)
       : tensor_size(size_value), first_task(first_id), last_task(last_id) {}

   // Records compare by first_task only; tensor_size and last_task do not
   // take part in the ordering.
   bool operator<(const TensorUsageRecord& rhs) const {
     return rhs.first_task > first_task;
   }
 };
 // Information about assignment of tensors to shared objects.
 // TensorSizeT is the type used to express the size of one shared object.
 template <typename TensorSizeT>
 struct ObjectsAssignment {
  // object_ids[i] is the ID of the shared object that tensor i will be using.
  std::vector<size_t> object_ids;
  // object_sizes[j] is the size of the shared object whose ID is equal to j.
  std::vector<TensorSizeT> object_sizes;
 };
 // Information about assignment of tensors to offsets for the case, when all of
 // them are going to be allocated in one continuous memory block.
 struct OffsetsAssignment {
  // Presumably offsets[i] is the offset of tensor i inside the block — confirm
  // against ObjectsToOffsets().
  std::vector<size_t> offsets;
  // Size of the whole block. NOTE: there is no default initializer, so a
  // default-initialized OffsetsAssignment leaves this value indeterminate.
  size_t total_size;
 };
 // Converts given assignment of tensors to shared objects to the assignment of
 // the same tensors to offsets in continuous memory block.
 OffsetsAssignment ObjectsToOffsets(
@ -105,30 +77,46 @@ enum class MemoryStrategy {
  MINCOSTFLOW,
 };
 // Chooses greedy algorithm with the lowest memory consumption for given usage
 // records and returns corresponding shared objects assignment.
 Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
                  ObjectsAssignment<size_t>* assignment);
 // Calculates the assignment of shared objects to given tensors, including
-// objects' sizes. Initial tensor sizes are given as size_t. This function is
+// objects' sizes. Below there are specializations for different types, that
-// intended to use with GPU buffers and one-dimensional textures.
+// support more memory strategies.
 template <typename TensorSizeT>
 Status AssignObjectsToTensors(
     const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
     MemoryStrategy strategy, ObjectsAssignment<TensorSizeT>* assignment) {
   // The generic version only dispatches to NAIVE and EQUALITY; every other
   // strategy is rejected with an internal error.
   if (strategy == MemoryStrategy::NAIVE) {
     return NaiveAssignment(usage_records, assignment);
   }
   if (strategy == MemoryStrategy::EQUALITY) {
     return EqualityAssignment(usage_records, assignment);
   }
   return InternalError(
       "MemoryStrategy is not supported with current tensor size type.");
 }
 template <>
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment);
-// Calculates the assignement of shared objects to given tensors, including
+template <>
 // objects' sizes. Initial tensor sizes are given as BHWC. This function is
 // intended to use with OpenCL textures.
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<BHWC>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment);
-// Calculates the assignement of shared objects to given tensors, including
+template <>
 // objects' sizes. Initial tensor sizes are given as uint2. This function is
 // intended to use with OpenGL textures.
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<uint2>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment);
-// Calculates the assignement of shared objects to given tensors, including
+template <>
 // objects' sizes. Initial tensor sizes are given as uint3. This function is
 // intended to use with OpenGL textures.
 Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<uint3>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment);
--- a/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h
@ -20,15 +20,16 @@ limitations under the License.
 #include <vector>
 #include "absl/container/flat_hash_map.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
 namespace gpu {
 // Fast version of Equality Assignments for hashable types.
 template <typename TensorSizeT>
-Status EqualityAssignment(
+Status EqualityAssignmentWithHash(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment) {
  size_t num_records = usage_records.size();
@ -50,7 +51,7 @@ Status EqualityAssignment(
      objects_in_use.pop();
    }
-    TensorSizeT tensor_size = usage_records[i].tensor_size;
+    const TensorSizeT tensor_size = usage_records[i].tensor_size;
    auto pool_it = pool.find(tensor_size);
    if (pool_it == pool.end() || pool_it->second.empty()) {
      // No free shared object with size equal to tensor_size. Create a new one,
@ -71,6 +72,46 @@ Status EqualityAssignment(
  return OkStatus();
 }
 // Equality assignment for size types that only provide operator== (no hash).
 //
 // Records are processed in the given order. For each record the existing
 // shared objects are scanned linearly, and the first one (lowest ID) that has
 // exactly the same size and whose last recorded task is strictly before the
 // record's first_task is reused. If none qualifies, a new shared object of
 // that size is appended.
 // NOTE(review): the reuse check presumably relies on usage_records being
 // ordered by first_task — confirm against callers.
 template <typename TensorSizeT>
 Status EqualityAssignment(
     const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
     ObjectsAssignment<TensorSizeT>* assignment) {
   const size_t record_count = usage_records.size();
   assignment->object_sizes.clear();
   assignment->object_ids.assign(record_count, kNotAssigned);

   // release_after[k] is the last task of the tensor most recently placed into
   // shared object k; the object is free again once that task has finished.
   std::vector<size_t> release_after;

   for (size_t rec = 0; rec < record_count; ++rec) {
     const auto& record = usage_records[rec];
     const size_t object_count = assignment->object_sizes.size();

     // Look for the first shared object that is already free when this tensor
     // is first needed and whose size matches exactly.
     size_t reusable = kNotAssigned;
     for (size_t k = 0; k < object_count; ++k) {
       const bool is_free = release_after[k] < record.first_task;
       if (is_free && assignment->object_sizes[k] == record.tensor_size) {
         reusable = k;
         break;
       }
     }

     if (reusable != kNotAssigned) {
       // Reuse the matching object and extend its busy interval to the last
       // task of the current tensor.
       assignment->object_ids[rec] = reusable;
       release_after[reusable] = record.last_task;
       continue;
     }

     // Nothing suitable is free: create a new shared object with this size and
     // remember when it can be released.
     assignment->object_ids[rec] = object_count;
     assignment->object_sizes.push_back(record.tensor_size);
     release_after.push_back(record.last_task);
   }
   return OkStatus();
 }
 }  // namespace gpu
 }  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h
@ -19,7 +19,7 @@ limitations under the License.
 #include <cstdint>
 #include <vector>
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h
@ -18,7 +18,7 @@ limitations under the License.
 #include <vector>
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h
@ -17,12 +17,13 @@ limitations under the License.
 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_
 #include <algorithm>
 #include <list>
 #include <queue>
 #include <set>
 #include <vector>
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/internal.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/internal.h
@ -22,7 +22,7 @@ limitations under the License.
 #include <vector>
 #include "absl/memory/memory.h"
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h
@ -18,7 +18,7 @@ limitations under the License.
 #include <vector>
-#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
+#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h
@ -18,8 +18,8 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 namespace tflite {
--- a/tensorflow/lite/delegates/gpu/common/memory_management/types.h
+++ b/tensorflow/lite/delegates/gpu/common/memory_management/types.h
@ -0,0 +1,66 @@
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
 #include <cstdint>
 #include <memory>
 #include <vector>
 namespace tflite {
 namespace gpu {
 // Identifier of a task that uses tensors as inputs or outputs.
 using TaskId = size_t;

 // Usage interval of one tensor: its size (or shape) together with the IDs of
 // the first and the last task that touch it. Example: a tensor with
 // tensor_size=65536 that first appears in program #2 and is last used in
 // program #7 is stored as {65536, first_task=2, last_task=7}.
 template <typename TensorSizeT>
 struct TensorUsageRecord {
   TensorSizeT tensor_size;
   TaskId first_task;
   TaskId last_task;

   TensorUsageRecord(TensorSizeT size_value, TaskId first_id, TaskId last_id)
       : tensor_size(size_value), first_task(first_id), last_task(last_id) {}

   // Records compare by first_task only; tensor_size and last_task do not
   // take part in the ordering.
   bool operator<(const TensorUsageRecord& rhs) const {
     return rhs.first_task > first_task;
   }
 };
 // Information about assignment of tensors to shared objects.
 // TensorSizeT is the type used to express the size of one shared object.
 template <typename TensorSizeT>
 struct ObjectsAssignment {
  // object_ids[i] is the ID of the shared object that tensor i will be using.
  std::vector<size_t> object_ids;
  // object_sizes[j] is the size of the shared object whose ID is equal to j.
  std::vector<TensorSizeT> object_sizes;
 };
 // Information about assignment of tensors to offsets for the case, when all of
 // them are going to be allocated in one continuous memory block.
 struct OffsetsAssignment {
  // Presumably offsets[i] is the offset of tensor i inside the block — confirm
  // against ObjectsToOffsets().
  std::vector<size_t> offsets;
  // Size of the whole block. NOTE: there is no default initializer, so a
  // default-initialized OffsetsAssignment leaves this value indeterminate.
  size_t total_size;
 };
 }  // namespace gpu
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
--- a/tensorflow/lite/delegates/gpu/common/memory_management_test.cc
+++ b/tensorflow/lite/delegates/gpu/common/memory_management_test.cc
@ -317,6 +317,14 @@ TEST(Model, UInt2Records) {
                          uint2(8, 2), uint2(2, 8), uint2(1, 8), uint2(2, 8),
                          uint2(4, 1)));
  ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
                                     &assignment)
                  .ok());
  EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 0, 3, 1, 4, 0, 5));
  EXPECT_THAT(assignment.object_sizes,
              ElementsAre(uint2(2, 8), uint2(2, 8), uint2(1, 12), uint2(8, 2),
                          uint2(1, 8), uint2(4, 1)));
  ASSERT_TRUE(AssignObjectsToTensors(
                  usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
                  .ok());
@ -347,6 +355,15 @@ TEST(Model, UInt3Records) {
                          uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
                          uint3(1, 2, 1), uint3(1, 1, 1), uint3(2, 2, 2)));
  ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
                                     &assignment)
                  .ok());
  EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 3, 4, 5, 6, 2, 4));
  EXPECT_THAT(assignment.object_sizes,
              ElementsAre(uint3(1, 2, 8), uint3(4, 3, 2), uint3(1, 1, 1),
                          uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
                          uint3(1, 2, 1)));
  ASSERT_TRUE(AssignObjectsToTensors(
                  usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
                  .ok());
--- a/tensorflow/lite/delegates/gpu/gl/runtime.cc
+++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc
@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
 #include "tensorflow/lite/delegates/gpu/common/types.h"
 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
--- a/tensorflow/lite/delegates/gpu/metal/inference_context.mm
+++ b/tensorflow/lite/delegates/gpu/metal/inference_context.mm
@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/strings/substitute.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management.h"
 #include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
 #include "tensorflow/lite/delegates/gpu/common/model.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"