Create template version of AssignObjectsToTensors.

Add version of Equality memory management algorithm for unhashable types.

PiperOrigin-RevId: 268586124
This commit is contained in:
A. Unique TensorFlower 2019-09-11 18:19:17 -07:00 committed by TensorFlower Gardener
parent 6cb5fb444c
commit da53d5960e
14 changed files with 195 additions and 72 deletions

View File

@ -57,6 +57,7 @@ cc_library(
"memory_management/internal.h",
"memory_management/min_cost_flow_assignment.h",
"memory_management/naive_assignment.h",
"memory_management/types.h",
],
deps = [
":shape",

View File

@ -23,12 +23,8 @@ limitations under the License.
#include <type_traits>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
@ -59,6 +55,19 @@ OffsetsAssignment ObjectsToOffsets(
return result;
}
// Runs both greedy heuristics on |usage_records| and keeps whichever
// produced assignment consumes less total memory.
Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
                  ObjectsAssignment<size_t>* assignment) {
  // The size/distance-priority heuristic is the baseline; its failure is
  // fatal for the whole call.
  RETURN_IF_ERROR(
      GreedyBySizeDistPriorityAssignment(usage_records, assignment));
  // The breadth-based heuristic is best-effort: adopt its result only when
  // it succeeded AND is strictly smaller than the baseline.
  ObjectsAssignment<size_t> candidate;
  const bool candidate_ok =
      GreedyByBreadthAssignment(usage_records, &candidate).ok();
  if (candidate_ok && TotalSize(candidate) < TotalSize(*assignment)) {
    std::swap(*assignment, candidate);
  }
  return OkStatus();
}
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<size_t>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment) {
@ -66,24 +75,15 @@ Status AssignObjectsToTensors(
case MemoryStrategy::NAIVE:
return NaiveAssignment(usage_records, assignment);
case MemoryStrategy::EQUALITY:
return EqualityAssignment(usage_records, assignment);
return EqualityAssignmentWithHash(usage_records, assignment);
case MemoryStrategy::GREEDY_IN_ORDER:
return GreedyInOrderAssignment(usage_records, assignment);
case MemoryStrategy::GREEDY_BY_BREADTH:
return GreedyByBreadthAssignment(usage_records, assignment);
case MemoryStrategy::GREEDY_BY_SIZE:
return GreedyBySizeDistPriorityAssignment(usage_records, assignment);
case MemoryStrategy::GREEDY_BEST: {
RETURN_IF_ERROR(
GreedyBySizeDistPriorityAssignment(usage_records, assignment));
ObjectsAssignment<size_t> assignment_by_breadth;
if (GreedyByBreadthAssignment(usage_records, &assignment_by_breadth)
.ok() &&
TotalSize(assignment_by_breadth) < TotalSize(*assignment)) {
std::swap(*assignment, assignment_by_breadth);
}
return OkStatus();
}
case MemoryStrategy::GREEDY_BEST:
return BestGreedy(usage_records, assignment);
case MemoryStrategy::MINCOSTFLOW:
return MinCostFlowAssignment(usage_records, assignment);
default:
@ -93,6 +93,7 @@ Status AssignObjectsToTensors(
return OkStatus();
}
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<BHWC>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment) {
@ -100,7 +101,7 @@ Status AssignObjectsToTensors(
case MemoryStrategy::NAIVE:
return NaiveAssignment(usage_records, assignment);
case MemoryStrategy::EQUALITY:
return EqualityAssignment(usage_records, assignment);
return EqualityAssignmentWithHash(usage_records, assignment);
default:
return InternalError(
"MemoryStrategy is not supported with current tensor size type.");
@ -108,12 +109,15 @@ Status AssignObjectsToTensors(
return OkStatus();
}
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<uint2>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment) {
switch (strategy) {
case MemoryStrategy::NAIVE:
return NaiveAssignment(usage_records, assignment);
case MemoryStrategy::EQUALITY:
return EqualityAssignment(usage_records, assignment);
case MemoryStrategy::GREEDY_IN_ORDER:
return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
default:
@ -123,12 +127,15 @@ Status AssignObjectsToTensors(
return OkStatus();
}
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<uint3>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment) {
switch (strategy) {
case MemoryStrategy::NAIVE:
return NaiveAssignment(usage_records, assignment);
case MemoryStrategy::EQUALITY:
return EqualityAssignment(usage_records, assignment);
case MemoryStrategy::GREEDY_IN_ORDER:
return GreedyInOrderAssignmentMultidimensional(usage_records, assignment);
default:

View File

@ -21,6 +21,13 @@ limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/equality_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_breadth_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_in_order_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/min_cost_flow_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/naive_assignment.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
@ -30,41 +37,6 @@ namespace gpu {
using TaskId = size_t;
// Record, containing tensor size/shape and IDs of the first and the last task,
// that use this tensor as input or output. For example: tensor #3 with size
// tensor_size=65536 is first introduced in program #2 (first_task=2) and used
// for the last time in program #7 (last_task=7).
template <typename TensorSizeT>
struct TensorUsageRecord {
TensorSizeT tensor_size;
TaskId first_task;
TaskId last_task;
TensorUsageRecord(TensorSizeT size, TaskId first, TaskId last)
: tensor_size(size), first_task(first), last_task(last) {}
// Default order of tensor usage records is increasing order of first_task.
bool operator<(const TensorUsageRecord<TensorSizeT>& other) const {
return first_task < other.first_task;
}
};
// Information about assignment of tensors to shared objects
template <typename TensorSizeT>
struct ObjectsAssignment {
// shared_object_ids_[i] is ID of shared object, that tensor i will be using.
std::vector<size_t> object_ids;
// shared_object_sizes_[i] is a size of shared object with ID equal to i.
std::vector<TensorSizeT> object_sizes;
};
// Information about assignment of tensors to offsets for the case, when all of
// them are going to be allocated in one continuous memory block.
struct OffsetsAssignment {
std::vector<size_t> offsets;
size_t total_size;
};
// Converts given assignment of tensors to shared objects to the assignment of
// the same tensors to offsets in continuous memory block.
OffsetsAssignment ObjectsToOffsets(
@ -105,30 +77,46 @@ enum class MemoryStrategy {
MINCOSTFLOW,
};
// Chooses greedy algorithm with the lowest memory consumption for given usage
// records and returns corresponding shared objects assignment.
Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
ObjectsAssignment<size_t>* assignment);
// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Initial tensor sizes are given as size_t. This function is
// intended to use with GPU buffers and one-dimensional textures.
// objects' sizes. Below there are specializations for different types, that
// support more memory strategies.
// Generic fallback: only the strategies that work for arbitrary (possibly
// unhashable) tensor size types are supported here; the specializations
// declared below widen the supported set for concrete types.
template <typename TensorSizeT>
Status AssignObjectsToTensors(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    MemoryStrategy strategy, ObjectsAssignment<TensorSizeT>* assignment) {
  if (strategy == MemoryStrategy::NAIVE) {
    return NaiveAssignment(usage_records, assignment);
  }
  if (strategy == MemoryStrategy::EQUALITY) {
    return EqualityAssignment(usage_records, assignment);
  }
  return InternalError(
      "MemoryStrategy is not supported with current tensor size type.");
}
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<size_t>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<size_t>* assignment);
// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Initial tensor sizes are given as BHWC. This function is
// intended to use with OpenCL textures.
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<BHWC>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<BHWC>* assignment);
// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Initial tensor sizes are given as uint2. This function is
// intended to use with OpenGL textures.
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<uint2>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<uint2>* assignment);
// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Initial tensor sizes are given as uint3. This function is
// intended to use with OpenGL textures.
template <>
Status AssignObjectsToTensors(
const std::vector<TensorUsageRecord<uint3>>& usage_records,
MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment);

View File

@ -20,15 +20,16 @@ limitations under the License.
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {
namespace gpu {
// Fast version of Equality Assignments for hashable types.
template <typename TensorSizeT>
Status EqualityAssignment(
Status EqualityAssignmentWithHash(
const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
ObjectsAssignment<TensorSizeT>* assignment) {
size_t num_records = usage_records.size();
@ -50,7 +51,7 @@ Status EqualityAssignment(
objects_in_use.pop();
}
TensorSizeT tensor_size = usage_records[i].tensor_size;
const TensorSizeT tensor_size = usage_records[i].tensor_size;
auto pool_it = pool.find(tensor_size);
if (pool_it == pool.end() || pool_it->second.empty()) {
// No free shared object with size equal to tensor_size. Create a new one,
@ -71,6 +72,46 @@ Status EqualityAssignment(
return OkStatus();
}
// Slower version of Equality Assignment, for tensor size types that cannot
// be hashed. Each tensor linearly scans all shared objects for a free one of
// exactly equal size, so the overall cost is O(num_records * num_objects).
template <typename TensorSizeT>
Status EqualityAssignment(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment) {
  const size_t num_records = usage_records.size();
  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);

  // dealloc_task[obj] is the id of the last task that uses shared object
  // |obj|; the object is free for any tensor whose first_task is later.
  std::vector<size_t> dealloc_task;
  for (size_t rec = 0; rec < num_records; ++rec) {
    const TensorSizeT& wanted_size = usage_records[rec].tensor_size;
    const size_t first = usage_records[rec].first_task;
    const size_t last = usage_records[rec].last_task;

    // Linear scan for an already-deallocated shared object whose size is
    // equal to this tensor's size.
    size_t reuse_id = kNotAssigned;
    const size_t num_objects = assignment->object_sizes.size();
    for (size_t obj = 0; obj < num_objects; ++obj) {
      if (dealloc_task[obj] < first &&
          assignment->object_sizes[obj] == wanted_size) {
        reuse_id = obj;
        break;
      }
    }

    if (reuse_id == kNotAssigned) {
      // Nothing suitable is free: create a new shared object for this tensor
      // and remember after which task it becomes free.
      assignment->object_ids[rec] = num_objects;
      assignment->object_sizes.push_back(wanted_size);
      dealloc_task.push_back(last);
    } else {
      // Reuse the found object and extend its lifetime to this tensor's
      // last task.
      assignment->object_ids[rec] = reuse_id;
      dealloc_task[reuse_id] = last;
    }
  }
  return OkStatus();
}
} // namespace gpu
} // namespace tflite

View File

@ -19,7 +19,7 @@ limitations under the License.
#include <cstdint>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {

View File

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {

View File

@ -17,12 +17,13 @@ limitations under the License.
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_
#include <algorithm>
#include <list>
#include <queue>
#include <set>
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {

View File

@ -22,7 +22,7 @@ limitations under the License.
#include <vector>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
namespace tflite {

View File

@ -18,7 +18,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {

View File

@ -18,8 +18,8 @@ limitations under the License.
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
namespace tflite {

View File

@ -0,0 +1,66 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_
#include <cstdint>
#include <memory>
#include <vector>
namespace tflite {
namespace gpu {
// Id of a task; tensor lifetimes (first_task/last_task below) are expressed
// in terms of these ids.
using TaskId = size_t;

// Record, containing tensor size/shape and IDs of the first and the last task,
// that use this tensor as input or output. For example: tensor #3 with size
// tensor_size=65536 is first introduced in program #2 (first_task=2) and used
// for the last time in program #7 (last_task=7).
template <typename TensorSizeT>
struct TensorUsageRecord {
  // Size (e.g. size_t for buffers) or shape (e.g. BHWC/uint2/uint3 for
  // textures) of the tensor.
  TensorSizeT tensor_size;
  // Id of the first task that uses this tensor.
  TaskId first_task;
  // Id of the last task that uses this tensor.
  TaskId last_task;

  TensorUsageRecord(TensorSizeT size, TaskId first, TaskId last)
      : tensor_size(size), first_task(first), last_task(last) {}

  // Default order of tensor usage records is increasing order of first_task.
  bool operator<(const TensorUsageRecord<TensorSizeT>& other) const {
    return first_task < other.first_task;
  }
};
// Information about assignment of tensors to shared objects.
template <typename TensorSizeT>
struct ObjectsAssignment {
  // object_ids[i] is the id of the shared object that tensor i will use.
  std::vector<size_t> object_ids;
  // object_sizes[i] is the size of the shared object with id equal to i.
  std::vector<TensorSizeT> object_sizes;
};
// Information about assignment of tensors to offsets for the case, when all of
// them are going to be allocated in one continuous memory block.
struct OffsetsAssignment {
  // offsets[i] is the offset of tensor i inside the common memory block.
  std::vector<size_t> offsets;
  // Total size of the continuous memory block that fits all tensors.
  size_t total_size;
};
} // namespace gpu
} // namespace tflite
#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_TYPES_H_

View File

@ -317,6 +317,14 @@ TEST(Model, UInt2Records) {
uint2(8, 2), uint2(2, 8), uint2(1, 8), uint2(2, 8),
uint2(4, 1)));
ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
&assignment)
.ok());
EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 0, 3, 1, 4, 0, 5));
EXPECT_THAT(assignment.object_sizes,
ElementsAre(uint2(2, 8), uint2(2, 8), uint2(1, 12), uint2(8, 2),
uint2(1, 8), uint2(4, 1)));
ASSERT_TRUE(AssignObjectsToTensors(
usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
.ok());
@ -347,6 +355,15 @@ TEST(Model, UInt3Records) {
uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
uint3(1, 2, 1), uint3(1, 1, 1), uint3(2, 2, 2)));
ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::EQUALITY,
&assignment)
.ok());
EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 2, 3, 4, 5, 6, 2, 4));
EXPECT_THAT(assignment.object_sizes,
ElementsAre(uint3(1, 2, 8), uint3(4, 3, 2), uint3(1, 1, 1),
uint3(2, 4, 1), uint3(2, 2, 2), uint3(8, 1, 2),
uint3(1, 2, 1)));
ASSERT_TRUE(AssignObjectsToTensors(
usage_records, MemoryStrategy::GREEDY_IN_ORDER, &assignment)
.ok());

View File

@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"

View File

@ -20,6 +20,7 @@ limitations under the License.
#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"