Allow a different tensor size type in TensorUsageRecord and in the naive algorithm.
Use size_t instead of uint32_t in all memory management algorithms.
PiperOrigin-RevId: 253913153
This commit is contained in:
parent
e982459f83
commit
ee21809f1b
@ -29,7 +29,7 @@ namespace gpu {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct PoolRecord {
|
struct PoolRecord {
|
||||||
PoolRecord(uint32_t size, size_t obj_id)
|
PoolRecord(size_t size, size_t obj_id)
|
||||||
: object_size(size), object_id(obj_id) {}
|
: object_size(size), object_id(obj_id) {}
|
||||||
|
|
||||||
// Objects in pool are ordered by size.
|
// Objects in pool are ordered by size.
|
||||||
@ -38,7 +38,7 @@ struct PoolRecord {
|
|||||||
(object_size == other.object_size && object_id < other.object_id);
|
(object_size == other.object_size && object_id < other.object_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t object_size;
|
size_t object_size;
|
||||||
size_t object_id;
|
size_t object_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -61,8 +61,10 @@ struct QueueRecord {
|
|||||||
//
|
//
|
||||||
// The problem of memory management is NP-complete. This implements a
|
// The problem of memory management is NP-complete. This implements a
|
||||||
// naive algorithm that assigns each tensor to a separate object in memory.
|
// naive algorithm that assigns each tensor to a separate object in memory.
|
||||||
Status NaiveAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
template <typename TensorSizeT>
|
||||||
ObjectsAssignment* assignment) {
|
Status NaiveAssignment(
|
||||||
|
const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
|
||||||
|
ObjectsAssignment<TensorSizeT>* assignment) {
|
||||||
assignment->object_sizes.resize(usage_records.size());
|
assignment->object_sizes.resize(usage_records.size());
|
||||||
assignment->object_ids.resize(usage_records.size());
|
assignment->object_ids.resize(usage_records.size());
|
||||||
for (size_t i = 0; i < usage_records.size(); i++) {
|
for (size_t i = 0; i < usage_records.size(); i++) {
|
||||||
@ -79,7 +81,8 @@ Status NaiveAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
|||||||
// greedy algorithm that approximates an optimal solution with following
|
// greedy algorithm that approximates an optimal solution with following
|
||||||
// heuristic:
|
// heuristic:
|
||||||
//
|
//
|
||||||
// 1. Iterates through all tensor usage records and for every object reference
|
// 1. Iterates through all tensor usage records and for every object
|
||||||
|
// reference
|
||||||
// assigns shared object from the pool. When object reference is used
|
// assigns shared object from the pool. When object reference is used
|
||||||
// for the last time, corresponding shared object is returned back to
|
// for the last time, corresponding shared object is returned back to
|
||||||
// the pool.
|
// the pool.
|
||||||
@ -88,8 +91,9 @@ Status NaiveAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
|||||||
// available.
|
// available.
|
||||||
//
|
//
|
||||||
// 3. Shared object size may increase when tensor requests larger size.
|
// 3. Shared object size may increase when tensor requests larger size.
|
||||||
Status GreedyAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
Status GreedyAssignment(
|
||||||
ObjectsAssignment* assignment) {
|
const std::vector<TensorUsageRecord<size_t>>& usage_records,
|
||||||
|
ObjectsAssignment<size_t>* assignment) {
|
||||||
assignment->object_sizes.clear();
|
assignment->object_sizes.clear();
|
||||||
assignment->object_ids.resize(usage_records.size());
|
assignment->object_ids.resize(usage_records.size());
|
||||||
|
|
||||||
@ -108,7 +112,7 @@ Status GreedyAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
|||||||
pool.insert({assignment->object_sizes[object_id], object_id});
|
pool.insert({assignment->object_sizes[object_id], object_id});
|
||||||
objects_in_use.pop();
|
objects_in_use.pop();
|
||||||
}
|
}
|
||||||
uint32_t tensor_size = usage_records[i].tensor_size;
|
size_t tensor_size = usage_records[i].tensor_size;
|
||||||
if (pool.empty()) {
|
if (pool.empty()) {
|
||||||
// No free shared object, creating a new one, assign i-th tensor to
|
// No free shared object, creating a new one, assign i-th tensor to
|
||||||
// it and add to the queue of objects in use.
|
// it and add to the queue of objects in use.
|
||||||
@ -121,7 +125,7 @@ Status GreedyAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
|||||||
// Find shared object from pool, that will waste the least possible
|
// Find shared object from pool, that will waste the least possible
|
||||||
// amount of memory when reused for current tensor.
|
// amount of memory when reused for current tensor.
|
||||||
auto pool_it = pool.lower_bound({tensor_size, 0});
|
auto pool_it = pool.lower_bound({tensor_size, 0});
|
||||||
uint32_t size_diff = 0;
|
size_t size_diff = 0;
|
||||||
if (pool_it != pool.end()) {
|
if (pool_it != pool.end()) {
|
||||||
// Try smallest shared object from pool with size >= tensor_size.
|
// Try smallest shared object from pool with size >= tensor_size.
|
||||||
size_diff = pool_it->object_size - tensor_size;
|
size_diff = pool_it->object_size - tensor_size;
|
||||||
@ -139,7 +143,8 @@ Status GreedyAssignment(const std::vector<TensorUsageRecord>& usage_records,
|
|||||||
// best_it can't be equal to pool.end(), because pool is not empty
|
// best_it can't be equal to pool.end(), because pool is not empty
|
||||||
if (best_it == pool.end()) {
|
if (best_it == pool.end()) {
|
||||||
return InternalError(
|
return InternalError(
|
||||||
"No shared object is found in non-empty pool in GreedyAssignment.");
|
"No shared object is found in non-empty pool in "
|
||||||
|
"GreedyAssignment.");
|
||||||
}
|
}
|
||||||
size_t shared_id = best_it->object_id;
|
size_t shared_id = best_it->object_id;
|
||||||
pool.erase(best_it);
|
pool.erase(best_it);
|
||||||
@ -158,7 +163,7 @@ class MinCostFlowSolver {
|
|||||||
public:
|
public:
|
||||||
// Build auxiliary flow graph, based on information about intermediate
|
// Build auxiliary flow graph, based on information about intermediate
|
||||||
// tensors.
|
// tensors.
|
||||||
void Build(const std::vector<TensorUsageRecord>& usage_records) {
|
void Build(const std::vector<TensorUsageRecord<size_t>>& usage_records) {
|
||||||
usage_records_ = &usage_records;
|
usage_records_ = &usage_records;
|
||||||
num_tensors_ = usage_records.size();
|
num_tensors_ = usage_records.size();
|
||||||
source_ = 2 * num_tensors_;
|
source_ = 2 * num_tensors_;
|
||||||
@ -167,8 +172,8 @@ class MinCostFlowSolver {
|
|||||||
std::vector<size_t> old_record_ids;
|
std::vector<size_t> old_record_ids;
|
||||||
std::priority_queue<QueueRecord> objects_in_use;
|
std::priority_queue<QueueRecord> objects_in_use;
|
||||||
for (size_t i = 0; i < usage_records.size(); i++) {
|
for (size_t i = 0; i < usage_records.size(); i++) {
|
||||||
// Pop from the queue all objects that are no longer in use at the time of
|
// Pop from the queue all objects that are no longer in use at the time
|
||||||
// execution of the first_task of i-th intermediate tensor.
|
// of execution of the first_task of i-th intermediate tensor.
|
||||||
while (!objects_in_use.empty() &&
|
while (!objects_in_use.empty() &&
|
||||||
objects_in_use.top().last_task < usage_records[i].first_task) {
|
objects_in_use.top().last_task < usage_records[i].first_task) {
|
||||||
old_record_ids.push_back(objects_in_use.top().object_id);
|
old_record_ids.push_back(objects_in_use.top().object_id);
|
||||||
@ -186,8 +191,8 @@ class MinCostFlowSolver {
|
|||||||
// Edges from vertices of the left part of flow graph, corresponding to
|
// Edges from vertices of the left part of flow graph, corresponding to
|
||||||
// old_record_ids, to i-th vertex in the right part of flow graph are
|
// old_record_ids, to i-th vertex in the right part of flow graph are
|
||||||
// added for the case of reusing previously created shared objects for
|
// added for the case of reusing previously created shared objects for
|
||||||
// i-th tensor. Cost of these edges is an approximation of the size of new
|
// i-th tensor. Cost of these edges is an approximation of the size of
|
||||||
// allocated memory.
|
// new allocated memory.
|
||||||
for (auto record_id : old_record_ids) {
|
for (auto record_id : old_record_ids) {
|
||||||
int cost = 0;
|
int cost = 0;
|
||||||
if (usage_records[i].tensor_size >
|
if (usage_records[i].tensor_size >
|
||||||
@ -251,7 +256,7 @@ class MinCostFlowSolver {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CalculateAssignment(ObjectsAssignment* assignment) {
|
void CalculateAssignment(ObjectsAssignment<size_t>* assignment) {
|
||||||
assignment->object_sizes.clear();
|
assignment->object_sizes.clear();
|
||||||
assignment->object_ids.resize(num_tensors_);
|
assignment->object_ids.resize(num_tensors_);
|
||||||
is_tensor_assigned_.resize(num_tensors_);
|
is_tensor_assigned_.resize(num_tensors_);
|
||||||
@ -273,9 +278,9 @@ class MinCostFlowSolver {
|
|||||||
int cost;
|
int cost;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add edge from vertex src to vertex dst with given capacity and cost and its
|
// Add edge from vertex src to vertex dst with given capacity and cost and
|
||||||
// reversed edge to the flow graph. If some edge has index idx, its reversed
|
// its reversed edge to the flow graph. If some edge has index idx, its
|
||||||
// edge has index idx^1.
|
// reversed edge has index idx^1.
|
||||||
void AddEdge(size_t src, size_t dst, int cap, int cost) {
|
void AddEdge(size_t src, size_t dst, int cap, int cost) {
|
||||||
edges_from_[src].push_back(edges_.size());
|
edges_from_[src].push_back(edges_.size());
|
||||||
edges_.emplace_back(dst, cap, cost);
|
edges_.emplace_back(dst, cap, cost);
|
||||||
@ -288,8 +293,8 @@ class MinCostFlowSolver {
|
|||||||
return vertex_id >= num_tensors_ && vertex_id < 2 * num_tensors_;
|
return vertex_id >= num_tensors_ && vertex_id < 2 * num_tensors_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return vertex from another part of the graph, that corresponds to the same
|
// Return vertex from another part of the graph, that corresponds to the
|
||||||
// intermediate tensor.
|
// same intermediate tensor.
|
||||||
size_t LeftPartTwin(size_t vertex_id) const {
|
size_t LeftPartTwin(size_t vertex_id) const {
|
||||||
return vertex_id - num_tensors_;
|
return vertex_id - num_tensors_;
|
||||||
}
|
}
|
||||||
@ -299,13 +304,13 @@ class MinCostFlowSolver {
|
|||||||
|
|
||||||
// This function uses recursive implementation of depth-first search and
|
// This function uses recursive implementation of depth-first search and
|
||||||
// returns maximum size from tensor tensor_id and all tensors, that will be
|
// returns maximum size from tensor tensor_id and all tensors, that will be
|
||||||
// allocated at the same place with it after all operations that use tensor_id
|
// allocated at the same place with it after all operations that use
|
||||||
// are executed. Next tensor to be allocated at the same place with tensor_id
|
// tensor_id are executed. Next tensor to be allocated at the same place
|
||||||
// is a left part twin of such vertex v, that the edge tensor_id->v is
|
// with tensor_id is a left part twin of such vertex v, that the edge
|
||||||
// saturated (has zero residual capacity).
|
// tensor_id->v is saturated (has zero residual capacity).
|
||||||
uint32_t AssignTensorsToNewSharedObject(size_t tensor_id,
|
size_t AssignTensorsToNewSharedObject(size_t tensor_id,
|
||||||
ObjectsAssignment* assignment) {
|
ObjectsAssignment<size_t>* assignment) {
|
||||||
uint32_t cost = (*usage_records_)[tensor_id].tensor_size;
|
size_t cost = (*usage_records_)[tensor_id].tensor_size;
|
||||||
is_tensor_assigned_[tensor_id] = true;
|
is_tensor_assigned_[tensor_id] = true;
|
||||||
assignment->object_ids[tensor_id] = assignment->object_sizes.size();
|
assignment->object_ids[tensor_id] = assignment->object_sizes.size();
|
||||||
for (const auto& edge_id : edges_from_[tensor_id]) {
|
for (const auto& edge_id : edges_from_[tensor_id]) {
|
||||||
@ -324,7 +329,7 @@ class MinCostFlowSolver {
|
|||||||
size_t source_;
|
size_t source_;
|
||||||
size_t sink_;
|
size_t sink_;
|
||||||
size_t num_tensors_;
|
size_t num_tensors_;
|
||||||
const std::vector<TensorUsageRecord>* usage_records_;
|
const std::vector<TensorUsageRecord<size_t>>* usage_records_;
|
||||||
std::vector<Edge> edges_;
|
std::vector<Edge> edges_;
|
||||||
std::vector<std::vector<size_t>> edges_from_;
|
std::vector<std::vector<size_t>> edges_from_;
|
||||||
std::vector<bool> is_tensor_assigned_;
|
std::vector<bool> is_tensor_assigned_;
|
||||||
@ -337,8 +342,8 @@ class MinCostFlowSolver {
|
|||||||
// assignment of shared objects to tensors, using the result of the flow
|
// assignment of shared objects to tensors, using the result of the flow
|
||||||
// algorithm.
|
// algorithm.
|
||||||
Status MinCostFlowAssignment(
|
Status MinCostFlowAssignment(
|
||||||
const std::vector<TensorUsageRecord>& usage_records,
|
const std::vector<TensorUsageRecord<size_t>>& usage_records,
|
||||||
ObjectsAssignment* assignment) {
|
ObjectsAssignment<size_t>* assignment) {
|
||||||
MinCostFlowSolver solver;
|
MinCostFlowSolver solver;
|
||||||
solver.Build(usage_records);
|
solver.Build(usage_records);
|
||||||
solver.Solve();
|
solver.Solve();
|
||||||
@ -349,11 +354,11 @@ Status MinCostFlowAssignment(
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
Status AssignObjectsToTensors(
|
Status AssignObjectsToTensors(
|
||||||
const std::vector<TensorUsageRecord>& usage_records,
|
const std::vector<TensorUsageRecord<size_t>>& usage_records,
|
||||||
const MemoryStrategy& strategy, ObjectsAssignment* assignment) {
|
const MemoryStrategy& strategy, ObjectsAssignment<size_t>* assignment) {
|
||||||
switch (strategy) {
|
switch (strategy) {
|
||||||
case MemoryStrategy::NAIVE:
|
case MemoryStrategy::NAIVE:
|
||||||
return NaiveAssignment(usage_records, assignment);
|
return NaiveAssignment<size_t>(usage_records, assignment);
|
||||||
case MemoryStrategy::GREEDY:
|
case MemoryStrategy::GREEDY:
|
||||||
return GreedyAssignment(usage_records, assignment);
|
return GreedyAssignment(usage_records, assignment);
|
||||||
case MemoryStrategy::MINCOSTFLOW:
|
case MemoryStrategy::MINCOSTFLOW:
|
||||||
|
@ -28,31 +28,32 @@ namespace gpu {
|
|||||||
|
|
||||||
using TaskId = size_t;
|
using TaskId = size_t;
|
||||||
|
|
||||||
// Record, containing tensor size and IDs of the first and the last task, that
|
// Record, containing tensor size and IDs of the first and the last task,
|
||||||
// use this tensor as input or output.
|
// that use this tensor as input or output. For example: tensor #3 with size
|
||||||
// For example: tensor #3 with size tensor_size=65536 is first introduced in
|
// tensor_size=65536 is first introduced in program #2 (first_task=2) and used
|
||||||
// program #2 (first_task=2) and used for the last time in program #7
|
// for the last time in program #7 (last_task=7).
|
||||||
// (last_task=7).
|
template <typename TensorSizeT>
|
||||||
struct TensorUsageRecord {
|
struct TensorUsageRecord {
|
||||||
uint32_t tensor_size;
|
TensorSizeT tensor_size;
|
||||||
TaskId first_task;
|
TaskId first_task;
|
||||||
TaskId last_task;
|
TaskId last_task;
|
||||||
|
|
||||||
TensorUsageRecord(uint32_t size, TaskId first, TaskId last)
|
TensorUsageRecord(TensorSizeT size, TaskId first, TaskId last)
|
||||||
: tensor_size(size), first_task(first), last_task(last) {}
|
: tensor_size(size), first_task(first), last_task(last) {}
|
||||||
|
|
||||||
// Default order of tensor usage records is increasing order of first_task.
|
// Default order of tensor usage records is increasing order of first_task.
|
||||||
bool operator<(const TensorUsageRecord& other) const {
|
bool operator<(const TensorUsageRecord<TensorSizeT>& other) const {
|
||||||
return first_task < other.first_task;
|
return first_task < other.first_task;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Information about assignment of tensors to shared objects
|
// Information about assignment of tensors to shared objects
|
||||||
|
template <typename TensorSizeT>
|
||||||
struct ObjectsAssignment {
|
struct ObjectsAssignment {
|
||||||
// object_ids[i] is the ID of the shared object that tensor i will be using.
|
// object_ids[i] is the ID of the shared object that tensor i will be using.
|
||||||
std::vector<size_t> object_ids;
|
std::vector<size_t> object_ids;
|
||||||
// object_sizes[i] is the size of the shared object with ID equal to i.
|
// object_sizes[i] is the size of the shared object with ID equal to i.
|
||||||
std::vector<uint32_t> object_sizes;
|
std::vector<TensorSizeT> object_sizes;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class MemoryStrategy {
|
enum class MemoryStrategy {
|
||||||
@ -71,10 +72,11 @@ enum class MemoryStrategy {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Calculates the assignment of shared objects to given tensors, including
|
// Calculates the assignment of shared objects to given tensors, including
|
||||||
// objects' sizes.
|
// objects' sizes. Initial tensor sizes are given as size_t. This function is
|
||||||
|
// intended for use with GPU buffers.
|
||||||
Status AssignObjectsToTensors(
|
Status AssignObjectsToTensors(
|
||||||
const std::vector<TensorUsageRecord>& usage_records,
|
const std::vector<TensorUsageRecord<size_t>>& usage_records,
|
||||||
const MemoryStrategy& strategy, ObjectsAssignment* assignment);
|
const MemoryStrategy& strategy, ObjectsAssignment<size_t>* assignment);
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
@ -25,7 +25,7 @@ namespace {
|
|||||||
using ::testing::ElementsAre;
|
using ::testing::ElementsAre;
|
||||||
|
|
||||||
TEST(Model, EmptyRecords) {
|
TEST(Model, EmptyRecords) {
|
||||||
ObjectsAssignment assignment;
|
ObjectsAssignment<size_t> assignment;
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(
|
||||||
AssignObjectsToTensors({}, MemoryStrategy::NAIVE, &assignment).ok());
|
AssignObjectsToTensors({}, MemoryStrategy::NAIVE, &assignment).ok());
|
||||||
EXPECT_TRUE(assignment.object_ids.empty());
|
EXPECT_TRUE(assignment.object_ids.empty());
|
||||||
@ -42,9 +42,9 @@ TEST(Model, EmptyRecords) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(Model, OneRecord) {
|
TEST(Model, OneRecord) {
|
||||||
std::vector<TensorUsageRecord> usage_records{
|
std::vector<TensorUsageRecord<size_t>> usage_records{
|
||||||
{/*size=*/16, /*first=*/0, /*last=*/1}};
|
{/*size=*/16, /*first=*/0, /*last=*/1}};
|
||||||
ObjectsAssignment assignment;
|
ObjectsAssignment<size_t> assignment;
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(
|
||||||
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
||||||
.ok());
|
.ok());
|
||||||
@ -63,14 +63,14 @@ TEST(Model, OneRecord) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(Model, ChainRecords) {
|
TEST(Model, ChainRecords) {
|
||||||
std::vector<TensorUsageRecord> usage_records{
|
std::vector<TensorUsageRecord<size_t>> usage_records{
|
||||||
{/*size=*/16, /*first=*/0, /*last=*/1},
|
{/*size=*/16, /*first=*/0, /*last=*/1},
|
||||||
{/*size=*/8, /*first=*/1, /*last=*/2},
|
{/*size=*/8, /*first=*/1, /*last=*/2},
|
||||||
{/*size=*/64, /*first=*/2, /*last=*/3},
|
{/*size=*/64, /*first=*/2, /*last=*/3},
|
||||||
{/*size=*/32, /*first=*/3, /*last=*/4},
|
{/*size=*/32, /*first=*/3, /*last=*/4},
|
||||||
{/*size=*/8, /*first=*/4, /*last=*/5},
|
{/*size=*/8, /*first=*/4, /*last=*/5},
|
||||||
};
|
};
|
||||||
ObjectsAssignment assignment;
|
ObjectsAssignment<size_t> assignment;
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(
|
||||||
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
||||||
.ok());
|
.ok());
|
||||||
@ -89,7 +89,7 @@ TEST(Model, ChainRecords) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(Model, ComplexRecords) {
|
TEST(Model, ComplexRecords) {
|
||||||
std::vector<TensorUsageRecord> usage_records{
|
std::vector<TensorUsageRecord<size_t>> usage_records{
|
||||||
{/*size=*/32, /*first=*/0, /*last=*/1},
|
{/*size=*/32, /*first=*/0, /*last=*/1},
|
||||||
{/*size=*/32, /*first=*/1, /*last=*/4},
|
{/*size=*/32, /*first=*/1, /*last=*/4},
|
||||||
{/*size=*/8, /*first=*/2, /*last=*/5},
|
{/*size=*/8, /*first=*/2, /*last=*/5},
|
||||||
@ -99,7 +99,7 @@ TEST(Model, ComplexRecords) {
|
|||||||
{/*size=*/8, /*first=*/6, /*last=*/8},
|
{/*size=*/8, /*first=*/6, /*last=*/8},
|
||||||
{/*size=*/8, /*first=*/7, /*last=*/8},
|
{/*size=*/8, /*first=*/7, /*last=*/8},
|
||||||
{/*size=*/16, /*first=*/8, /*last=*/9}};
|
{/*size=*/16, /*first=*/8, /*last=*/9}};
|
||||||
ObjectsAssignment assignment;
|
ObjectsAssignment<size_t> assignment;
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(
|
||||||
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
AssignObjectsToTensors(usage_records, MemoryStrategy::NAIVE, &assignment)
|
||||||
.ok());
|
.ok());
|
||||||
@ -111,6 +111,7 @@ TEST(Model, ComplexRecords) {
|
|||||||
.ok());
|
.ok());
|
||||||
EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 0, 2, 3, 1, 3, 2, 0));
|
EXPECT_THAT(assignment.object_ids, ElementsAre(0, 1, 0, 2, 3, 1, 3, 2, 0));
|
||||||
EXPECT_THAT(assignment.object_sizes, ElementsAre(32, 64, 16, 8));
|
EXPECT_THAT(assignment.object_sizes, ElementsAre(32, 64, 16, 8));
|
||||||
|
|
||||||
ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::MINCOSTFLOW,
|
ASSERT_TRUE(AssignObjectsToTensors(usage_records, MemoryStrategy::MINCOSTFLOW,
|
||||||
&assignment)
|
&assignment)
|
||||||
.ok());
|
.ok());
|
||||||
|
@ -88,7 +88,7 @@ using ::tflite::gpu::TensorUsageRecord;
|
|||||||
|
|
||||||
// TODO(ypisarchyk): it makes sense to move this to a separate function
|
// TODO(ypisarchyk): it makes sense to move this to a separate function
|
||||||
// Generate usage records for each intermediate tensor in order of their first_task
|
// Generate usage records for each intermediate tensor in order of their first_task
|
||||||
std::vector<TensorUsageRecord> usageRecords;
|
std::vector<TensorUsageRecord<size_t>> usageRecords;
|
||||||
std::map<ValueId, size_t> usageRecordIds;
|
std::map<ValueId, size_t> usageRecordIds;
|
||||||
for (uint32_t i = 0; i < taskDescriptors.size(); ++i) {
|
for (uint32_t i = 0; i < taskDescriptors.size(); ++i) {
|
||||||
auto outputId = taskDescriptors[i]->output_buffer.id;
|
auto outputId = taskDescriptors[i]->output_buffer.id;
|
||||||
@ -111,7 +111,7 @@ using ::tflite::gpu::TensorUsageRecord;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tflite::gpu::ObjectsAssignment assignment;
|
tflite::gpu::ObjectsAssignment<size_t> assignment;
|
||||||
RETURN_IF_ERROR(AssignObjectsToTensors(usageRecords, MemoryStrategy::GREEDY, &assignment));
|
RETURN_IF_ERROR(AssignObjectsToTensors(usageRecords, MemoryStrategy::GREEDY, &assignment));
|
||||||
auto objectsCount = assignment.object_sizes.size();
|
auto objectsCount = assignment.object_sizes.size();
|
||||||
std::vector<id<MTLBuffer>> sharedBuffers(objectsCount);
|
std::vector<id<MTLBuffer>> sharedBuffers(objectsCount);
|
||||||
|
Loading…
Reference in New Issue
Block a user