TFLM: Add an interpreter API to inspect the actual number of bytes used in the arena.

This helps to choose the optimal arena size (a usage sketch follows the notes below).

- I've also used this tool to adjust the arena size for a few test cases.
- This CL also changes the GreedyMemoryPlanner to expose the per-buffer size requirement, so that we can estimate whether the remaining arena is enough to plan all buffers.
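For context, this is roughly how the new API is meant to be used (a minimal sketch, not part of this CL; the model symbol `g_model` and the oversized starting arena are assumptions, and the AllOpsResolver include path may differ between versions):

// Hypothetical tuning workflow: run once with a generous arena, read back
// the actual usage with arena_used_bytes(), then shrink the arena to fit.
#include "tensorflow/lite/micro/kernels/all_ops_resolver.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/schema/schema_generated.h"

extern const unsigned char g_model[];  // assumed flatbuffer model data

constexpr size_t kGenerousArenaSize = 16 * 1024;  // deliberately oversized
alignas(16) static uint8_t tensor_arena[kGenerousArenaSize];

size_t MeasureArenaUsage() {
  const tflite::Model* model = tflite::GetModel(g_model);
  tflite::MicroErrorReporter error_reporter;
  tflite::ops::micro::AllOpsResolver resolver;
  tflite::MicroInterpreter interpreter(model, resolver, tensor_arena,
                                       kGenerousArenaSize, &error_reporter);
  if (interpreter.AllocateTensors() != kTfLiteOk) return 0;
  // After AllocateTensors(), this reports the optimal arena size.
  return interpreter.arena_used_bytes();
}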

PiperOrigin-RevId: 307628733
Change-Id: Id47f578a0bd0b67a3bbbd2a2ef7103d2336b17aa
Tiezhen WANG 2020-04-21 10:14:03 -07:00 committed by TensorFlower Gardener
parent e71f97fd6b
commit 298b24151e
9 changed files with 67 additions and 13 deletions

tensorflow/lite/micro/examples/hello_world/hello_world_test.cc

@@ -43,8 +43,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   tflite::ops::micro::AllOpsResolver resolver;
   // Create an area of memory to use for input, output, and intermediate arrays.
   // Finding the minimum value for your model may require some trial and error.
-  const int tensor_arena_size = 2 * 1024;
+  // `arena_used_bytes` can be used to retrieve the optimal size.
+  const int tensor_arena_size = 2208 + 16 + 100 /* some reserved space */;
   uint8_t tensor_arena[tensor_arena_size];
   // Build an interpreter to run the model with
@@ -53,6 +53,10 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   // Allocate memory from the tensor_arena for the model's tensors
   TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  // At the time of writing, the hello world model uses 2208 bytes. We leave
+  // 100 bytes of headroom to make the test less fragile and, at the same
+  // time, to alert on any substantial increase.
+  TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2208 + 100);
   // Obtain a pointer to the model's input tensor
   TfLiteTensor* input = interpreter.input(0);

tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc

@@ -41,13 +41,8 @@ void ReverseSortInPlace(int* values, int* ids, int size) {
 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
                                          int scratch_buffer_size)
     : buffer_count_(0), need_to_calculate_offsets_(true) {
-  const int per_buffer_size = sizeof(BufferRequirements) +  // requirements_
-                              sizeof(int) +        // buffer_sizes_sorted_by_size_
-                              sizeof(int) +        // buffer_ids_sorted_by_size_
-                              sizeof(ListEntry) +  // buffers_sorted_by_offset_
-                              sizeof(int);         // buffer_offsets_;
   // Allocate the arrays we need within the scratch buffer arena.
-  max_buffer_count_ = scratch_buffer_size / per_buffer_size;
+  max_buffer_count_ = scratch_buffer_size / per_buffer_size();
   unsigned char* next_free = scratch_buffer;
   requirements_ = reinterpret_cast<BufferRequirements*>(next_free);

tensorflow/lite/micro/memory_planner/greedy_memory_planner.h

@@ -86,6 +86,17 @@ class GreedyMemoryPlanner : public MemoryPlanner {
     int next_entry_index;
   };

+  // Number of bytes required in order to plan a buffer.
+  static size_t per_buffer_size() {
+    const int per_buffer_size =
+        sizeof(BufferRequirements) +  // requirements_
+        sizeof(int) +                 // buffer_sizes_sorted_by_size_
+        sizeof(int) +                 // buffer_ids_sorted_by_size_
+        sizeof(ListEntry) +           // buffers_sorted_by_offset_
+        sizeof(int);                  // buffer_offsets_;
+    return per_buffer_size;
+  }
+
  private:
   // Whether a buffer is active in a given time range.
   bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
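As an aside (an illustrative sketch, not from the diff; the scratch size is an arbitrary assumption): since per_buffer_size() is public and static, a caller can compute how many buffers fit in a given scratch area, which is exactly the quotient the constructor now uses, and can check ahead of time whether the remaining arena suffices to plan all buffers.

#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"

constexpr int kScratchSize = 4 * 1024;  // assumed planner scratch size
// The planner's internal capacity is computed the same way:
//   max_buffer_count_ = scratch_buffer_size / per_buffer_size().
const size_t max_buffers =
    kScratchSize / tflite::GreedyMemoryPlanner::per_buffer_size();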

tensorflow/lite/micro/micro_allocator.cc

@@ -440,6 +440,13 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
                                ErrorReporter* error_reporter)
     : model_(model), error_reporter_(error_reporter), context_(context) {
   uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
+  if (aligned_arena != tensor_arena) {
+    TF_LITE_REPORT_ERROR(
+        error_reporter_,
+        "%d bytes lost due to alignment. To avoid this loss, please make sure "
+        "the tensor_arena is 16-byte aligned.",
+        aligned_arena - tensor_arena);
+  }
   size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
   // Creates a root memory allocator managing the arena. The allocator itself
   // also lives in the arena buffer. This allocator doesn't need to be
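A side note (a sketch under the assumption that kBufferAlignment is 16, as the error message states): the warning above never fires if the arena is declared with an alignment specifier.

// Declaring the arena 16-byte aligned makes AlignPointerUp() a no-op, so no
// bytes are lost at the start of the buffer.
constexpr size_t kArenaSize = 2208 + 100;  // measured usage + headroom
alignas(16) static uint8_t tensor_arena[kArenaSize];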

tensorflow/lite/micro/micro_allocator.h

@@ -64,9 +64,10 @@ typedef struct {
 // This information could change in the future version.
 // ************** .memory_allocator->GetBuffer()
 // Tensors/Scratch buffers (head)
-// **************
+// ************** .head_watermark
 // unused memory
-// ************** .memory_allocator->GetBuffer() + ->GetDataSize()
+// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
+//                                               - ->GetDataSize()
 // persistent area (tail)
 // ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
 class MicroAllocator {
@@ -88,6 +89,15 @@ class MicroAllocator {
   // called in this class.
   TfLiteStatus FinishTensorAllocation();

+  // Returns the arena usage in bytes, only available after
+  // `FinishTensorAllocation`. Otherwise, it will return 0.
+  size_t used_bytes() const {
+    if (active_) {
+      return 0;
+    }
+    return memory_allocator_->GetUsedBytes();
+  }

   // Run through the model to allocate nodes and registrations. We need to
   // keep them for the entire lifetime of the model to allow persistent
   // tensors. This method needs to be called before FinishTensorAllocation.
@@ -115,6 +125,7 @@ class MicroAllocator {
   TfLiteStatus Init();

   const Model* model_;
+  // A simple memory allocator that always allocates from the arena tail.
   SimpleMemoryAllocator* memory_allocator_;
   ErrorReporter* error_reporter_;
   TfLiteContext* context_;

tensorflow/lite/micro/micro_allocator_test.cc

@@ -142,11 +142,15 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) {
 TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
   const tflite::Model* model = tflite::testing::GetSimpleMockModel();
   TfLiteContext context;
-  constexpr size_t arena_size = 1024;
+  constexpr size_t arena_size =
+      760 /* minimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* headroom for future growth */;
   uint8_t arena[arena_size];
   tflite::MicroAllocator allocator(&context, model, arena, arena_size,
                                    micro_test::reporter);
   TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size);
+  // Memory planning hasn't been finalized, so the used byte count is unknown.
+  TF_LITE_MICRO_EXPECT_EQ(0, allocator.used_bytes());
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation());
   // No allocation to be done afterwards.
@@ -170,6 +174,7 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) {
                                context.tensors[1].data.raw);
   TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw,
                           context.tensors[2].data.raw);
+  TF_LITE_MICRO_EXPECT_LE(allocator.used_bytes(), 760 + 100);
 }

 TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) {

tensorflow/lite/micro/micro_interpreter.h

@@ -139,6 +139,14 @@ class MicroInterpreter {
     return node_and_registrations_[node_index];
   }

+  // For debugging only.
+  // Returns the number of arena bytes actually used, i.e. the optimal arena
+  // size. Only available after `AllocateTensors` has been called.
+  // Note that `tensor_arena` normally requires 16-byte alignment to fully
+  // utilize the space. If it is not aligned, the optimal arena size would be
+  // arena_used_bytes() + 16.
+  size_t arena_used_bytes() const { return allocator_.used_bytes(); }
+
  private:
   void CorrectTensorEndianness(TfLiteTensor* tensorCorr);
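To restate the alignment note numerically (an illustration; the 2208 figure is the hello world measurement quoted earlier):

// If the arena is declared alignas(16), the measured value is sufficient;
// with arbitrary alignment, budget one extra alignment step of slack.
const size_t measured = 2208;                    // from arena_used_bytes()
const size_t size_if_aligned = measured;         // alignas(16) arena
const size_t size_if_unaligned = measured + 16;  // worst-case alignment loss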

tensorflow/lite/micro/micro_interpreter_test.cc

@@ -174,7 +174,9 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
   const tflite::Model* model = tflite::testing::GetSimpleMockModel();
   TF_LITE_MICRO_EXPECT_NE(nullptr, model);
   tflite::MockOpResolver mock_resolver;
-  constexpr size_t allocator_buffer_size = 1024;
+  constexpr size_t allocator_buffer_size =
+      928 /* optimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* some headroom */;
   uint8_t allocator_buffer[allocator_buffer_size];

   // Create a new scope so that we can test the destructor.
@@ -183,6 +185,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) {
                                          allocator_buffer_size,
                                          micro_test::reporter);
     TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+    TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 928 + 100);
     TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
     TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size());
@@ -266,12 +269,15 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) {
   TF_LITE_MICRO_EXPECT_NE(nullptr, model);
   tflite::MockOpResolver mock_resolver;
-  constexpr size_t allocator_buffer_size = 2048;
+  constexpr size_t allocator_buffer_size =
+      2096 /* optimal arena size at the time of writing */ +
+      16 /* alignment */ + 100 /* some headroom */;
   uint8_t allocator_buffer[allocator_buffer_size];
   tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer,
                                        allocator_buffer_size,
                                        micro_test::reporter);
   TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk);
+  TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2096 + 100);
   TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size());
   TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size());

tensorflow/lite/micro/simple_memory_allocator.h

@@ -31,6 +31,8 @@ class SimpleMemoryAllocator {
   SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
                         uint8_t* buffer_tail)
       : error_reporter_(error_reporter),
+        buffer_head_(buffer_head),
+        buffer_tail_(buffer_tail),
         head_(buffer_head),
         tail_(buffer_tail) {}

   SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
@@ -47,9 +49,14 @@ class SimpleMemoryAllocator {
   uint8_t* GetHead() const { return head_; }
   uint8_t* GetTail() const { return tail_; }

   size_t GetAvailableMemory() const { return tail_ - head_; }
+  size_t GetUsedBytes() const { return GetBufferSize() - GetAvailableMemory(); }

  private:
+  size_t GetBufferSize() const { return buffer_tail_ - buffer_head_; }
+
   ErrorReporter* error_reporter_;
+  uint8_t* buffer_head_;
+  uint8_t* buffer_tail_;
   uint8_t* head_;
   uint8_t* tail_;
 };
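A worked example of the new accounting (illustrative numbers only, not from the commit): the head grows upward for tensor data, the tail grows downward for persistent allocations, and GetUsedBytes() counts everything no longer between them.

// Hypothetical 1024-byte buffer: head_ has advanced 600 bytes (tensors) and
// tail_ has retreated 100 bytes (persistent allocations).
//   GetBufferSize()      = buffer_tail_ - buffer_head_ = 1024
//   GetAvailableMemory() = tail_ - head_ = 1024 - 600 - 100 = 324
//   GetUsedBytes()       = 1024 - 324 = 700   (= 600 + 100)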