This change:
* Adds per-op profiling to the interpreter.
* Maintains state within the MicroProfiler object so that all logging can happen external to the interpreter.
* Refactors the benchmarks to make use of this new functionality.
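For context, a minimal usage sketch of the intended flow: the caller owns the `MicroProfiler`, hands it to the interpreter, and logs the accumulated per-op timings after `Invoke()` returns, outside the interpreter. The arena size, `AllOpsResolver`, the default `MicroProfiler` constructor, and the final `profiler.Log()` call are illustrative assumptions and may differ from the exact API in this change.

```cpp
// Sketch only: wiring a caller-owned MicroProfiler into the interpreter so
// that per-op timings are accumulated during Invoke() and reported afterwards,
// external to the interpreter. Identifiers marked "assumed" are illustrative.
#include <cstddef>
#include <cstdint>

#include "tensorflow/lite/micro/all_ops_resolver.h"      // assumed resolver choice
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_profiler.h"

constexpr size_t kTensorArenaSize = 10 * 1024;           // assumed arena size
alignas(16) static uint8_t tensor_arena[kTensorArenaSize];

void RunOnceWithProfiling(const tflite::Model* model) {
  tflite::MicroErrorReporter error_reporter;
  tflite::AllOpsResolver op_resolver;                    // assumed resolver choice
  tflite::MicroProfiler profiler;                        // construction may differ by version
  tflite::MicroInterpreter interpreter(model, op_resolver, tensor_arena,
                                       kTensorArenaSize, &error_reporter,
                                       &profiler);
  interpreter.AllocateTensors();
  interpreter.Invoke();
  // Logging happens here, outside the interpreter; the exact method name
  // (Log() here) is an assumption about the MicroProfiler API.
  profiler.Log();
}
```

This is roughly the shape the refactored benchmarks follow: the interpreter only records per-op events into the profiler, and presentation of the results stays with the caller.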
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/micro/micro_interpreter.h"
|
|
|
|
#include <cstdarg>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
|
|
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
|
|
#include "tensorflow/lite/c/common.h"
|
|
#include "tensorflow/lite/core/api/error_reporter.h"
|
|
#include "tensorflow/lite/core/api/tensor_utils.h"
|
|
#include "tensorflow/lite/micro/memory_helpers.h"
|
|
#include "tensorflow/lite/micro/micro_allocator.h"
|
|
#include "tensorflow/lite/micro/micro_error_reporter.h"
|
|
#include "tensorflow/lite/micro/micro_op_resolver.h"
|
|
#include "tensorflow/lite/micro/micro_profiler.h"
|
|
#include "tensorflow/lite/schema/schema_generated.h"
|
|
|
|
namespace tflite {
namespace {

#ifndef TF_LITE_STRIP_ERROR_STRINGS
const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
    return registration->custom_name;
  } else {
    return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
  }
}
#endif  // !defined(TF_LITE_STRIP_ERROR_STRINGS)

}  // namespace

namespace internal {

ContextHelper::ContextHelper(ErrorReporter* error_reporter,
                             MicroAllocator* allocator, const Model* model)
    : allocator_(allocator), error_reporter_(error_reporter), model_(model) {}

void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
                                              size_t bytes) {
  return reinterpret_cast<ContextHelper*>(ctx->impl_)
      ->allocator_->AllocatePersistentBuffer(bytes);
}

TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
                                                        size_t bytes,
                                                        int* buffer_idx) {
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
  return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
}

void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
  ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
  return handle->data;
}

void ContextHelper::ReportOpError(struct TfLiteContext* context,
                                  const char* format, ...) {
#ifndef TF_LITE_STRIP_ERROR_STRINGS
  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
  va_list args;
  va_start(args, format);
  TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
  va_end(args);
#endif
}

TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
                                       int tensor_idx) {
  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
  return helper->allocator_->AllocateTempTfLiteTensor(
      helper->model_, helper->eval_tensors_, tensor_idx);
}

TfLiteEvalTensor* ContextHelper::GetEvalTensor(
    const struct TfLiteContext* context, int tensor_idx) {
  ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
  return &helper->eval_tensors_[tensor_idx];
}

void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
  eval_tensors_ = eval_tensors;
}

void ContextHelper::SetScratchBufferHandles(
    ScratchBufferHandle* scratch_buffer_handles) {
  scratch_buffer_handles_ = scratch_buffer_handles;
}

}  // namespace internal

MicroInterpreter::MicroInterpreter(const Model* model,
                                   const MicroOpResolver& op_resolver,
                                   uint8_t* tensor_arena,
                                   size_t tensor_arena_size,
                                   ErrorReporter* error_reporter,
                                   MicroProfiler* profiler)
    : model_(model),
      op_resolver_(op_resolver),
      error_reporter_(error_reporter),
      allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
                                         error_reporter)),
      tensors_allocated_(false),
      initialization_status_(kTfLiteError),
      eval_tensors_(nullptr),
      context_helper_(error_reporter_, &allocator_, model),
      input_tensors_(nullptr),
      output_tensors_(nullptr) {
  Init(profiler);
}

MicroInterpreter::MicroInterpreter(const Model* model,
                                   const MicroOpResolver& op_resolver,
                                   MicroAllocator* allocator,
                                   ErrorReporter* error_reporter,
                                   MicroProfiler* profiler)
    : model_(model),
      op_resolver_(op_resolver),
      error_reporter_(error_reporter),
      allocator_(*allocator),
      tensors_allocated_(false),
      initialization_status_(kTfLiteError),
      eval_tensors_(nullptr),
      context_helper_(error_reporter_, &allocator_, model),
      input_tensors_(nullptr),
      output_tensors_(nullptr) {
  Init(profiler);
}

MicroInterpreter::~MicroInterpreter() {
  if (node_and_registrations_ != nullptr) {
    for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
      TfLiteNode* node = &(node_and_registrations_[i].node);
      const TfLiteRegistration* registration =
          node_and_registrations_[i].registration;
      // registration is allocated outside the interpreter, so double-check
      // that it is not nullptr.
      if (registration != nullptr && registration->free != nullptr) {
        registration->free(&context_, node->user_data);
      }
    }
  }
}

void MicroInterpreter::Init(MicroProfiler* profiler) {
  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
      model_->subgraphs();
  if (subgraphs->size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Only 1 subgraph is currently supported.\n");
    initialization_status_ = kTfLiteError;
    return;
  }
  subgraph_ = (*subgraphs)[0];

  context_.impl_ = static_cast<void*>(&context_helper_);
  context_.ReportError = context_helper_.ReportOpError;
  context_.GetTensor = context_helper_.GetTensor;
  context_.GetEvalTensor = context_helper_.GetEvalTensor;
  context_.recommended_num_threads = 1;
  context_.profiler = profiler;

  initialization_status_ = kTfLiteOk;
}

TfLiteStatus MicroInterpreter::AllocateTensors() {
  if (allocator_.StartModelAllocation(model_, op_resolver_,
                                      &node_and_registrations_,
                                      &eval_tensors_) != kTfLiteOk) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Failed starting model allocation.\n");
    initialization_status_ = kTfLiteError;
    return kTfLiteError;
  }

  // Update the pointer now that TfLiteEvalTensor allocation has completed on
  // the context helper.
  // TODO(b/16157777): This call would not be needed if ContextHelper were
  // rolled into the interpreter.
  context_helper_.SetTfLiteEvalTensors(eval_tensors_);
  context_.tensors_size = subgraph_->tensors()->size();

  // Only allow AllocatePersistentBuffer in the Init stage.
  context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
  context_.RequestScratchBufferInArena = nullptr;
  context_.GetScratchBuffer = nullptr;

  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;
    size_t init_data_size;
    const char* init_data;
    if (registration->builtin_code == BuiltinOperator_CUSTOM) {
      init_data = reinterpret_cast<const char*>(node->custom_initial_data);
      init_data_size = node->custom_initial_data_size;
    } else {
      init_data = reinterpret_cast<const char*>(node->builtin_data);
      init_data_size = 0;
    }
    if (registration->init) {
      node->user_data =
          registration->init(&context_, init_data, init_data_size);
    }
  }

  // Both AllocatePersistentBuffer and RequestScratchBufferInArena are
  // available in the Prepare stage.
  context_.RequestScratchBufferInArena =
      context_helper_.RequestScratchBufferInArena;
  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;
    if (registration->prepare) {
      TfLiteStatus prepare_status = registration->prepare(&context_, node);
      if (prepare_status != kTfLiteOk) {
        TF_LITE_REPORT_ERROR(
            error_reporter_,
            "Node %s (number %d) failed to prepare with status %d",
            OpNameFromRegistration(registration), i, prepare_status);
        return kTfLiteError;
      }
    }
    allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
  }

  // Prepare is done, we're ready for Invoke. Memory allocation is no longer
  // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
  context_.AllocatePersistentBuffer = nullptr;
  context_.RequestScratchBufferInArena = nullptr;
  context_.GetScratchBuffer = context_helper_.GetScratchBuffer;

  TF_LITE_ENSURE_OK(&context_,
                    allocator_.FinishModelAllocation(model_, eval_tensors_,
                                                     &scratch_buffer_handles_));
  // TODO(b/16157777): Remove this when ContextHelper is rolled into this
  // class.
  context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);

  // TODO(b/162311891): Drop these allocations when the interpreter supports
  // handling buffers from TfLiteEvalTensor.
  input_tensors_ =
      reinterpret_cast<TfLiteTensor**>(allocator_.AllocatePersistentBuffer(
          sizeof(TfLiteTensor*) * inputs_size()));
  if (input_tensors_ == nullptr) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to allocate memory for context->input_tensors_, "
        "%d bytes required",
        sizeof(TfLiteTensor*) * inputs_size());
    return kTfLiteError;
  }

  for (size_t i = 0; i < inputs_size(); ++i) {
    input_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
        model_, eval_tensors_, inputs().Get(i));
    if (input_tensors_[i] == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter_,
                           "Failed to initialize input tensor %d", i);
      return kTfLiteError;
    }
  }

  // TODO(b/162311891): Drop these allocations when the interpreter supports
  // handling buffers from TfLiteEvalTensor.
  output_tensors_ =
      reinterpret_cast<TfLiteTensor**>(allocator_.AllocatePersistentBuffer(
          sizeof(TfLiteTensor*) * outputs_size()));
  if (output_tensors_ == nullptr) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to allocate memory for context->output_tensors_, "
        "%d bytes required",
        sizeof(TfLiteTensor*) * outputs_size());
    return kTfLiteError;
  }

  for (size_t i = 0; i < outputs_size(); ++i) {
    output_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
        model_, eval_tensors_, outputs().Get(i));
    if (output_tensors_[i] == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter_,
                           "Failed to initialize output tensor %d", i);
      return kTfLiteError;
    }
  }

  TF_LITE_ENSURE_STATUS(ResetVariableTensors());

  tensors_allocated_ = true;
  return kTfLiteOk;
}

TfLiteStatus MicroInterpreter::Invoke() {
  if (initialization_status_ != kTfLiteOk) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Invoke() called after initialization failed\n");
    return kTfLiteError;
  }

  // Ensure tensors are allocated before the interpreter is invoked to avoid
  // difficult to debug segfaults.
  if (!tensors_allocated_) {
    TF_LITE_ENSURE_OK(&context_, AllocateTensors());
  }

  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
    auto* node = &(node_and_registrations_[i].node);
    auto* registration = node_and_registrations_[i].registration;

// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration
// is only defined for builds with the error strings.
#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
    ScopedMicroProfiler scoped_profiler(
        OpNameFromRegistration(registration),
        reinterpret_cast<MicroProfiler*>(context_.profiler));
#endif

    TFLITE_DCHECK(registration->invoke);
    TfLiteStatus invoke_status = registration->invoke(&context_, node);

    // All TfLiteTensor structs used in the kernel are allocated from temp
    // memory in the allocator. This creates a chain of allocations in the
    // temp section. The call below resets the chain of allocations to
    // prepare for the next call.
    allocator_.ResetTempAllocations();

    if (invoke_status == kTfLiteError) {
      TF_LITE_REPORT_ERROR(
          error_reporter_,
          "Node %s (number %d) failed to invoke with status %d",
          OpNameFromRegistration(registration), i, invoke_status);
      return kTfLiteError;
    } else if (invoke_status != kTfLiteOk) {
      return invoke_status;
    }
  }

  return kTfLiteOk;
}

TfLiteTensor* MicroInterpreter::input(size_t index) {
  const size_t length = inputs_size();
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Input index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return input_tensors_[index];
}

TfLiteTensor* MicroInterpreter::output(size_t index) {
  const size_t length = outputs_size();
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Output index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return output_tensors_[index];
}

TfLiteTensor* MicroInterpreter::tensor(size_t index) {
  const size_t length = tensors_size();
  if (index >= length) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "Tensor index %d out of range (length is %d)", index,
                         length);
    return nullptr;
  }
  return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
                                                   index);
}

TfLiteStatus MicroInterpreter::ResetVariableTensors() {
  for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
    auto* tensor = subgraph_->tensors()->Get(i);
    if (tensor->is_variable()) {
      size_t buffer_size;
      TF_LITE_ENSURE_STATUS(
          TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));

      int value = 0;
      if (tensor->type() == tflite::TensorType_INT8) {
        value = tensor->quantization()->zero_point()->Get(0);
      }
      memset(eval_tensors_[i].data.raw, value, buffer_size);
    }
  }

  return kTfLiteOk;
}

}  // namespace tflite