Adds API for users to provide custom allocations for TFLite tensors

PiperOrigin-RevId: 325511199
Change-Id: Ia8c0550375d508db3fa75b6b5df5a70088b7470b
Sachin Joglekar 2020-08-07 14:39:18 -07:00 committed by TensorFlower Gardener
parent 1e6fa32dfe
commit 7a93fd22f7
10 changed files with 383 additions and 1 deletion

View File

@@ -412,9 +412,11 @@ cc_test(
"tflite_smoke_test",
],
deps = [
":builtin_op_data",
":external_cpu_backend_context",
":framework",
":string_util",
":util",
":version",
"//tensorflow/lite/core/api",
"//tensorflow/lite/kernels:builtin_ops",

View File

@@ -358,6 +358,8 @@ typedef union TfLitePtrUnion {
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
// useful for tensors that can be computed during prepare and treated
// as constant inputs for downstream ops (also in prepare).
// * kTfLiteCustom: Custom memory allocation provided by the user. See
// TfLiteCustomAllocation below.
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
@@ -365,6 +367,7 @@ typedef enum TfLiteAllocationType {
kTfLiteArenaRwPersistent,
kTfLiteDynamic,
kTfLitePersistentRo,
kTfLiteCustom,
} TfLiteAllocationType;
// The delegates should use zero or positive integers to represent handles.
@@ -397,6 +400,15 @@ typedef struct TfLiteSparsity {
int dim_metadata_size;
} TfLiteSparsity;
// Defines a custom memory allocation not owned by the runtime.
// `data` should be aligned to kDefaultTensorAlignment defined in
// lite/util.h. (Currently 64 bytes)
// NOTE: See Interpreter::SetCustomAllocationForTensor for details on usage.
typedef struct TfLiteCustomAllocation {
void* data;
size_t bytes;
} TfLiteCustomAllocation;
// A tensor in the interpreter system, which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
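As an editor's illustration (not part of this commit), a caller could fill in this struct by reserving 64-byte-aligned storage for a tensor of known size. The sketch below assumes C++17's std::aligned_alloc is available and that tensor_bytes comes from an existing TfLiteTensor::bytes; the helper name is hypothetical.

#include <cstddef>
#include <cstdlib>  // std::aligned_alloc / std::free

// Hypothetical helper: wraps a freshly allocated 64-byte-aligned buffer in a
// TfLiteCustomAllocation. The caller keeps ownership and must free the buffer
// only after the interpreter no longer uses the tensor.
TfLiteCustomAllocation MakeAlignedCustomAllocation(std::size_t tensor_bytes) {
  const std::size_t kAlignment = 64;  // kDefaultTensorAlignment in lite/util.h
  // std::aligned_alloc requires the size to be a multiple of the alignment.
  const std::size_t rounded_bytes =
      (tensor_bytes + kAlignment - 1) / kAlignment * kAlignment;
  TfLiteCustomAllocation alloc;
  alloc.data = std::aligned_alloc(kAlignment, rounded_bytes);
  alloc.bytes = tensor_bytes;
  return alloc;
}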

View File

@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/lite/core/subgraph.h"
#include <algorithm>
#include <cstdint>
#include "tensorflow/lite/arena_planner.h"
#include "tensorflow/lite/c/common.h"
@@ -140,6 +141,17 @@ const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
}
TfLiteStatus ValidateCustomAllocationForTensor(
TfLiteContext* context, const TfLiteTensor* tensor,
const TfLiteCustomAllocation& allocation) {
TF_LITE_ENSURE(context, allocation.data != nullptr);
TF_LITE_ENSURE(context, allocation.bytes >= tensor->bytes);
// Ensure provided memory is aligned to what TFLite requires.
const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data);
TF_LITE_ENSURE(context, data_ptr_value % kDefaultTensorAlignment == 0);
return kTfLiteOk;
}
} // namespace
// A trivial implementation of GraphInfo around the Interpreter.
@@ -898,9 +910,24 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() {
execution_plan_, &last_exec_plan_index_prepared));
next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
// Execute arena allocations.
TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
next_execution_plan_index_to_plan_allocation_,
last_exec_plan_index_prepared));
// Ensure custom allocations are still valid for applicable tensors.
// This causes some extra validations for cases with dynamic tensors, but the
// overhead should be minimal since the number of custom-allocated tensors
// will typically be low.
for (int i = 0; i < custom_allocations_.size(); ++i) {
auto idx_and_alloc = custom_allocations_[i];
auto& tensor = tensors()[idx_and_alloc.first];
const auto& alloc = idx_and_alloc.second;
TF_LITE_ENSURE(context(), tensor.allocation_type == kTfLiteCustom);
TF_LITE_ENSURE_STATUS(
ValidateCustomAllocationForTensor(context(), &tensor, alloc));
}
next_execution_plan_index_to_plan_allocation_ =
last_exec_plan_index_prepared + 1;
@@ -1218,7 +1245,8 @@ TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
if (tensor->allocation_type == kTfLiteArenaRw ||
tensor->allocation_type == kTfLiteDynamic ||
tensor->allocation_type == kTfLiteArenaRwPersistent ||
tensor->allocation_type == kTfLitePersistentRo) {
tensor->allocation_type == kTfLitePersistentRo ||
tensor->allocation_type == kTfLiteCustom) {
tensor_resized_since_op_invoke_ |=
TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
if (tensor->type != kTfLiteString) {
@@ -1455,6 +1483,33 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
return status;
}
TfLiteStatus Subgraph::SetCustomAllocationForTensor(
int tensor_index, const TfLiteCustomAllocation& allocation) {
TfLiteTensor* tensor = &context_.tensors[tensor_index];
TF_LITE_ENSURE(context(), tensor->allocation_type == kTfLiteArenaRw ||
tensor->allocation_type == kTfLiteCustom);
TF_LITE_ENSURE_STATUS(
ValidateCustomAllocationForTensor(context(), tensor, allocation));
// If tensor already has a custom alloc, just reassign.
const auto alloc_it = std::find_if(
custom_allocations_.begin(), custom_allocations_.end(),
[tensor_index](
const std::pair<int, TfLiteCustomAllocation>& existing_alloc) {
return existing_alloc.first == tensor_index;
});
if (alloc_it == custom_allocations_.end()) {
custom_allocations_.emplace_back(tensor_index, allocation);
} else {
alloc_it->second = allocation;
}
tensor->allocation_type = kTfLiteCustom;
tensor->data.data = allocation.data;
return kTfLiteOk;
}
} // namespace impl
} // namespace tflite

View File

@@ -332,6 +332,29 @@ class Subgraph {
// Before `AllocateTensors` is called, this will always return true.
bool HasDynamicTensors() { return has_dynamic_tensors_; }
// Assigns (or reassigns) a custom memory allocation for the given tensor.
// If AllocateTensors() is called after this, the runtime does not consider
// the tensor during internal memory planning and will continue using the
// provided allocation for the tensor (assuming it satisfies the expected
// tensor byte length).
// The runtime does NOT take ownership of the underlying memory.
// Note that while this function can be called again to set a new allocation
// for the tensor, the tensor can no longer be reverted to TFLite arena memory.
//
// Parameters should satisfy the following conditions:
// 1. tensor->allocation_type == kTfLiteArenaRw
// In general, this is true for all non-constants such as I/O tensors.
// 2. allocation->data has the appropriate permissions for runtime access
// (Read-only for inputs, Read-Write for others), and outlives the Interpreter.
// 3. allocation->bytes >= tensor->bytes.
// This condition is checked again if any tensors are resized.
// 4. allocation->data should be aligned to kDefaultTensorAlignment
// defined in lite/util.h. (Currently 64 bytes)
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus SetCustomAllocationForTensor(
int tensor_index, const TfLiteCustomAllocation& allocation);
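Condition 4 can also be met without std::aligned_alloc by over-allocating a plain heap buffer and rounding the pointer up, which is the approach the NewCustomAlloc() helper in interpreter_test.cc (later in this change) takes. A hedged sketch, with tensor_bytes assumed to come from TfLiteTensor::bytes and the function name hypothetical:

#include <cstddef>
#include <cstdint>
#include <memory>

// Hypothetical sketch: the unique_ptr retains ownership of the raw buffer
// (the runtime never takes ownership); `p` becomes TfLiteCustomAllocation::data.
TfLiteCustomAllocation AlignWithSlack(std::unique_ptr<char[]>* owned,
                                      std::size_t tensor_bytes) {
  owned->reset(new char[tensor_bytes + 64]);  // slack for alignment
  std::uintptr_t p = reinterpret_cast<std::uintptr_t>(owned->get());
  p = (p + 63) & ~static_cast<std::uintptr_t>(63);  // round up to 64 bytes
  return TfLiteCustomAllocation{reinterpret_cast<void*>(p), tensor_bytes};
}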
private:
// SubgraphAwareProfiler wraps an actual TFLite profiler, such as a
// BufferedProfiler instance, and takes care of event profiling/tracing in a
@@ -680,6 +703,9 @@ class Subgraph {
std::unique_ptr<MemoryPlanner> memory_planner_;
// Contains <tensor idx, custom allocation> pairs for all applicable tensors.
std::vector<std::pair<int, TfLiteCustomAllocation>> custom_allocations_;
// Tracking bit for whether a tensor was resized in the course of an op
// invocation. This is a useful hint to ensure that dynamic tensor outputs
// trigger downstream reallocation after op invocation.

View File

@@ -163,6 +163,12 @@ void Interpreter::SetExternalContext(TfLiteExternalContextType type,
primary_subgraph().SetExternalContext(type, ctx);
}
TfLiteStatus Interpreter::SetCustomAllocationForTensor(
int tensor_index, const TfLiteCustomAllocation& allocation) {
return primary_subgraph().SetCustomAllocationForTensor(tensor_index,
allocation);
}
TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
return primary_subgraph().SetInputs(std::move(inputs));
}

View File

@@ -504,6 +504,29 @@ class Interpreter {
void SetExternalContext(TfLiteExternalContextType type,
TfLiteExternalContext* ctx);
// Assigns (or reassigns) a custom memory allocation for the given tensor.
// If AllocateTensors() is called after this, the runtime does not consider
// the tensor during internal memory planning and will continue using the
// provided allocation for the tensor (assuming it satisfies the expected
// tensor byte length).
// The runtime does NOT take ownership of the underlying memory.
// Note that while this function can be called again to set a new allocation
// for the tensor, the tensor can no longer be reverted to TFLite arena memory.
//
// Parameters should satisfy the following conditions:
// 1. tensor->allocation_type == kTfLiteArenaRw
// In general, this is true for all non-constants such as I/O tensors.
// 2. allocation->data has the appropriate permissions for runtime access
// (Read-only for inputs, Read-Write for others), and outlives the Interpreter.
// 3. allocation->bytes >= tensor->bytes.
// This condition is checked again if any tensors are resized.
// 4. allocation->data should be aligned to kDefaultTensorAlignment
// defined in lite/util.h. (Currently 64 bytes)
//
// WARNING: This is an experimental interface that is subject to change.
TfLiteStatus SetCustomAllocationForTensor(
int tensor_index, const TfLiteCustomAllocation& allocation);
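A hedged end-to-end sketch of the intended call order (mirroring the tests added later in this change). MakeAlignedCustomAllocation is the hypothetical helper sketched earlier, user_input is assumed caller data, and error checking is omitted:

#include <cstring>  // std::memcpy

void RunWithCustomBuffers(tflite::Interpreter* interpreter,
                          const float* user_input) {
  const int input_idx = interpreter->inputs()[0];
  const int output_idx = interpreter->outputs()[0];

  // Build 64-byte-aligned, caller-owned allocations sized to each tensor.
  TfLiteCustomAllocation in_alloc =
      MakeAlignedCustomAllocation(interpreter->tensor(input_idx)->bytes);
  TfLiteCustomAllocation out_alloc =
      MakeAlignedCustomAllocation(interpreter->tensor(output_idx)->bytes);

  // Register the custom buffers before memory planning, then allocate the rest.
  interpreter->SetCustomAllocationForTensor(input_idx, in_alloc);
  interpreter->SetCustomAllocationForTensor(output_idx, out_alloc);
  interpreter->AllocateTensors();

  // The input tensor now reads directly from the user-provided buffer.
  std::memcpy(in_alloc.data, user_input, interpreter->tensor(input_idx)->bytes);
  interpreter->Invoke();
  // Results are written straight into out_alloc.data.
}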
#ifndef DOXYGEN_SKIP
/// Adds `subgraphs_to_add` subgraphs, preserving pre-existing Subgraph
/// entries. The value pointed to by `first_new_subgraph_index` will be set to

View File

@@ -22,8 +22,10 @@ limitations under the License.
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/kernels/builtin_op_kernels.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/kernel_util.h"
@@ -1480,6 +1482,245 @@ TEST_F(CancellationTest, CancelDuringInvoke) {
ASSERT_EQ(invoke_error_code, kTfLiteError);
}
// Tests functionality related to custom memory allocations in TFLite.
class TestCustomAllocation : public ::testing::Test {
protected:
void SetUp() override {
// Simple model with two custom ops that add 2 float tensors each.
interpreter_.reset(new Interpreter);
interpreter_->AddTensors(5);
interpreter_->SetInputs({0, 1});
interpreter_->SetOutputs({3, 4});
TfLiteQuantizationParams quant;
interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3},
quant);
interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3},
quant);
auto* add_reg = ops::builtin::Register_ADD();
TfLiteAddParams* builtin_data0 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data1 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
TfLiteAddParams* builtin_data2 =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data0->activation = kTfLiteActNone;
builtin_data1->activation = kTfLiteActNone;
builtin_data2->activation = kTfLiteActNone;
interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, builtin_data0,
add_reg);
interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, builtin_data1,
add_reg);
interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, builtin_data2,
add_reg);
}
void AssignCustomAllocForTensor(int tensor_idx, int required_alignment) {
const TfLiteTensor* tensor = interpreter_->tensor(tensor_idx);
auto tensor_alloc = NewCustomAlloc(tensor->bytes, required_alignment);
ASSERT_EQ(
interpreter_->SetCustomAllocationForTensor(tensor_idx, tensor_alloc),
kTfLiteOk);
}
void VerifyInvoke() {
std::vector<float> input = {1.0f, 2.0f, 3.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f};
TfLiteTensor* tensor = interpreter_->tensor(interpreter_->outputs()[0]);
// typed_tensor<...> should work irrespective of custom alloc, since it
// accesses tensor.data.
memcpy(interpreter_->typed_tensor<float>(0), input.data(),
3 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(),
3 * sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
}
// The actual allocation is larger than num_bytes, to account for the
// required_alignment.
TfLiteCustomAllocation NewCustomAlloc(size_t num_bytes,
int required_alignment) {
// Extra memory to ensure alignment.
char* new_alloc = new char[num_bytes + required_alignment];
char* new_underlying_buffer_aligned_ptr = reinterpret_cast<char*>(
AlignTo(required_alignment, reinterpret_cast<intptr_t>(new_alloc)));
custom_alloc_buffers_.emplace_back(new_alloc);
return TfLiteCustomAllocation(
{new_underlying_buffer_aligned_ptr, num_bytes});
}
intptr_t AlignTo(size_t alignment, intptr_t offset) {
return offset % alignment == 0 ? offset
: offset + (alignment - offset % alignment);
}
void TearDown() override {
interpreter_.reset();
custom_alloc_buffers_.clear();
}
protected:
TfLiteAddParams add_params_;
std::unique_ptr<Interpreter> interpreter_;
std::vector<std::unique_ptr<char[]>> custom_alloc_buffers_;
};
TEST_F(TestCustomAllocation, InvalidAlignment) {
const TfLiteTensor* input_tensor =
interpreter_->tensor(interpreter_->inputs()[0]);
auto input_alloc =
NewCustomAlloc(input_tensor->bytes, kDefaultTensorAlignment - 1);
ASSERT_EQ(interpreter_->SetCustomAllocationForTensor(
interpreter_->inputs()[0], input_alloc),
kTfLiteError);
// Allocate tensors & Invoke should still work.
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, InsufficientBytes) {
auto input_alloc = NewCustomAlloc(4, kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->SetCustomAllocationForTensor(
interpreter_->inputs()[0], input_alloc),
kTfLiteError);
// Allocate tensors & Invoke should still work.
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, CustomInputAlloc) {
// Set custom allocation for one input tensor.
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, CustomInputAlloc_MultipleAssigns) {
// Set custom allocation for one input tensor.
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, CustomInputAlloc_AllocateTensorsBefore) {
// Allocate tensors.
// Allocating now will cause TFLite to reserve some extra memory, but nothing
// should break.
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, CustomInputAndOutputAllocs) {
// Set custom allocations for all IO tensors.
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
AssignCustomAllocForTensor(interpreter_->inputs()[1],
/*required_alignment=*/kDefaultTensorAlignment);
AssignCustomAllocForTensor(interpreter_->outputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
AssignCustomAllocForTensor(interpreter_->outputs()[1],
/*required_alignment=*/kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
TEST_F(TestCustomAllocation, ResizeTensorsWithoutEnoughMemory) {
// Set custom allocations for all input tensors.
AssignCustomAllocForTensor(interpreter_->inputs()[0],
/*required_alignment=*/kDefaultTensorAlignment);
AssignCustomAllocForTensor(interpreter_->inputs()[1],
/*required_alignment=*/kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// Now resize tensors to double the size.
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {2, 3}),
kTfLiteOk);
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2, 3}),
kTfLiteOk);
// Since the custom memory previously allocated isn't enough,
// AllocateTensors() will fail.
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError);
// Interpreter should no longer be in invokable state, so expect failure.
ASSERT_EQ(interpreter_->Invoke(), kTfLiteError);
}
TEST_F(TestCustomAllocation, ResizeTensorsWithEnoughMemory) {
// Set custom allocations for all input tensors, with double the required
// memory.
const TfLiteTensor* input0_tensor =
interpreter_->tensor(interpreter_->inputs()[0]);
auto input0_alloc =
NewCustomAlloc(2 * input0_tensor->bytes, kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->SetCustomAllocationForTensor(
interpreter_->inputs()[0], input0_alloc),
kTfLiteOk);
const TfLiteTensor* input1_tensor =
interpreter_->tensor(interpreter_->inputs()[1]);
auto input1_alloc =
NewCustomAlloc(2 * input1_tensor->bytes, kDefaultTensorAlignment);
ASSERT_EQ(interpreter_->SetCustomAllocationForTensor(
interpreter_->inputs()[1], input1_alloc),
kTfLiteOk);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
// Now resize tensors to double the size.
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {6, 1}),
kTfLiteOk);
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {6, 1}),
kTfLiteOk);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
std::vector<float> expected_output = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
TfLiteTensor* tensor = interpreter_->tensor(interpreter_->outputs()[0]);
memcpy(interpreter_->typed_tensor<float>(0), input.data(), 6 * sizeof(float));
memcpy(interpreter_->typed_tensor<float>(1), input.data(), 6 * sizeof(float));
ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
for (int i = 0; i < 6; ++i) {
EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i;
}
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {3, 1}),
kTfLiteOk);
ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {3, 1}),
kTfLiteOk);
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
VerifyInvoke();
}
} // namespace
} // namespace tflite

View File

@@ -109,6 +109,8 @@ const char* AllocTypeName(TfLiteAllocationType type) {
return "kTfLiteArenaRwPersistent";
case kTfLitePersistentRo:
return "kTfLitePersistentRo";
case kTfLiteCustom:
return "kTfLiteCustom";
}
return "(invalid)";
}

View File

@@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/schema/schema_generated.h"
namespace tflite {
@@ -81,6 +82,8 @@ const char* AllocTypeName(TfLiteAllocationType type) {
return "kTfLiteArenaRwPersistent";
case kTfLitePersistentRo:
return "kTfLitePersistentRo";
case kTfLiteCustom:
return "kTfLiteCustom";
}
return "(invalid)";
}

View File

@@ -358,6 +358,8 @@ typedef union TfLitePtrUnion {
// * kTfLitePersistentRo: Allocated and populated during prepare. This is
// useful for tensors that can be computed during prepare and treated
// as constant inputs for downstream ops (also in prepare).
// * kTfLiteCustom: Custom memory allocation provided by the user. See
// TfLiteCustomAllocation below.
typedef enum TfLiteAllocationType {
kTfLiteMemNone = 0,
kTfLiteMmapRo,
@@ -365,6 +367,7 @@ typedef enum TfLiteAllocationType {
kTfLiteArenaRwPersistent,
kTfLiteDynamic,
kTfLitePersistentRo,
kTfLiteCustom,
} TfLiteAllocationType;
// The delegates should use zero or positive integers to represent handles.
@@ -397,6 +400,15 @@ typedef struct TfLiteSparsity {
int dim_metadata_size;
} TfLiteSparsity;
// Defines a custom memory allocation not owned by the runtime.
// `data` should be aligned to kDefaultTensorAlignment defined in
// lite/util.h. (Currently 64 bytes)
// NOTE: See Interpreter::SetCustomAllocationForTensor for details on usage.
typedef struct TfLiteCustomAllocation {
void* data;
size_t bytes;
} TfLiteCustomAllocation;
// A tensor in the interpreter system, which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY