Reduce the size of TfLiteTensor for the TF Micro runtime.

This change uses the existing micro-specific build flag (TF_LITE_STATIC_MEMORY) to reduce the size of TfLiteTensor. Under this build setting, the struct keeps only the minimum set of fields required to initialize and prepare a model in TFLM. The define is opt-in for internal (Bazel) builds and remains enabled by default in Makefile builds.

All TFLM internal targets can be built with this flag by adding '--copt=-DTF_LITE_STATIC_MEMORY'.
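
For example (target pattern shown for illustration only), a Bazel build of the TFLM tree with the reduced struct looks like:

  bazel build --copt=-DTF_LITE_STATIC_MEMORY //tensorflow/lite/micro/...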

This change reduces sizeof(TfLiteTensor) from 112 bytes to 64 bytes on 64-bit systems.

TfLiteTensor struct size reduced by 1.75x (~43% reduction)
Tail allocation reduced by 2,592 bytes (~12.5% reduction)
Total allocation reduced by 2,592 bytes (~12% reduction)
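
(For the keyword model below, this follows directly from the struct shrink: 54 tensors x (112 - 64) bytes = 2,592 bytes, which is exactly the tail and total reduction reported here.)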

Optimized results from memory_arena_threshold_test:
Keyword Model:
--------------
[RecordingMicroAllocator] Arena allocation total 18448 bytes
[RecordingMicroAllocator] Arena allocation head 672 bytes
[RecordingMicroAllocator] Arena allocation tail 17776 bytes
[RecordingMicroAllocator] 'TfLiteTensor struct' used 3456 bytes with alignment overhead (requested 3456 bytes for 54 tensors)
[RecordingMicroAllocator] 'TfLiteTensor quantization data' used 1728 bytes with alignment overhead (requested 1728 bytes for 108 allocations)
[RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 10240 bytes with alignment overhead (requested 10240 bytes for 7 allocations)
[RecordingMicroAllocator] 'NodeAndRegistration struct' used 1200 bytes with alignment overhead (requested 1200 bytes for 15 NodeAndRegistration structs)
[RecordingMicroAllocator] 'Operator runtime data' used 148 bytes with alignment overhead (requested 148 bytes for 13 OpData structs)
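
(Consistency check: 54 tensors x 64 bytes = 3,456 bytes, matching the 'TfLiteTensor struct' line above.)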

Test Conv Model:
----------------
[RecordingMicroAllocator] Arena allocation total 10960 bytes
[RecordingMicroAllocator] Arena allocation head 7744 bytes
[RecordingMicroAllocator] Arena allocation tail 3216 bytes
[RecordingMicroAllocator] 'TfLiteTensor struct' used 960 bytes with alignment overhead (requested 960 bytes for 15 tensors)
[RecordingMicroAllocator] 'TfLiteTensor quantization data' used 768 bytes with alignment overhead (requested 752 bytes for 24 allocations)
[RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 0 bytes with alignment overhead (requested 0 bytes for 0 allocations)
[RecordingMicroAllocator] 'NodeAndRegistration struct' used 560 bytes with alignment overhead (requested 560 bytes for 7 NodeAndRegistration structs)
[RecordingMicroAllocator] 'Operator runtime data' used 136 bytes with alignment overhead (requested 136 bytes for 5 OpData structs)
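
(Consistency check: 15 tensors x 64 bytes = 960 bytes, matching the 'TfLiteTensor struct' line above.)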

PiperOrigin-RevId: 317335359
Change-Id: Ic3d4d2c3e62249f072ece8f621f9ef94eaa28589
Author: Nick Kreeger, 2020-06-19 10:40:30 -07:00 (committed by TensorFlower Gardener)
Parent: e0780ef031
Commit: fbf407383c
5 changed files with 111 additions and 4 deletions

@@ -375,6 +375,7 @@ typedef struct TfLiteSparsity {
// An tensor in the interpreter system which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
typedef struct TfLiteTensor {
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
@@ -439,6 +440,51 @@ typedef struct TfLiteTensor {
// `dims_signature` contains [1, -1, -1, 3]).
const TfLiteIntArray* dims_signature;
} TfLiteTensor;
#else
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
// contains only the minimum fields required to initialize and prepare a micro
// inference graph. The fields in this struct have been ordered from
// largest-to-smallest for optimal struct sizeof.
//
// NOTE: This flag is opt-in only at compile time.
typedef struct TfLiteTensor {
// TODO(b/155784997): Consider consolidating these quantization fields:
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;
// Quantization information.
TfLiteQuantizationParams params;
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;
// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;
// True if the tensor is a variable.
bool is_variable;
} TfLiteTensor;
#endif // TF_LITE_STATIC_MEMORY
#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.
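
The reduced struct still carries everything a kernel's prepare step typically touches: type, dims, bytes, data, and the quantization fields. A minimal sketch (not part of this commit; the helper names are hypothetical, and the include path assumes the standard TfLite C API header):

#include <stddef.h>
#include "tensorflow/lite/c/common.h"

// Element count derived from `dims`; works with either TfLiteTensor variant.
static size_t NumElements(const TfLiteTensor* tensor) {
  size_t count = 1;
  for (int i = 0; i < tensor->dims->size; ++i) {
    count *= (size_t)tensor->dims->data[i];
  }
  return count;
}

// Checks the invariant described in the `bytes` comment for float tensors:
// bytes == sizeof(float) * dims[0] * ... * dims[n-1].
static bool BytesMatchDims(const TfLiteTensor* tensor) {
  return tensor->type != kTfLiteFloat32 ||
         tensor->bytes == sizeof(float) * NumElements(tensor);
}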

@@ -41,9 +41,17 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
// NOTE: These values are measured on x86-64:
// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
//
// Run this test with '--copt=-DTF_LITE_MICRO_OPTIMIZED_RUNTIME' to get
// optimized memory runtime values:
#ifdef TF_LITE_STATIC_MEMORY
constexpr int kKeywordModelTotalSize = 18448;
constexpr int kKeywordModelTailSize = 17776;
#else
constexpr int kKeywordModelTotalSize = 21040;
constexpr int kKeywordModelHeadSize = 672;
constexpr int kKeywordModelTailSize = 20368;
#endif
constexpr int kKeywordModelHeadSize = 672;
constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728;
constexpr int kKeywordModelOpRuntimeDataSize = 148;
@@ -56,9 +64,14 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
// NOTE: These values are measured on x86-64:
// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
#ifdef TF_LITE_STATIC_MEMORY
constexpr int kTestConvModelTotalSize = 10960;
constexpr int kTestConvModelTailSize = 3216;
#else
constexpr int kTestConvModelTotalSize = 11680;
constexpr int kTestConvModelHeadSize = 7744;
constexpr int kTestConvModelTailSize = 3936;
#endif
constexpr int kTestConvModelHeadSize = 7744;
constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768;
constexpr int kTestConvModelOpRuntimeDataSize = 136;
@@ -81,7 +94,7 @@ void EnsureAllocatedSizeThreshold(const char* allocation_type, size_t actual,
TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold);
if (actual != expected) {
TF_LITE_REPORT_ERROR(micro_test::reporter,
"%s threshold failed: %ld != %ld", allocation_type,
"%s threshold failed: %d != %d", allocation_type,
actual, expected);
}
} else {

@@ -284,7 +284,7 @@ TF_LITE_MICRO_TEST(TestIncompleteInitializationAllocationsWithSmallArena) {
tflite::testing::MockOpResolver mock_resolver;
// 1kb is too small for the ComplexMockModel:
constexpr size_t allocator_buffer_size = 1048;
constexpr size_t allocator_buffer_size = 500;
uint8_t allocator_buffer[allocator_buffer_size];
tflite::RecordingMicroAllocator* allocator =
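
(Presumably the 1,048-byte arena is no longer reliably "too small" once each TfLiteTensor costs 48 fewer bytes, so the test shrinks the buffer to 500 bytes to keep exercising the allocation-failure path.)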

@@ -75,6 +75,8 @@ TEST_SCRIPT := tensorflow/lite/micro/testing/test_linux_binary.sh
MICROLITE_LIBS := -lm
# TODO(b/150240249): Add in -fno-rtti once that works for the Xtensa toolchain.
# TODO(b/159155203): Consider TF_LITE_STATIC_MEMORY to align more with the fact
# this flag is for an optimized micro runtime.
CXXFLAGS := -std=c++11 -DTF_LITE_STATIC_MEMORY
CCFLAGS := -std=c11 -DTF_LITE_STATIC_MEMORY
ARFLAGS := -r
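
With the Makefile defining TF_LITE_STATIC_MEMORY globally, a build can also guard against the struct growing again. A hypothetical compile-time check (not part of this commit; the 64-byte bound is the x86-64 measurement quoted above and would need adjusting on other targets):

#include "tensorflow/lite/c/common.h"

// Only meaningful when TF_LITE_STATIC_MEMORY is defined; the full TfLiteTensor
// is 112 bytes on 64-bit hosts and would fail this check.
static_assert(sizeof(TfLiteTensor) <= 64,
              "TfLiteTensor exceeds the TF_LITE_STATIC_MEMORY size budget");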

@@ -375,6 +375,7 @@ typedef struct TfLiteSparsity {
// An tensor in the interpreter system which is a wrapper around a buffer of
// data including a dimensionality (or NULL if not currently defined).
#ifndef TF_LITE_STATIC_MEMORY
typedef struct TfLiteTensor {
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
@@ -439,6 +440,51 @@ typedef struct TfLiteTensor {
// `dims_signature` contains [1, -1, -1, 3]).
const TfLiteIntArray* dims_signature;
} TfLiteTensor;
#else
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
// contains only the minimum fields required to initialize and prepare a micro
// inference graph. The fields in this struct have been ordered from
// largest-to-smallest for optimal struct sizeof.
//
// NOTE: This flag is opt-in only at compile time.
typedef struct TfLiteTensor {
// TODO(b/155784997): Consider consolidating these quantization fields:
// Quantization information. Replaces params field above.
TfLiteQuantization quantization;
// Quantization information.
TfLiteQuantizationParams params;
// A union of data pointers. The appropriate type should be used for a typed
// tensor based on `type`.
TfLitePtrUnion data;
// A pointer to a structure representing the dimensionality interpretation
// that the buffer should have. NOTE: the product of elements of `dims`
// and the element datatype size should be equal to `bytes` below.
TfLiteIntArray* dims;
// The number of bytes required to store the data of this Tensor. I.e.
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
// type is kTfLiteFloat32 and dims = {3, 2} then
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
size_t bytes;
// The data type specification for data stored in `data`. This affects
// what member of `data` union should be used.
TfLiteType type;
// How memory is mapped
// kTfLiteMmapRo: Memory mapped read only.
// i.e. weights
// kTfLiteArenaRw: Arena allocated read write memory
// (i.e. temporaries, outputs).
TfLiteAllocationType allocation_type;
// True if the tensor is a variable.
bool is_variable;
} TfLiteTensor;
#endif // TF_LITE_STATIC_MEMORY
#ifndef TF_LITE_STATIC_MEMORY
// Free data memory of tensor `t`.