Reduce the size of TfLiteTensor for the TF Micro runtime.
This change uses the existing micro-specific build flag (TF_LITE_STATIC_MEMORY) to reduce the size of TfLiteTensor. In this build setting, only the minimum number of fields required for preparing and initializing a model in TFLM are used. This build define is opt-in only for internal builds and continues to be enabled by default in Makefile builds. All TFLM internal targets can be built with this flag by adding '--copt=-DTF_LITE_STATIC_MEMORY'. This change reduces the sizeof(TfLiteTensor) to 64 bytes (64-bit systems) down from 112 bytes (64-bit systems). TfLiteTensor struct reduced by 1.75x (~43% reduction) Tail allocation reduced by: 2,592 bytes (~12.5% reduction) Total allocation reduced by: 2,592 bytes (~12% reduction) Optimized results from memory_arena_threshold_test: Keyword Model: -------------- [RecordingMicroAllocator] Arena allocation total 18448 bytes [RecordingMicroAllocator] Arena allocation head 672 bytes [RecordingMicroAllocator] Arena allocation tail 17776 bytes [RecordingMicroAllocator] 'TfLiteTensor struct' used 3456 bytes with alignment overhead (requested 3456 bytes for 54 tensors) [RecordingMicroAllocator] 'TfLiteTensor quantization data' used 1728 bytes with alignment overhead (requested 1728 bytes for 108 allocations) [RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 10240 bytes with alignment overhead (requested 10240 bytes for 7 allocations) [RecordingMicroAllocator] 'NodeAndRegistration struct' used 1200 bytes with alignment overhead (requested 1200 bytes for 15 NodeAndRegistration structs) [RecordingMicroAllocator] 'Operator runtime data' used 148 bytes with alignment overhead (requested 148 bytes for 13 OpData structs) Test Conv Model: ---------------- [RecordingMicroAllocator] Arena allocation total 10960 bytes [RecordingMicroAllocator] Arena allocation head 7744 bytes [RecordingMicroAllocator] Arena allocation tail 3216 bytes [RecordingMicroAllocator] 'TfLiteTensor struct' used 960 bytes with alignment overhead (requested 960 
bytes for 15 tensors) [RecordingMicroAllocator] 'TfLiteTensor quantization data' used 768 bytes with alignment overhead (requested 752 bytes for 24 allocations) [RecordingMicroAllocator] 'TfLiteTensor variable buffer data' used 0 bytes with alignment overhead (requested 0 bytes for 0 allocations) [RecordingMicroAllocator] 'NodeAndRegistration struct' used 560 bytes with alignment overhead (requested 560 bytes for 7 NodeAndRegistration structs) [RecordingMicroAllocator] 'Operator runtime data' used 136 bytes with alignment overhead (requested 136 bytes for 5 OpData structs) PiperOrigin-RevId: 317335359 Change-Id: Ic3d4d2c3e62249f072ece8f621f9ef94eaa28589
This commit is contained in:
parent
e0780ef031
commit
fbf407383c
@ -375,6 +375,7 @@ typedef struct TfLiteSparsity {
|
||||
|
||||
// A tensor in the interpreter system which is a wrapper around a buffer of
|
||||
// data including a dimensionality (or NULL if not currently defined).
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
typedef struct TfLiteTensor {
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
@ -439,6 +440,51 @@ typedef struct TfLiteTensor {
|
||||
// `dims_signature` contains [1, -1, -1, 3]).
|
||||
const TfLiteIntArray* dims_signature;
|
||||
} TfLiteTensor;
|
||||
#else
|
||||
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
|
||||
// contains only the minimum fields required to initialize and prepare a micro
|
||||
// inference graph. The fields in this struct have been ordered from
|
||||
// largest-to-smallest for optimal struct sizeof.
|
||||
//
|
||||
// NOTE: This flag is opt-in only at compile time.
|
||||
typedef struct TfLiteTensor {
|
||||
// TODO(b/155784997): Consider consolidating these quantization fields:
|
||||
// Quantization information. Replaces params field below.
|
||||
TfLiteQuantization quantization;
|
||||
|
||||
// Quantization information.
|
||||
TfLiteQuantizationParams params;
|
||||
|
||||
// A union of data pointers. The appropriate type should be used for a typed
|
||||
// tensor based on `type`.
|
||||
TfLitePtrUnion data;
|
||||
|
||||
// A pointer to a structure representing the dimensionality interpretation
|
||||
// that the buffer should have. NOTE: the product of elements of `dims`
|
||||
// and the element datatype size should be equal to `bytes` below.
|
||||
TfLiteIntArray* dims;
|
||||
|
||||
// The number of bytes required to store the data of this Tensor. I.e.
|
||||
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
|
||||
// type is kTfLiteFloat32 and dims = {3, 2} then
|
||||
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
|
||||
size_t bytes;
|
||||
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
TfLiteType type;
|
||||
|
||||
// How memory is mapped
|
||||
// kTfLiteMmapRo: Memory mapped read only.
|
||||
// i.e. weights
|
||||
// kTfLiteArenaRw: Arena allocated read write memory
|
||||
// (i.e. temporaries, outputs).
|
||||
TfLiteAllocationType allocation_type;
|
||||
|
||||
// True if the tensor is a variable.
|
||||
bool is_variable;
|
||||
} TfLiteTensor;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Free data memory of tensor `t`.
|
||||
|
@ -41,9 +41,17 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
|
||||
|
||||
// NOTE: These values are measured on x86-64:
|
||||
// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
|
||||
//
|
||||
// Run this test with '--copt=-DTF_LITE_STATIC_MEMORY' to get
|
||||
// optimized memory runtime values:
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
constexpr int kKeywordModelTotalSize = 18448;
|
||||
constexpr int kKeywordModelTailSize = 17776;
|
||||
#else
|
||||
constexpr int kKeywordModelTotalSize = 21040;
|
||||
constexpr int kKeywordModelHeadSize = 672;
|
||||
constexpr int kKeywordModelTailSize = 20368;
|
||||
#endif
|
||||
constexpr int kKeywordModelHeadSize = 672;
|
||||
constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
|
||||
constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728;
|
||||
constexpr int kKeywordModelOpRuntimeDataSize = 148;
|
||||
@ -56,9 +64,14 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
|
||||
|
||||
// NOTE: These values are measured on x86-64:
|
||||
// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
|
||||
#ifdef TF_LITE_STATIC_MEMORY
|
||||
constexpr int kTestConvModelTotalSize = 10960;
|
||||
constexpr int kTestConvModelTailSize = 3216;
|
||||
#else
|
||||
constexpr int kTestConvModelTotalSize = 11680;
|
||||
constexpr int kTestConvModelHeadSize = 7744;
|
||||
constexpr int kTestConvModelTailSize = 3936;
|
||||
#endif
|
||||
constexpr int kTestConvModelHeadSize = 7744;
|
||||
constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768;
|
||||
constexpr int kTestConvModelOpRuntimeDataSize = 136;
|
||||
|
||||
@ -81,7 +94,7 @@ void EnsureAllocatedSizeThreshold(const char* allocation_type, size_t actual,
|
||||
TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold);
|
||||
if (actual != expected) {
|
||||
TF_LITE_REPORT_ERROR(micro_test::reporter,
|
||||
"%s threshold failed: %ld != %ld", allocation_type,
|
||||
"%s threshold failed: %d != %d", allocation_type,
|
||||
actual, expected);
|
||||
}
|
||||
} else {
|
||||
|
@ -284,7 +284,7 @@ TF_LITE_MICRO_TEST(TestIncompleteInitializationAllocationsWithSmallArena) {
|
||||
|
||||
tflite::testing::MockOpResolver mock_resolver;
|
||||
// 1kb is too small for the ComplexMockModel:
|
||||
constexpr size_t allocator_buffer_size = 1048;
|
||||
constexpr size_t allocator_buffer_size = 500;
|
||||
uint8_t allocator_buffer[allocator_buffer_size];
|
||||
|
||||
tflite::RecordingMicroAllocator* allocator =
|
||||
|
@ -75,6 +75,8 @@ TEST_SCRIPT := tensorflow/lite/micro/testing/test_linux_binary.sh
|
||||
MICROLITE_LIBS := -lm
|
||||
|
||||
# TODO(b/150240249): Add in -fno-rtti once that works for the Xtensa toolchain.
|
||||
# TODO(b/159155203): Consider renaming TF_LITE_STATIC_MEMORY to align more with the fact
|
||||
# this flag is for an optimized micro runtime.
|
||||
CXXFLAGS := -std=c++11 -DTF_LITE_STATIC_MEMORY
|
||||
CCFLAGS := -std=c11 -DTF_LITE_STATIC_MEMORY
|
||||
ARFLAGS := -r
|
||||
|
@ -375,6 +375,7 @@ typedef struct TfLiteSparsity {
|
||||
|
||||
// A tensor in the interpreter system which is a wrapper around a buffer of
|
||||
// data including a dimensionality (or NULL if not currently defined).
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
typedef struct TfLiteTensor {
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
@ -439,6 +440,51 @@ typedef struct TfLiteTensor {
|
||||
// `dims_signature` contains [1, -1, -1, 3]).
|
||||
const TfLiteIntArray* dims_signature;
|
||||
} TfLiteTensor;
|
||||
#else
|
||||
// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
|
||||
// contains only the minimum fields required to initialize and prepare a micro
|
||||
// inference graph. The fields in this struct have been ordered from
|
||||
// largest-to-smallest for optimal struct sizeof.
|
||||
//
|
||||
// NOTE: This flag is opt-in only at compile time.
|
||||
typedef struct TfLiteTensor {
|
||||
// TODO(b/155784997): Consider consolidating these quantization fields:
|
||||
// Quantization information. Replaces params field below.
|
||||
TfLiteQuantization quantization;
|
||||
|
||||
// Quantization information.
|
||||
TfLiteQuantizationParams params;
|
||||
|
||||
// A union of data pointers. The appropriate type should be used for a typed
|
||||
// tensor based on `type`.
|
||||
TfLitePtrUnion data;
|
||||
|
||||
// A pointer to a structure representing the dimensionality interpretation
|
||||
// that the buffer should have. NOTE: the product of elements of `dims`
|
||||
// and the element datatype size should be equal to `bytes` below.
|
||||
TfLiteIntArray* dims;
|
||||
|
||||
// The number of bytes required to store the data of this Tensor. I.e.
|
||||
// (bytes of each element) * dims[0] * ... * dims[n-1]. For example, if
|
||||
// type is kTfLiteFloat32 and dims = {3, 2} then
|
||||
// bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
|
||||
size_t bytes;
|
||||
|
||||
// The data type specification for data stored in `data`. This affects
|
||||
// what member of `data` union should be used.
|
||||
TfLiteType type;
|
||||
|
||||
// How memory is mapped
|
||||
// kTfLiteMmapRo: Memory mapped read only.
|
||||
// i.e. weights
|
||||
// kTfLiteArenaRw: Arena allocated read write memory
|
||||
// (i.e. temporaries, outputs).
|
||||
TfLiteAllocationType allocation_type;
|
||||
|
||||
// True if the tensor is a variable.
|
||||
bool is_variable;
|
||||
} TfLiteTensor;
|
||||
#endif // TF_LITE_STATIC_MEMORY
|
||||
|
||||
#ifndef TF_LITE_STATIC_MEMORY
|
||||
// Free data memory of tensor `t`.
|
||||
|
Loading…
Reference in New Issue
Block a user