diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 41f3fde803b..86ceef5a7f9 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -205,6 +205,7 @@ cc_library( ], hdrs = [ "recording_micro_allocator.h", + "recording_micro_interpreter.h", "recording_simple_memory_allocator.h", ], build_for_embedded = True, @@ -361,3 +362,16 @@ tflite_micro_cc_test( "//tensorflow/lite/micro/testing:micro_test", ], ) + +tflite_micro_cc_test( + name = "memory_arena_threshold_test", + srcs = [ + "memory_arena_threshold_test.cc", + ], + deps = [ + ":op_resolvers", + ":recording_allocators", + "//tensorflow/lite/micro/benchmarks:keyword_scrambled_model_data", + "//tensorflow/lite/micro/testing:micro_test", + ], +) diff --git a/tensorflow/lite/micro/benchmarks/BUILD b/tensorflow/lite/micro/benchmarks/BUILD index cdd394e8bcb..637f7dd17f5 100644 --- a/tensorflow/lite/micro/benchmarks/BUILD +++ b/tensorflow/lite/micro/benchmarks/BUILD @@ -1,5 +1,10 @@ licenses(["notice"]) # Apache 2.0 +package_group( + name = "micro_top_level", + packages = ["//tensorflow/lite/micro"], +) + cc_library( name = "micro_benchmark", hdrs = [ @@ -47,6 +52,9 @@ cc_library( hdrs = [ "keyword_scrambled_model_data.h", ], + visibility = [ + ":micro_top_level", + ], ) cc_binary( diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc new file mode 100644 index 00000000000..3f0bbe9ad32 --- /dev/null +++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc @@ -0,0 +1,110 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <stdint.h> + +#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h" +#include "tensorflow/lite/micro/recording_micro_allocator.h" +#include "tensorflow/lite/micro/recording_micro_interpreter.h" +#include "tensorflow/lite/micro/testing/micro_test.h" + +/** + * Tests to ensure arena memory allocation does not regress by more than 3%. + */ + +namespace { + +// Ensure memory doesn't expand by more than 3%: +constexpr float kAllocationThreshold = 0.03; +const bool kIs64BitSystem = sizeof(void*) == 8; + +constexpr int kKeywordTensorArenaSize = 22 * 1024; +uint8_t tensor_arena[kKeywordTensorArenaSize]; + +constexpr int kKeywordModelTensorCount = 54; +constexpr int kKeywordModelNodeAndRegistrationCount = 15; + +// NOTE: These values are measured on x86-64: +// TODO(b/158651472): Consider auditing these values on non-64 bit systems.
+constexpr int kKeywordModelTotalSize = 21440; +constexpr int kKeywordModelHeadSize = 672; +constexpr int kKeywordModelTailSize = 20768; +constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 2160; +constexpr int kKeywordModelOpRuntimeDataSize = 148; + +void EnsureAllocatedSizeThreshold(size_t actual, size_t expected) { + // TODO(b/158651472): Better auditing of non-64 bit systems: + if (kIs64BitSystem) { + // 64-bit systems should check floor and ceiling to catch memory savings: + TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold); + } else { + // Non-64 bit systems should just expect allocation does not exceed the + // ceiling: + TF_LITE_MICRO_EXPECT_LE(actual, expected + expected * kAllocationThreshold); + } +} + +} // namespace + +TF_LITE_MICRO_TESTS_BEGIN + +TF_LITE_MICRO_TEST(TestKeywordModelMemoryThreshold) { + tflite::AllOpsResolver all_ops_resolver; + tflite::RecordingMicroInterpreter interpreter( + tflite::GetModel(g_keyword_scrambled_model_data), &all_ops_resolver, + tensor_arena, kKeywordTensorArenaSize, micro_test::reporter); + + interpreter.AllocateTensors(); + + const tflite::RecordingMicroAllocator& allocator = + interpreter.GetMicroAllocator(); + allocator.PrintAllocations(); + + EnsureAllocatedSizeThreshold( + allocator.GetSimpleMemoryAllocator()->GetUsedBytes(), + kKeywordModelTotalSize); + EnsureAllocatedSizeThreshold( + allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(), + kKeywordModelHeadSize); + EnsureAllocatedSizeThreshold( + allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(), + kKeywordModelTailSize); + EnsureAllocatedSizeThreshold( + allocator + .GetRecordedAllocation( + tflite::RecordedAllocationType::kTfLiteTensorArray) + .used_bytes, + sizeof(TfLiteTensor) * kKeywordModelTensorCount); + EnsureAllocatedSizeThreshold( + allocator + .GetRecordedAllocation(tflite::RecordedAllocationType:: + kTfLiteTensorArrayQuantizationData) + .used_bytes, + kKeywordModelTfLiteTensorQuantizationDataSize); + EnsureAllocatedSizeThreshold( + allocator + .GetRecordedAllocation( + tflite::RecordedAllocationType::kNodeAndRegistrationArray) + .used_bytes, + sizeof(tflite::NodeAndRegistration) * + kKeywordModelNodeAndRegistrationCount); + EnsureAllocatedSizeThreshold( + allocator.GetRecordedAllocation(tflite::RecordedAllocationType::kOpData) + .used_bytes, + kKeywordModelOpRuntimeDataSize); +} + +TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 13838ae66a6..be0cd9c436b 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -718,7 +718,9 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( return kTfLiteOk; } -ErrorReporter* MicroAllocator::error_reporter() { return error_reporter_; } +ErrorReporter* MicroAllocator::error_reporter() const { + return error_reporter_; +} TfLiteStatus MicroAllocator::InitGraphAndContextTensorData( const Model* model, TfLiteContext* context, const SubGraph* subgraph) { diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index 7fc091196a5..de1e4c715cb 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -172,7 +172,7 @@ class MicroAllocator { const MicroOpResolver& op_resolver, NodeAndRegistration* node_and_registrations); - ErrorReporter* error_reporter(); + ErrorReporter* error_reporter() const; private: // Initializes the graph and allocates TfLiteContext tensor data. 
diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index bffaca9a292..34358f051d3 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -159,6 +159,10 @@ class MicroInterpreter { // arena_used_bytes() + 16. size_t arena_used_bytes() const { return allocator_.used_bytes(); } + protected: + const MicroAllocator& allocator() const { return allocator_; } + const TfLiteContext& context() const { return context_; } + private: // TODO(b/158263161): Consider switching to Create() function to enable better // error reporting during initialization. diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index 7cf38d1df98..f3a246747d1 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -29,18 +29,23 @@ RecordingMicroAllocator::RecordingMicroAllocator( recording_memory_allocator_(recording_memory_allocator) {} RecordingMicroAllocator* RecordingMicroAllocator::Create( - RecordingSimpleMemoryAllocator* memory_allocator, - ErrorReporter* error_reporter) { - TFLITE_DCHECK(memory_allocator != nullptr); - uint8_t* allocator_buffer = memory_allocator->AllocateFromTail( + uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) { + TFLITE_DCHECK(error_reporter != nullptr); + + RecordingSimpleMemoryAllocator* simple_memory_allocator = + RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena, + arena_size); + TFLITE_DCHECK(simple_memory_allocator != nullptr); + + uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail( sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator)); RecordingMicroAllocator* allocator = new (allocator_buffer) - RecordingMicroAllocator(memory_allocator, error_reporter); + RecordingMicroAllocator(simple_memory_allocator, error_reporter); return allocator; } RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( - RecordedAllocationType allocation_type) { + RecordedAllocationType allocation_type) const { switch (allocation_type) { case RecordedAllocationType::kTfLiteTensorArray: return recorded_tflite_tensor_array_data_; @@ -56,7 +61,12 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( return RecordedAllocation(); } -void RecordingMicroAllocator::PrintAllocations() { +const RecordingSimpleMemoryAllocator* +RecordingMicroAllocator::GetSimpleMemoryAllocator() const { + return recording_memory_allocator_; +} + +void RecordingMicroAllocator::PrintAllocations() const { TF_LITE_REPORT_ERROR( error_reporter(), "[RecordingMicroAllocator] Arena allocation total %d bytes", @@ -70,24 +80,27 @@ void RecordingMicroAllocator::PrintAllocations() { "[RecordingMicroAllocator] Arena allocation tail %d bytes", recording_memory_allocator_->GetTailUsedBytes()); PrintRecordedAllocation(RecordedAllocationType::kTfLiteTensorArray, - "TfLiteTensor struct allocation"); + "TfLiteTensor struct allocation", "tensors"); PrintRecordedAllocation( RecordedAllocationType::kTfLiteTensorArrayQuantizationData, - "TfLiteTensor quantization data allocations"); + "TfLiteTensor quantization data allocations", "allocations"); PrintRecordedAllocation(RecordedAllocationType::kNodeAndRegistrationArray, - "NodeAndRegistration struct allocation"); + "NodeAndRegistration struct allocation", + "NodeAndRegistration structs"); PrintRecordedAllocation(RecordedAllocationType::kOpData, - "Operator runtime data allocation"); + 
"Operator runtime data allocation", "OpData structs"); } void RecordingMicroAllocator::PrintRecordedAllocation( - RecordedAllocationType allocation_type, const char* allocation_name) { + RecordedAllocationType allocation_type, const char* allocation_name, + const char* allocation_description) const { RecordedAllocation allocation = GetRecordedAllocation(allocation_type); - TF_LITE_REPORT_ERROR(error_reporter(), - "[RecordingMicroAllocator] '%s' used %d bytes " - "(requested %d bytes %d times)", - allocation_name, allocation.used_bytes, - allocation.requested_bytes, allocation.count); + TF_LITE_REPORT_ERROR( + error_reporter(), + "[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead " + "(requested %d bytes for %d %s)", + allocation_name, allocation.used_bytes, allocation.requested_bytes, + allocation.count, allocation_description); } TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray( diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h index 25ff82d8dbd..6c33331f05b 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.h +++ b/tensorflow/lite/micro/recording_micro_allocator.h @@ -51,17 +51,19 @@ typedef struct RecordedAllocation { // auditing memory usage or integration testing. class RecordingMicroAllocator : public MicroAllocator { public: - static RecordingMicroAllocator* Create( - RecordingSimpleMemoryAllocator* memory_allocator, - ErrorReporter* error_reporter); + static RecordingMicroAllocator* Create(uint8_t* tensor_arena, + size_t arena_size, + ErrorReporter* error_reporter); // Returns the recorded allocations information for a given allocation type. RecordedAllocation GetRecordedAllocation( - RecordedAllocationType allocation_type); + RecordedAllocationType allocation_type) const; + + const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const; // Logs out through the ErrorReporter all allocation recordings by type // defined in RecordedAllocationType. 
- void PrintAllocations(); + void PrintAllocations() const; protected: TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context, @@ -85,9 +87,10 @@ class RecordingMicroAllocator : public MicroAllocator { ErrorReporter* error_reporter); void PrintRecordedAllocation(RecordedAllocationType allocation_type, - const char* allocation_name); + const char* allocation_name, + const char* allocation_description) const; - RecordingSimpleMemoryAllocator* recording_memory_allocator_; + const RecordingSimpleMemoryAllocator* recording_memory_allocator_; RecordedAllocation recorded_tflite_tensor_array_data_; RecordedAllocation recorded_tflite_tensor_array_quantization_data_; diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index b256cc9a86b..9bbe0f405d4 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -39,11 +39,9 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); uint8_t arena[kTestConvArenaSize]; - tflite::RecordingSimpleMemoryAllocator memory_allocator( - micro_test::reporter, arena, kTestConvArenaSize); tflite::RecordingMicroAllocator* micro_allocator = - tflite::RecordingMicroAllocator::Create(&memory_allocator, + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( @@ -68,11 +66,9 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) { tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); uint8_t arena[kTestConvArenaSize]; - tflite::RecordingSimpleMemoryAllocator memory_allocator( - micro_test::reporter, arena, kTestConvArenaSize); tflite::RecordingMicroAllocator* micro_allocator = - tflite::RecordingMicroAllocator::Create(&memory_allocator, + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( @@ -126,11 +122,9 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); uint8_t arena[kTestConvArenaSize]; - tflite::RecordingSimpleMemoryAllocator memory_allocator( - micro_test::reporter, arena, kTestConvArenaSize); tflite::RecordingMicroAllocator* micro_allocator = - tflite::RecordingMicroAllocator::Create(&memory_allocator, + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); TF_LITE_MICRO_EXPECT_GE(kTfLiteOk, micro_allocator->StartModelAllocation( diff --git a/tensorflow/lite/micro/recording_micro_interpreter.h b/tensorflow/lite/micro/recording_micro_interpreter.h new file mode 100644 index 00000000000..dcb0b431f29 --- /dev/null +++ b/tensorflow/lite/micro/recording_micro_interpreter.h @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ +#define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ + +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/recording_micro_allocator.h" + +namespace tflite { + +// Utility subclass that enables internal recordings of the MicroInterpreter. +// This class should be used to audit and analyze memory arena usage for a given +// model and interpreter. +// +// After construction and the first Invoke() or AllocateTensors() call, the +// memory usage is recorded and available through the GetMicroAllocator() +// function. See RecordingMicroAllocator for more details on what is currently +// recorded from arena allocations. +// +// It is recommended that users increase the tensor arena size by at least 1KB +// to ensure enough additional memory is available for internal recordings. +class RecordingMicroInterpreter : public MicroInterpreter { + public: + RecordingMicroInterpreter(const Model* model, + const MicroOpResolver* op_resolver, + uint8_t* tensor_arena, size_t tensor_arena_size, + ErrorReporter* error_reporter) + : MicroInterpreter(model, op_resolver, + RecordingMicroAllocator::Create( + tensor_arena, tensor_arena_size, error_reporter), + error_reporter), + recording_micro_allocator_( + static_cast<const RecordingMicroAllocator&>(allocator())) {} + + const RecordingMicroAllocator& GetMicroAllocator() const { + return recording_micro_allocator_; + } + + private: + const RecordingMicroAllocator& recording_micro_allocator_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_ diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.cc b/tensorflow/lite/micro/recording_simple_memory_allocator.cc index 934fa260e30..5e7eb5754e7 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/lite/micro/recording_simple_memory_allocator.h" +#include <new> + +#include "tensorflow/lite/kernels/internal/compatibility.h" + namespace tflite { RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator( @@ -26,6 +30,20 @@ RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator( RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {} +RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create( + ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) { + TFLITE_DCHECK(error_reporter != nullptr); + TFLITE_DCHECK(buffer_head != nullptr); + RecordingSimpleMemoryAllocator tmp = + RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size); + + uint8_t* allocator_buffer = + tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator), + alignof(RecordingSimpleMemoryAllocator)); + // Use the default copy constructor to populate internal states.
+ return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp); +} + size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const { return requested_bytes_; } diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.h b/tensorflow/lite/micro/recording_simple_memory_allocator.h index 77edadb35be..270d9543404 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.h +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.h @@ -32,6 +32,10 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator { // functions. ~RecordingSimpleMemoryAllocator() override; + static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter, + uint8_t* buffer_head, + size_t buffer_size); + // Returns the number of bytes requested from the head or tail. size_t GetRequestedBytes() const; diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 93bda8d9139..8b6cba06a0b 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -111,13 +111,14 @@ $(wildcard tensorflow/lite/micro/memory_planner/*test.cc) # TODO(b/152645559): move all benchmarks to benchmarks directory. MICROLITE_BENCHMARK_SRCS := \ -$(wildcard tensorflow/lite/micro/benchmarks/*.cc) +$(wildcard tensorflow/lite/micro/benchmarks/*benchmark.cc) MICROLITE_TEST_HDRS := \ $(wildcard tensorflow/lite/micro/testing/*.h) MICROLITE_CC_BASE_SRCS := \ $(wildcard tensorflow/lite/micro/*.cc) \ +$(wildcard tensorflow/lite/micro/benchmarks/*model_data.cc) \ $(wildcard tensorflow/lite/micro/kernels/*.cc) \ $(wildcard tensorflow/lite/micro/memory_planner/*.cc) \ $(wildcard tensorflow/lite/micro/testing/*model.cc) \ @@ -135,6 +136,7 @@ MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_BENCHMARK_SRCS), $(MICROLITE_CC_SR MICROLITE_CC_HDRS := \ $(wildcard tensorflow/lite/micro/*.h) \ +$(wildcard tensorflow/lite/micro/benchmarks/*model_data.h) \ $(wildcard tensorflow/lite/micro/kernels/*.h) \ $(wildcard tensorflow/lite/micro/memory_planner/*.h) \ LICENSE \
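
For reference, below is a minimal usage sketch (not part of the patch) showing how the new RecordingMicroInterpreter and RecordingMicroAllocator APIs introduced above fit together outside the test harness. It mirrors memory_arena_threshold_test.cc; the standalone main(), the 22 KB arena size, and the use of tflite::MicroErrorReporter from micro_error_reporter.h are illustrative assumptions, not part of this change.

// Illustrative sketch only: record and audit arena usage for the keyword
// benchmark model via the RecordingMicroInterpreter added in this patch.
#include <stdint.h>

#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"  // assumed available
#include "tensorflow/lite/micro/recording_micro_interpreter.h"

namespace {
// Arena sized like the threshold test; the recording allocator needs roughly
// 1KB of extra tail space for its own bookkeeping structs.
constexpr int kArenaSize = 22 * 1024;
uint8_t g_arena[kArenaSize];
}  // namespace

int main() {
  tflite::MicroErrorReporter error_reporter;
  tflite::AllOpsResolver op_resolver;

  // The recording interpreter calls RecordingMicroAllocator::Create() on the
  // supplied arena instead of using the plain MicroAllocator.
  tflite::RecordingMicroInterpreter interpreter(
      tflite::GetModel(g_keyword_scrambled_model_data), &op_resolver, g_arena,
      kArenaSize, &error_reporter);

  if (interpreter.AllocateTensors() != kTfLiteOk) {
    return 1;
  }

  // Log every recorded allocation category through the error reporter.
  const tflite::RecordingMicroAllocator& allocator =
      interpreter.GetMicroAllocator();
  allocator.PrintAllocations();

  // Categories can also be queried individually, e.g. the TfLiteTensor array:
  const tflite::RecordedAllocation tensor_array =
      allocator.GetRecordedAllocation(
          tflite::RecordedAllocationType::kTfLiteTensorArray);
  TF_LITE_REPORT_ERROR(&error_reporter, "TfLiteTensor array: %d bytes",
                       static_cast<int>(tensor_array.used_bytes));
  return 0;
}

Note that GetMicroAllocator() returns a const reference, so callers can read the recorded numbers but cannot mutate the allocator state.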