diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc
index 19c3d0f1e06..58d3eff8df5 100644
--- a/tensorflow/lite/micro/memory_arena_threshold_test.cc
+++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc
@@ -41,11 +41,11 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
 
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kKeywordModelTotalSize = 21472;
+constexpr int kKeywordModelTotalSize = 21040;
 constexpr int kKeywordModelHeadSize = 672;
-constexpr int kKeywordModelTailSize = 20800;
+constexpr int kKeywordModelTailSize = 20368;
 constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
-constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 2160;
+constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728;
 constexpr int kKeywordModelOpRuntimeDataSize = 148;
 
 constexpr int kTestConvModelArenaSize = 12 * 1024;
@@ -56,10 +56,10 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
 
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kTestConvModelTotalSize = 12128;
+constexpr int kTestConvModelTotalSize = 11680;
 constexpr int kTestConvModelHeadSize = 7744;
-constexpr int kTestConvModelTailSize = 4384;
-constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 1216;
+constexpr int kTestConvModelTailSize = 3936;
+constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768;
 constexpr int kTestConvModelOpRuntimeDataSize = 136;
 
 struct ModelAllocationThresholds {
@@ -73,11 +73,17 @@ struct ModelAllocationThresholds {
   size_t op_runtime_data_size = 0;
 };
 
-void EnsureAllocatedSizeThreshold(size_t actual, size_t expected) {
+void EnsureAllocatedSizeThreshold(const char* allocation_type, size_t actual,
+                                  size_t expected) {
   // TODO(b/158651472): Better auditing of non-64 bit systems:
   if (kIs64BitSystem) {
     // 64-bit systems should check floor and ceiling to catch memory savings:
     TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold);
+    if (actual != expected) {
+      TF_LITE_REPORT_ERROR(micro_test::reporter,
+                           "%s threshold failed: %ld != %ld", allocation_type,
+                           actual, expected);
+    }
   } else {
     // Non-64 bit systems should just expect allocation does not exceed the
     // ceiling:
@@ -91,33 +97,37 @@ void ValidateModelAllocationThresholds(
   allocator.PrintAllocations();
 
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetUsedBytes(),
+      "Total", allocator.GetSimpleMemoryAllocator()->GetUsedBytes(),
       thresholds.total_alloc_size);
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(),
+      "Head", allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(),
       thresholds.head_alloc_size);
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
+      "Tail", allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
       thresholds.tail_alloc_size);
   EnsureAllocatedSizeThreshold(
+      "TfLiteTensor",
       allocator
           .GetRecordedAllocation(
              tflite::RecordedAllocationType::kTfLiteTensorArray)
          .used_bytes,
       sizeof(TfLiteTensor) * thresholds.tensor_count);
   EnsureAllocatedSizeThreshold(
+      "VariableBufferData",
       allocator
          .GetRecordedAllocation(
              tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData)
          .used_bytes,
       thresholds.tensor_variable_buffer_data_size);
   EnsureAllocatedSizeThreshold(
+      "QuantizationData",
       allocator
          .GetRecordedAllocation(tflite::RecordedAllocationType::
                                     kTfLiteTensorArrayQuantizationData)
          .used_bytes,
       thresholds.tensor_quantization_data_size);
   EnsureAllocatedSizeThreshold(
+      "NodeAndRegistration",
       allocator
          .GetRecordedAllocation(
              tflite::RecordedAllocationType::kNodeAndRegistrationArray)
@@ -125,6 +135,7 @@ void ValidateModelAllocationThresholds(
       sizeof(tflite::NodeAndRegistration) *
           thresholds.node_and_registration_count);
   EnsureAllocatedSizeThreshold(
+      "OpData",
       allocator.GetRecordedAllocation(tflite::RecordedAllocationType::kOpData)
          .used_bytes,
       thresholds.op_runtime_data_size);
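Note on the threshold updates above: the reductions are consistent with dropping exactly one tail allocation per channel-quantized tensor, the copied scale array. A quick back-of-the-envelope check (illustrative only, not part of the patch) using the x86-64 numbers from this diff:

#include <cstdio>

int main() {
  // Keyword model: total, tail, and quantization-data thresholds all drop by
  // the same 432 bytes once the scale copy is gone.
  std::printf("keyword: total -%d, tail -%d, quantization -%d\n",
              21472 - 21040, 20800 - 20368, 2160 - 1728);
  // Conv test model: the same three thresholds all drop by 448 bytes.
  std::printf("conv: total -%d, tail -%d, quantization -%d\n", 12128 - 11680,
              4384 - 3936, 1216 - 768);
  return 0;
}
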
diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc
index bfe44cab73a..f3b64bc9f39 100644
--- a/tensorflow/lite/micro/micro_allocator.cc
+++ b/tensorflow/lite/micro/micro_allocator.cc
@@ -466,6 +466,8 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
   TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
       flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
 
+  // TODO(b/159043126): Cleanup endian casting by doing all endian casting in
+  // one spot:
   if (flatbuffer_tensor.shape() == nullptr) {
     // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
     // tensor.
@@ -513,6 +515,10 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
                            "Unable to allocate TfLiteAffineQuantization.\n");
       return kTfLiteError;
     }
+
+    // TODO(b/153688719): Reduce tail allocation by using a global zero-point
+    // buffer. This value can not be reused from the flatbuffer since the
+    // zero_point is stored as an int64_t.
     quantization->zero_point =
         reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
             TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray)));
@@ -522,22 +528,14 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
       return kTfLiteError;
     }
 
-    quantization->scale = reinterpret_cast<TfLiteFloatArray*>(
-        allocator->AllocateFromTail(TfLiteFloatArrayGetSizeInBytes(channels),
-                                    alignof(TfLiteFloatArray)));
-    if (quantization->scale == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter,
-                           "Unable to allocate quantization->scale.\n");
-      return kTfLiteError;
-    }
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
+    quantization->scale = const_cast<TfLiteFloatArray*>(
+        reinterpret_cast<const TfLiteFloatArray*>(src_quantization->scale()));
 
     quantization->zero_point->size = channels;
-    quantization->scale->size = channels;
     int* zero_point_data = quantization->zero_point->data;
-    float* scale_data = quantization->scale->data;
     for (int i = 0; i < channels; i++) {
       zero_point_data[i] = src_quantization->zero_point()->Get(i);
-      scale_data[i] = src_quantization->scale()->Get(i);
     }
     // TODO(rocky): Need to add a micro_allocator test case that fails when
     // this is not copied:
@@ -815,8 +813,10 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
     }
 
     // Disregard const qualifier to workaround with existing API.
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
     TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
         reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
    TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->outputs()));
 
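Note on the scale change above: the patch stops copying the per-channel scale array into the tail and instead points quantization->scale at the flatbuffer data. The layout assumption is that a serialized flatbuffers::Vector<float> (a 32-bit element count followed by the float elements) is byte-compatible with TfLiteFloatArray's `int size; float data[]` layout; zero points still have to be copied because the flatbuffer stores them as int64_t, as the new TODO notes. A minimal sketch of that layout assumption, using a hand-built buffer rather than the real flatbuffers API:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Stand-in for a serialized flatbuffers::Vector<float> holding two scales:
  // a 32-bit count followed by the raw float elements.
  alignas(4) unsigned char vector_bytes[12];
  const uint32_t count = 2;
  const float scales[2] = {0.5f, 0.25f};
  std::memcpy(vector_bytes, &count, sizeof(count));
  std::memcpy(vector_bytes + 4, scales, sizeof(scales));

  // Read the same bytes back through a TfLiteFloatArray-style view, the way
  // the allocator now aliases src_quantization->scale() instead of copying.
  int size = 0;
  std::memcpy(&size, vector_bytes, sizeof(size));
  const float* data = reinterpret_cast<const float*>(vector_bytes + 4);
  std::printf("size=%d scale[0]=%g scale[1]=%g\n", size, data[0], data[1]);
  return 0;
}

This only holds on little-endian targets with a 32-bit int, which is what the added TODO(b/159043126) comments call out.
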
diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc
index 9bbe0f405d4..775a2de2dfd 100644
--- a/tensorflow/lite/micro/recording_micro_allocator_test.cc
+++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc
@@ -93,7 +93,6 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
       quantized_tensor_count++;
       size_t num_channels = quantization_params->scale()->size();
       quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels);
-      quantized_channel_bytes += TfLiteFloatArrayGetSizeInBytes(num_channels);
     }
   }
 
@@ -106,10 +105,9 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
       micro_allocator->GetRecordedAllocation(
          tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData);
 
-  // Each quantized tensors has 3 mallocs (quant struct, scale dimensions, zero
-  // point dimensions):
+  // Each quantized tensor has 2 mallocs (quant struct, zero point dimensions):
   TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count,
-                          quantized_tensor_count * 3);
+                          quantized_tensor_count * 2);
   TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
                           expected_requested_bytes);
   TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
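Note on the repeated TODO(b/159043126) comments: flatbuffers serialize multi-byte scalars little-endian, so aliasing flatbuffer vectors as TfLite*Array types is only byte-compatible on little-endian hosts. A hypothetical probe for that assumption (illustration only, not part of this patch or the TFLite Micro API):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Returns true when the host stores the least significant byte first, i.e.
// when the in-memory layout matches the flatbuffer wire format.
bool HostIsLittleEndian() {
  const uint32_t probe = 1;
  unsigned char first_byte = 0;
  std::memcpy(&first_byte, &probe, 1);
  return first_byte == 1;
}

int main() {
  std::printf("direct flatbuffer aliasing is %s on this host\n",
              HostIsLittleEndian() ? "byte-compatible" : "NOT byte-compatible");
  return 0;
}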