diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc
index 19c3d0f1e06..58d3eff8df5 100644
--- a/tensorflow/lite/micro/memory_arena_threshold_test.cc
+++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc
@@ -41,11 +41,11 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15;
 
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kKeywordModelTotalSize = 21472;
+constexpr int kKeywordModelTotalSize = 21040;
 constexpr int kKeywordModelHeadSize = 672;
-constexpr int kKeywordModelTailSize = 20800;
+constexpr int kKeywordModelTailSize = 20368;
 constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240;
-constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 2160;
+constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728;
 constexpr int kKeywordModelOpRuntimeDataSize = 148;
 
 constexpr int kTestConvModelArenaSize = 12 * 1024;
@@ -56,10 +56,10 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7;
 
 // NOTE: These values are measured on x86-64:
 // TODO(b/158651472): Consider auditing these values on non-64 bit systems.
-constexpr int kTestConvModelTotalSize = 12128;
+constexpr int kTestConvModelTotalSize = 11680;
 constexpr int kTestConvModelHeadSize = 7744;
-constexpr int kTestConvModelTailSize = 4384;
-constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 1216;
+constexpr int kTestConvModelTailSize = 3936;
+constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768;
 constexpr int kTestConvModelOpRuntimeDataSize = 136;
 
 struct ModelAllocationThresholds {
@@ -73,11 +73,17 @@ struct ModelAllocationThresholds {
   size_t op_runtime_data_size = 0;
 };
 
-void EnsureAllocatedSizeThreshold(size_t actual, size_t expected) {
+void EnsureAllocatedSizeThreshold(const char* allocation_type, size_t actual,
+                                  size_t expected) {
   // TODO(b/158651472): Better auditing of non-64 bit systems:
   if (kIs64BitSystem) {
     // 64-bit systems should check floor and ceiling to catch memory savings:
     TF_LITE_MICRO_EXPECT_NEAR(actual, expected, kAllocationThreshold);
+    if (actual != expected) {
+      TF_LITE_REPORT_ERROR(micro_test::reporter,
+                           "%s threshold failed: %ld != %ld", allocation_type,
+                           actual, expected);
+    }
   } else {
     // Non-64 bit systems should just expect allocation does not exceed the
     // ceiling:
@@ -91,33 +97,37 @@ void ValidateModelAllocationThresholds(
   allocator.PrintAllocations();
 
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetUsedBytes(),
+      "Total", allocator.GetSimpleMemoryAllocator()->GetUsedBytes(),
       thresholds.total_alloc_size);
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(),
+      "Head", allocator.GetSimpleMemoryAllocator()->GetHeadUsedBytes(),
       thresholds.head_alloc_size);
   EnsureAllocatedSizeThreshold(
-      allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
+      "Tail", allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(),
       thresholds.tail_alloc_size);
   EnsureAllocatedSizeThreshold(
+      "TfLiteTensor",
       allocator
           .GetRecordedAllocation(
              tflite::RecordedAllocationType::kTfLiteTensorArray)
          .used_bytes,
       sizeof(TfLiteTensor) * thresholds.tensor_count);
   EnsureAllocatedSizeThreshold(
+      "VariableBufferData",
       allocator
          .GetRecordedAllocation(
              tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData)
          .used_bytes,
       thresholds.tensor_variable_buffer_data_size);
   EnsureAllocatedSizeThreshold(
+      "QuantizationData",
       allocator
          .GetRecordedAllocation(tflite::RecordedAllocationType::
                                     kTfLiteTensorArrayQuantizationData)
          .used_bytes,
       thresholds.tensor_quantization_data_size);
   EnsureAllocatedSizeThreshold(
+      "NodeAndRegistration",
       allocator
          .GetRecordedAllocation(
              tflite::RecordedAllocationType::kNodeAndRegistrationArray)
@@ -125,6 +135,7 @@ void ValidateModelAllocationThresholds(
       sizeof(tflite::NodeAndRegistration) *
           thresholds.node_and_registration_count);
   EnsureAllocatedSizeThreshold(
+      "OpData",
       allocator.GetRecordedAllocation(tflite::RecordedAllocationType::kOpData)
          .used_bytes,
       thresholds.op_runtime_data_size);
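Note on the threshold updates above: the reductions are consistent with dropping exactly one tail allocation per channel-quantized tensor, the copied scale array. A quick back-of-the-envelope check (illustrative only, not part of the patch) using the x86-64 numbers from this diff:

#include <cstdio>

int main() {
  // Keyword model: total, tail, and quantization-data thresholds all drop by
  // the same 432 bytes once the scale copy is gone.
  std::printf("keyword: total -%d, tail -%d, quantization -%d\n",
              21472 - 21040, 20800 - 20368, 2160 - 1728);
  // Conv test model: the same three thresholds all drop by 448 bytes.
  std::printf("conv: total -%d, tail -%d, quantization -%d\n", 12128 - 11680,
              4384 - 3936, 1216 - 768);
  return 0;
}
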
diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc
index bfe44cab73a..f3b64bc9f39 100644
--- a/tensorflow/lite/micro/micro_allocator.cc
+++ b/tensorflow/lite/micro/micro_allocator.cc
@@ -466,6 +466,8 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
   TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
       flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
 
+  // TODO(b/159043126): Cleanup endian casting by doing all endian casting in
+  // one spot:
   if (flatbuffer_tensor.shape() == nullptr) {
     // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
     // tensor.
@@ -513,6 +515,10 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
                            "Unable to allocate TfLiteAffineQuantization.\n");
       return kTfLiteError;
     }
+
+    // TODO(b/153688719): Reduce tail allocation by using a global zero-point
+    // buffer. This value can not be reused from the flatbuffer since the
+    // zero_point is stored as an int64_t.
     quantization->zero_point =
         reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
             TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray)));
@@ -522,22 +528,14 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
       return kTfLiteError;
     }
 
-    quantization->scale = reinterpret_cast<TfLiteFloatArray*>(
-        allocator->AllocateFromTail(TfLiteFloatArrayGetSizeInBytes(channels),
-                                    alignof(TfLiteFloatArray)));
-    if (quantization->scale == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter,
-                           "Unable to allocate quantization->scale.\n");
-      return kTfLiteError;
-    }
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
+    quantization->scale = const_cast<TfLiteFloatArray*>(
+        reinterpret_cast<const TfLiteFloatArray*>(src_quantization->scale()));
 
     quantization->zero_point->size = channels;
-    quantization->scale->size = channels;
     int* zero_point_data = quantization->zero_point->data;
-    float* scale_data = quantization->scale->data;
     for (int i = 0; i < channels; i++) {
       zero_point_data[i] = src_quantization->zero_point()->Get(i);
-      scale_data[i] = src_quantization->scale()->Get(i);
     }
     // TODO(rocky): Need to add a micro_allocator test case that fails when
     // this is not copied:
@@ -815,8 +813,10 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
     }
 
     // Disregard const qualifier to workaround with existing API.
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
     TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
         reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
+    // TODO(b/159043126): Check for big endian before casting flatbuffer values.
    TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->outputs()));
 
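Note on the scale change above: the patch stops copying the per-channel scale array into the tail and instead points quantization->scale at the flatbuffer data. The layout assumption is that a serialized flatbuffers::Vector<float> (a 32-bit element count followed by the float elements) is byte-compatible with TfLiteFloatArray's `int size; float data[]` layout; zero points still have to be copied because the flatbuffer stores them as int64_t, as the new TODO notes. A minimal sketch of that layout assumption, using a hand-built buffer rather than the real flatbuffers API:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Stand-in for a serialized flatbuffers::Vector<float> holding two scales:
  // a 32-bit count followed by the raw float elements.
  alignas(4) unsigned char vector_bytes[12];
  const uint32_t count = 2;
  const float scales[2] = {0.5f, 0.25f};
  std::memcpy(vector_bytes, &count, sizeof(count));
  std::memcpy(vector_bytes + 4, scales, sizeof(scales));

  // Read the same bytes back through a TfLiteFloatArray-style view, the way
  // the allocator now aliases src_quantization->scale() instead of copying.
  int size = 0;
  std::memcpy(&size, vector_bytes, sizeof(size));
  const float* data = reinterpret_cast<const float*>(vector_bytes + 4);
  std::printf("size=%d scale[0]=%g scale[1]=%g\n", size, data[0], data[1]);
  return 0;
}

This only holds on little-endian targets with a 32-bit int, which is what the added TODO(b/159043126) comments call out.
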
diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc
index 9bbe0f405d4..775a2de2dfd 100644
--- a/tensorflow/lite/micro/recording_micro_allocator_test.cc
+++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc
@@ -93,7 +93,6 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
       quantized_tensor_count++;
       size_t num_channels = quantization_params->scale()->size();
       quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels);
-      quantized_channel_bytes += TfLiteFloatArrayGetSizeInBytes(num_channels);
     }
   }
 
@@ -106,10 +105,9 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) {
       micro_allocator->GetRecordedAllocation(
          tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData);
 
-  // Each quantized tensors has 3 mallocs (quant struct, scale dimensions, zero
-  // point dimensions):
+  // Each quantized tensor has 2 mallocs (quant struct, zero point dimensions):
   TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count,
-                          quantized_tensor_count * 3);
+                          quantized_tensor_count * 2);
   TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes,
                           expected_requested_bytes);
   TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes,
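Note on the repeated TODO(b/159043126) comments: flatbuffers serialize multi-byte scalars little-endian, so aliasing flatbuffer vectors as TfLite*Array types is only byte-compatible on little-endian hosts. A hypothetical probe for that assumption (illustration only, not part of this patch or the TFLite Micro API):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Returns true when the host stores the least significant byte first, i.e.
// when the in-memory layout matches the flatbuffer wire format.
bool HostIsLittleEndian() {
  const uint32_t probe = 1;
  unsigned char first_byte = 0;
  std::memcpy(&first_byte, &probe, 1);
  return first_byte == 1;
}

int main() {
  std::printf("direct flatbuffer aliasing is %s on this host\n",
              HostIsLittleEndian() ? "byte-compatible" : "NOT byte-compatible");
  return 0;
}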