diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 3b64d755f45..c5742adce6f 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -144,18 +144,10 @@ cc_library( copts = TFLITE_DEFAULT_COPTS, ) -# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts. cc_library( - name = "framework", + name = "allocation", srcs = [ "allocation.cc", - "core/subgraph.cc", - "graph_info.cc", - "interpreter.cc", - "model.cc", - "mutable_op_resolver.cc", - "optional_debug_tools.cc", - "stderr_reporter.cc", ] + select({ "//tensorflow:android": [ "mmap_allocation.cc", @@ -167,6 +159,30 @@ cc_library( "mmap_allocation.cc", ], }), + hdrs = [ + "allocation.h", + ], + copts = TFLITE_DEFAULT_COPTS, + deps = [ + ":simple_memory_arena", + ":string", + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/core/api", + ], +) + +# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts. +cc_library( + name = "framework", + srcs = [ + "core/subgraph.cc", + "graph_info.cc", + "interpreter.cc", + "model.cc", + "mutable_op_resolver.cc", + "optional_debug_tools.cc", + "stderr_reporter.cc", + ], hdrs = [ "allocation.h", "context.h", @@ -183,6 +199,7 @@ cc_library( ], copts = tflite_copts() + TFLITE_DEFAULT_COPTS, deps = [ + ":allocation", ":arena_planner", ":graph_info", ":memory_planner", diff --git a/tensorflow/lite/allocation.cc b/tensorflow/lite/allocation.cc index 186d9da70a6..ed5d019949f 100644 --- a/tensorflow/lite/allocation.cc +++ b/tensorflow/lite/allocation.cc @@ -33,7 +33,7 @@ namespace tflite { #ifndef TFLITE_MCU FileCopyAllocation::FileCopyAllocation(const char* filename, ErrorReporter* error_reporter) - : Allocation(error_reporter) { + : Allocation(error_reporter, Allocation::Type::kFileCopy) { // Obtain the file size, using an alternative method that is does not // require fstat for more compatibility. 
std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename, "rb"), fclose); @@ -86,7 +86,7 @@ bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; } MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes, ErrorReporter* error_reporter) - : Allocation(error_reporter) { + : Allocation(error_reporter, Allocation::Type::kMemory) { buffer_ = ptr; buffer_size_bytes_ = num_bytes; } diff --git a/tensorflow/lite/allocation.h b/tensorflow/lite/allocation.h index f25d7fa232a..baf9ac3d421 100644 --- a/tensorflow/lite/allocation.h +++ b/tensorflow/lite/allocation.h @@ -20,6 +20,7 @@ limitations under the License. #include #include #include + #include "tensorflow/lite/c/c_api_internal.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/simple_memory_arena.h" @@ -30,18 +31,30 @@ namespace tflite { // A memory allocation handle. This could be a mmap or shared memory. class Allocation { public: - Allocation(ErrorReporter* error_reporter) : error_reporter_(error_reporter) {} virtual ~Allocation() {} + enum class Type { + kMMap, + kFileCopy, + kMemory, + }; + // Base pointer of this allocation virtual const void* base() const = 0; // Size in bytes of the allocation virtual size_t bytes() const = 0; // Whether the allocation is valid virtual bool valid() const = 0; + // Return the type of the Allocation. 
+ Type type() const { return type_; } protected: + Allocation(ErrorReporter* error_reporter, Type type) + : error_reporter_(error_reporter), type_(type) {} ErrorReporter* error_reporter_; + + private: + const Type type_; }; class MMAPAllocation : public Allocation { @@ -52,6 +65,8 @@ class MMAPAllocation : public Allocation { size_t bytes() const override; bool valid() const override; + int fd() const { return mmap_fd_; } + static bool IsSupported(); protected: diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD index d2f712f50b2..7cd5d146a13 100644 --- a/tensorflow/lite/delegates/nnapi/BUILD +++ b/tensorflow/lite/delegates/nnapi/BUILD @@ -22,6 +22,7 @@ cc_library( }), hdrs = ["nnapi_delegate.h"], deps = [ + "//tensorflow/lite:allocation", "//tensorflow/lite:kernel_api", "//tensorflow/lite:minimal_logging", "//tensorflow/lite:util", diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index e22f7540e0c..dbc5b3d66ed 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -19,10 +19,12 @@ limitations under the License. 
#include #include #include +#include #include #include #include +#include "tensorflow/lite/allocation.h" #include "tensorflow/lite/builtin_op_data.h" #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/builtin_op_data.h" @@ -417,11 +419,14 @@ class NNAPIOpBuilder { NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context, OperandMapping* tensor_mapping, DequantizeMapping* dequantize_mapping, + std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* + allocation_mapping, ANeuralNetworksModel* nn_model) : nnapi_(nnapi), context_(context), operand_mapping_(tensor_mapping), dequantize_mapping_(dequantize_mapping), + allocation_memory_mapping_(allocation_mapping), nn_model_(nn_model) {} TfLiteStatus AddScalarBoolOperand(bool value) { @@ -748,11 +753,34 @@ class NNAPIOpBuilder { nn_model_, ann_tensor_index, &ann_perchannel_params)); } if (tensor->allocation_type == kTfLiteMmapRo) { - // TODO(b/80630405): Use NNAPIAllocation. - RETURN_TFLITE_ERROR_IF_NN_ERROR( - context_, - nnapi_->ANeuralNetworksModel_setOperandValue( - nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes)); + if (tensor->allocation && + static_cast<const Allocation*>(tensor->allocation)->type() == + Allocation::Type::kMMap) { + const MMAPAllocation* mmap_alloc = + static_cast<const MMAPAllocation*>(tensor->allocation); + if (allocation_memory_mapping_->count(mmap_alloc) == 0) { + ANeuralNetworksMemory* ann_memory_handle = nullptr; + nnapi_->ANeuralNetworksMemory_createFromFd( + mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0, + &ann_memory_handle); + allocation_memory_mapping_->insert( + std::make_pair(mmap_alloc, ann_memory_handle)); + } + ANeuralNetworksMemory* ann_memory_handle = + allocation_memory_mapping_->at(mmap_alloc); + // Compute the offset to the base pointer of the MMAPAllocation. 
+ auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) - + reinterpret_cast<const uint8_t*>(mmap_alloc->base()); + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context_, nnapi_->ANeuralNetworksModel_setOperandValueFromMemory( + nn_model_, ann_tensor_index, ann_memory_handle, + offset, tensor->bytes)); + } else { + RETURN_TFLITE_ERROR_IF_NN_ERROR( + context_, + nnapi_->ANeuralNetworksModel_setOperandValue( + nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes)); + } } indices->push_back(ann_tensor_index); @@ -774,6 +802,9 @@ class NNAPIOpBuilder { // tensor #4 to a FLOAT32 tensor. DequantizeMapping* const dequantize_mapping_; + std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const + allocation_memory_mapping_; + // The NNAPI model. ANeuralNetworksModel* const nn_model_; @@ -804,6 +835,11 @@ ANeuralNetworksOperationType BasicMappingFn( class NNAPIDelegateKernel { public: NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); } + ~NNAPIDelegateKernel() { + for (auto content : allocation_memory_mapping_) { + nnapi_->ANeuralNetworksMemory_free(content.second); + } + } typedef ANeuralNetworksOperationType (*MappingFn)( const NNAPIOpMappingArgs& mapping_args); @@ -2079,6 +2115,8 @@ class NNAPIDelegateKernel { std::vector<int> nodes_; // Track indices we use OperandMapping operand_mapping_; + std::map<const MMAPAllocation*, ANeuralNetworksMemory*> + allocation_memory_mapping_; // Track memory map const std::vector* tensor_memory_map_; @@ -2148,7 +2186,8 @@ class NNAPIDelegateKernel { // The operand builder allows creating a single op. It is created outside // the for loop to avoid reallocating the vectors. NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_, - &dequantize_mapping, nn_model_.get()); + &dequantize_mapping, &allocation_memory_mapping_, + nn_model_.get()); // Add Tensors. for (auto node_index : nodes_) { // Obtain the op and registration. 
diff --git a/tensorflow/lite/mmap_allocation.cc b/tensorflow/lite/mmap_allocation.cc index 11e59956996..b5074ba58b3 100644 --- a/tensorflow/lite/mmap_allocation.cc +++ b/tensorflow/lite/mmap_allocation.cc @@ -26,7 +26,8 @@ namespace tflite { MMAPAllocation::MMAPAllocation(const char* filename, ErrorReporter* error_reporter) - : Allocation(error_reporter), mmapped_buffer_(MAP_FAILED) { + : Allocation(error_reporter, Allocation::Type::kMMap), + mmapped_buffer_(MAP_FAILED) { mmap_fd_ = open(filename, O_RDONLY); if (mmap_fd_ == -1) { error_reporter_->Report("Could not open '%s'.", filename); diff --git a/tensorflow/lite/mmap_allocation_disabled.cc b/tensorflow/lite/mmap_allocation_disabled.cc index efb0991b594..0b131ef1f3a 100644 --- a/tensorflow/lite/mmap_allocation_disabled.cc +++ b/tensorflow/lite/mmap_allocation_disabled.cc @@ -13,15 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/allocation.h" - #include <cassert> +#include "tensorflow/lite/allocation.h" + namespace tflite { MMAPAllocation::MMAPAllocation(const char* filename, ErrorReporter* error_reporter) - : Allocation(error_reporter), mmapped_buffer_(nullptr) { + : Allocation(error_reporter, Allocation::Type::kMMap), mmapped_buffer_(nullptr) { // The disabled variant should never be created. assert(false); }