Use MMAPAllocation to allow copy-less const weights transfer for NNAPI delegate.

PiperOrigin-RevId: 255706169
This commit is contained in:
A. Unique TensorFlower 2019-06-28 18:50:09 -07:00 committed by TensorFlower Gardener
parent 52e49d7993
commit 19f417d905
7 changed files with 95 additions and 22 deletions

View File

@ -144,18 +144,10 @@ cc_library(
copts = TFLITE_DEFAULT_COPTS,
)
# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts.
cc_library(
name = "framework",
name = "allocation",
srcs = [
"allocation.cc",
"core/subgraph.cc",
"graph_info.cc",
"interpreter.cc",
"model.cc",
"mutable_op_resolver.cc",
"optional_debug_tools.cc",
"stderr_reporter.cc",
] + select({
"//tensorflow:android": [
"mmap_allocation.cc",
@ -167,6 +159,30 @@ cc_library(
"mmap_allocation.cc",
],
}),
hdrs = [
"allocation.h",
],
copts = TFLITE_DEFAULT_COPTS,
deps = [
":simple_memory_arena",
":string",
"//tensorflow/lite/c:c_api_internal",
"//tensorflow/lite/core/api",
],
)
# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts.
cc_library(
name = "framework",
srcs = [
"core/subgraph.cc",
"graph_info.cc",
"interpreter.cc",
"model.cc",
"mutable_op_resolver.cc",
"optional_debug_tools.cc",
"stderr_reporter.cc",
],
hdrs = [
"allocation.h",
"context.h",
@ -183,6 +199,7 @@ cc_library(
],
copts = tflite_copts() + TFLITE_DEFAULT_COPTS,
deps = [
":allocation",
":arena_planner",
":graph_info",
":memory_planner",

View File

@ -33,7 +33,7 @@ namespace tflite {
#ifndef TFLITE_MCU
FileCopyAllocation::FileCopyAllocation(const char* filename,
ErrorReporter* error_reporter)
: Allocation(error_reporter) {
: Allocation(error_reporter, Allocation::Type::kFileCopy) {
// Obtain the file size, using an alternative method that does not
// require fstat for more compatibility.
std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename, "rb"), fclose);
@ -86,7 +86,7 @@ bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; }
MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes,
ErrorReporter* error_reporter)
: Allocation(error_reporter) {
: Allocation(error_reporter, Allocation::Type::kMemory) {
buffer_ = ptr;
buffer_size_bytes_ = num_bytes;
}

View File

@ -20,6 +20,7 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/simple_memory_arena.h"
@ -30,18 +31,30 @@ namespace tflite {
// A memory allocation handle. This could be a mmap or shared memory.
class Allocation {
public:
Allocation(ErrorReporter* error_reporter) : error_reporter_(error_reporter) {}
virtual ~Allocation() {}
enum class Type {
kMMap,
kFileCopy,
kMemory,
};
// Base pointer of this allocation
virtual const void* base() const = 0;
// Size in bytes of the allocation
virtual size_t bytes() const = 0;
// Whether the allocation is valid
virtual bool valid() const = 0;
// Return the type of the Allocation.
Type type() const { return type_; }
protected:
Allocation(ErrorReporter* error_reporter, Type type)
: error_reporter_(error_reporter), type_(type) {}
ErrorReporter* error_reporter_;
private:
const Type type_;
};
class MMAPAllocation : public Allocation {
@ -52,6 +65,8 @@ class MMAPAllocation : public Allocation {
size_t bytes() const override;
bool valid() const override;
int fd() const { return mmap_fd_; }
static bool IsSupported();
protected:

View File

@ -22,6 +22,7 @@ cc_library(
}),
hdrs = ["nnapi_delegate.h"],
deps = [
"//tensorflow/lite:allocation",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite:util",

View File

@ -19,10 +19,12 @@ limitations under the License.
#include <cstring>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
@ -417,11 +419,14 @@ class NNAPIOpBuilder {
NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
OperandMapping* tensor_mapping,
DequantizeMapping* dequantize_mapping,
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
allocation_mapping,
ANeuralNetworksModel* nn_model)
: nnapi_(nnapi),
context_(context),
operand_mapping_(tensor_mapping),
dequantize_mapping_(dequantize_mapping),
allocation_memory_mapping_(allocation_mapping),
nn_model_(nn_model) {}
TfLiteStatus AddScalarBoolOperand(bool value) {
@ -748,11 +753,34 @@ class NNAPIOpBuilder {
nn_model_, ann_tensor_index, &ann_perchannel_params));
}
if (tensor->allocation_type == kTfLiteMmapRo) {
// TODO(b/80630405): Use NNAPIAllocation.
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context_,
nnapi_->ANeuralNetworksModel_setOperandValue(
nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
if (tensor->allocation &&
static_cast<const Allocation*>(tensor->allocation)->type() ==
Allocation::Type::kMMap) {
const MMAPAllocation* mmap_alloc =
static_cast<const MMAPAllocation*>(tensor->allocation);
if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
ANeuralNetworksMemory* ann_memory_handle = nullptr;
nnapi_->ANeuralNetworksMemory_createFromFd(
mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
&ann_memory_handle);
allocation_memory_mapping_->insert(
std::make_pair(mmap_alloc, ann_memory_handle));
}
ANeuralNetworksMemory* ann_memory_handle =
allocation_memory_mapping_->at(mmap_alloc);
// Compute the offset to the base pointer of the MMAPAllocation.
auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
reinterpret_cast<const uint8_t*>(mmap_alloc->base());
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context_, nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
nn_model_, ann_tensor_index, ann_memory_handle,
offset, tensor->bytes));
} else {
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context_,
nnapi_->ANeuralNetworksModel_setOperandValue(
nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
}
}
indices->push_back(ann_tensor_index);
@ -774,6 +802,9 @@ class NNAPIOpBuilder {
// tensor #4 to a FLOAT32 tensor.
DequantizeMapping* const dequantize_mapping_;
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
allocation_memory_mapping_;
// The NNAPI model.
ANeuralNetworksModel* const nn_model_;
@ -804,6 +835,11 @@ ANeuralNetworksOperationType BasicMappingFn(
class NNAPIDelegateKernel {
public:
NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
~NNAPIDelegateKernel() {
for (auto content : allocation_memory_mapping_) {
nnapi_->ANeuralNetworksMemory_free(content.second);
}
}
typedef ANeuralNetworksOperationType (*MappingFn)(
const NNAPIOpMappingArgs& mapping_args);
@ -2079,6 +2115,8 @@ class NNAPIDelegateKernel {
std::vector<int> nodes_;
// Track indices we use
OperandMapping operand_mapping_;
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
allocation_memory_mapping_;
// Track memory map
const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
tensor_memory_map_;
@ -2148,7 +2186,8 @@ class NNAPIDelegateKernel {
// The operand builder allows creating a single op. It is created outside
// the for loop to avoid reallocating the vectors.
NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
&dequantize_mapping, nn_model_.get());
&dequantize_mapping, &allocation_memory_mapping_,
nn_model_.get());
// Add Tensors.
for (auto node_index : nodes_) {
// Obtain the op and registration.

View File

@ -26,7 +26,8 @@ namespace tflite {
MMAPAllocation::MMAPAllocation(const char* filename,
ErrorReporter* error_reporter)
: Allocation(error_reporter), mmapped_buffer_(MAP_FAILED) {
: Allocation(error_reporter, Allocation::Type::kMMap),
mmapped_buffer_(MAP_FAILED) {
mmap_fd_ = open(filename, O_RDONLY);
if (mmap_fd_ == -1) {
error_reporter_->Report("Could not open '%s'.", filename);

View File

@ -13,15 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/allocation.h"
#include <cassert>
#include "tensorflow/lite/allocation.h"
namespace tflite {
MMAPAllocation::MMAPAllocation(const char* filename,
ErrorReporter* error_reporter)
: Allocation(error_reporter), mmapped_buffer_(nullptr) {
: Allocation(error_reporter, kMMap), mmapped_buffer_(nullptr) {
// The disabled variant should never be created.
assert(false);
}