Merge pull request #31563 from jdduke/cherrypicks_8JAWG
Correctly convert const int8 weights to uint8 for NNAPI
This commit is contained in:
commit b8c634ea60
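Background on the title: TFLite stores some quantized constants as signed int8, while the NNAPI quantized tensor type used here is unsigned 8-bit. Because a quantized value represents real = scale * (q - zero_point), adding 128 to every stored value and to the zero point keeps the represented weights unchanged. The sketch below only illustrates that arithmetic; the helper name and signature are hypothetical, not the delegate's actual code.

// Hypothetical helper: re-express int8 quantized weights as uint8 by shifting
// every element and the zero point by 128, leaving the real values intact.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> ConvertInt8WeightsToUInt8(const int8_t* data, size_t count,
                                               int32_t* zero_point) {
  std::vector<uint8_t> converted(count);
  for (size_t i = 0; i < count; ++i) {
    // int8 range [-128, 127] shifts onto the uint8 range [0, 255].
    converted[i] = static_cast<uint8_t>(static_cast<int32_t>(data[i]) + 128);
  }
  *zero_point += 128;  // keeps scale * (q - zero_point) unchanged
  return converted;
}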
@@ -22,7 +22,10 @@ cc_library(
             "quant_lstm_sup.cc",
         ],
     }),
-    hdrs = ["nnapi_delegate.h"],
+    hdrs = [
+        "nnapi_delegate.h",
+        "nnapi_delegate_kernel.h",
+    ],
     deps = [
         "//tensorflow/lite:allocation",
         "//tensorflow/lite:kernel_api",
File diff suppressed because it is too large
tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h (new file, 243 lines)
@@ -0,0 +1,243 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index. If it doesn't exist,
  // returns -1.
  int lite_index_to_ann(int index) const {
    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    if (index >= 0 && index < index_to_type_conversion_.size())
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Add a new mapping from a TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    if (tflite_index >= index_to_type_conversion_.size()) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ANN tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};

class NNAPIOpBuilder;

// The set of arguments passed to an op mapping function (see MappingFn below).
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
};

// RAII NN API Model Destructor for use with std::unique_ptr
struct NNFreeModel {
  void operator()(ANeuralNetworksModel* model) {
    NnApiImplementation()->ANeuralNetworksModel_free(model);
  }
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
struct NNFreeCompilation {
  void operator()(ANeuralNetworksCompilation* model) {
    NnApiImplementation()->ANeuralNetworksCompilation_free(model);
  }
};

// Manage NNAPI shared memory handle
class NNMemory {
 public:
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
    if (name && size > 0) {
      nnapi_ = nnapi;
      byte_size_ = size;
      fd_ = nnapi_->ASharedMemory_create(name, size);
      data_ptr_ = reinterpret_cast<uint8_t*>(
          mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
      nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                                 fd_, 0, &nn_memory_handle_);
    }
  }
#else
  NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
#endif

  ~NNMemory() {
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
    if (data_ptr_) {
      munmap(data_ptr_, byte_size_);
    }
    if (nn_memory_handle_) {
      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
    }
    if (fd_ > 0) close(fd_);
#endif
  }

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }

 private:
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
#endif
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
};

// The kernel that represents the node subset of TF Lite being run on NN API.
class NNAPIDelegateKernel {
 public:
  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  typedef ANeuralNetworksOperationType (*MappingFn)(
      const NNAPIOpMappingArgs& mapping_args);

  // Return a function that knows how to translate a node into its operands
  // when called. You can use this function to see if a node is supported
  // (i.e. if the returned MappingFn is null, then the node is not supported).
  static MappingFn Map(const TfLiteContext* context, int builtin_code,
                       int version, int android_sdk_version,
                       const TfLiteNode* node, bool is_accelerator_specified);

  // Initialize the kernel (a NN model).
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params);

  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);

  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);

 private:
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  ANeuralNetworksDevice* nnapi_device_ = nullptr;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Track indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Track memory map.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
                                         int builtin_code,
                                         const TfLiteNode* node,
                                         NNAPIOpBuilder* builder);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors);
};

}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
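How the pieces above relate to the commit title, per the header's own comments: OperandMapping::add_type_conversion records a target TfLiteType for a tensor, and lite_index_to_ann_type_conversion reports the type a tensor must be converted to while its data is copied into the memory allocated for NN API. The sketch below only illustrates that copy step for the int8-to-uint8 case; the function and variable names are hypothetical, not the delegate's actual implementation.

// Illustrative copy step: apply a recorded kTfLiteInt8 -> kTfLiteUInt8
// conversion while writing a constant tensor into NNAPI-visible memory.
#include <cstddef>
#include <cstdint>
#include <cstring>

#include "tensorflow/lite/c/c_api_internal.h"  // TfLiteType, kTfLiteInt8, kTfLiteUInt8

void CopyConstantForNnapi(const void* src, void* dst, size_t bytes,
                          TfLiteType src_type, TfLiteType recorded_conversion) {
  if (src_type == kTfLiteInt8 && recorded_conversion == kTfLiteUInt8) {
    const int8_t* in = static_cast<const int8_t*>(src);
    uint8_t* out = static_cast<uint8_t*>(dst);
    for (size_t i = 0; i < bytes; ++i) {
      // Same +128 shift as applied to the weights' zero point.
      out[i] = static_cast<uint8_t>(static_cast<int32_t>(in[i]) + 128);
    }
  } else {
    std::memcpy(dst, src, bytes);  // no (or an unrelated) conversion recorded
  }
}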