Merge pull request #31563 from jdduke/cherrypicks_8JAWG
Correctly convert const int8 weights to uint8 for NNAPI
commit b8c634ea60
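The fix named in the title amounts to re-encoding constant int8 quantized weights in the uint8 layout NNAPI expects. As a rough, hypothetical sketch of that conversion (not the code from this commit; the function name and signature are illustrative), shifting each quantized value and the zero point by +128 keeps the represented real values unchanged:

    // Hypothetical sketch, not taken from this commit: convert a constant
    // int8 quantized buffer to the uint8 representation used by NNAPI.
    // real_value = scale * (q - zero_point) is preserved when both q and
    // zero_point are shifted by +128.
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct QuantizedBuffer {
      std::vector<uint8_t> data;
      int32_t zero_point;
      float scale;
    };

    QuantizedBuffer ConvertInt8ConstWeightsToUint8(const int8_t* src, size_t len,
                                                   int32_t zero_point,
                                                   float scale) {
      QuantizedBuffer out;
      out.data.resize(len);
      for (size_t i = 0; i < len; ++i) {
        // int8 range [-128, 127] maps onto uint8 range [0, 255].
        out.data[i] = static_cast<uint8_t>(static_cast<int32_t>(src[i]) + 128);
      }
      out.zero_point = zero_point + 128;
      out.scale = scale;  // The scale is unchanged by the offset shift.
      return out;
    }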
@@ -22,7 +22,10 @@ cc_library(
             "quant_lstm_sup.cc",
         ],
     }),
-    hdrs = ["nnapi_delegate.h"],
+    hdrs = [
+        "nnapi_delegate.h",
+        "nnapi_delegate_kernel.h",
+    ],
     deps = [
         "//tensorflow/lite:allocation",
         "//tensorflow/lite:kernel_api",
File diff suppressed because it is too large
tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h (new file, 243 lines)
@@ -0,0 +1,243 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, return the ANN index. If it doesn't exist,
  // return -1.
  int lite_index_to_ann(int index) const {
    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    if (index >= 0 && index < index_to_type_conversion_.size())
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Add a new mapping from a TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    if (tflite_index >= index_to_type_conversion_.size()) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ann tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};

class NNAPIOpBuilder;

// Arguments passed to the per-op mapping functions when translating a TFLite
// node into NN API operations.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
};

// RAII NN API Model destructor for use with std::unique_ptr.
struct NNFreeModel {
  void operator()(ANeuralNetworksModel* model) {
    NnApiImplementation()->ANeuralNetworksModel_free(model);
  }
};
// RAII NN API Compilation destructor for use with std::unique_ptr.
struct NNFreeCompilation {
  void operator()(ANeuralNetworksCompilation* model) {
    NnApiImplementation()->ANeuralNetworksCompilation_free(model);
  }
};

// Manages an NNAPI shared memory handle.
class NNMemory {
 public:
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
    if (name && size > 0) {
      nnapi_ = nnapi;
      byte_size_ = size;
      fd_ = nnapi_->ASharedMemory_create(name, size);
      data_ptr_ = reinterpret_cast<uint8_t*>(
          mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
      nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                                 fd_, 0, &nn_memory_handle_);
    }
  }
#else
  NNMemory(const NnApi* /*nnapi*/, const char* /*name*/, size_t /*size*/) {}
#endif

  ~NNMemory() {
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
    if (data_ptr_) {
      munmap(data_ptr_, byte_size_);
    }
    if (nn_memory_handle_) {
      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
    }
    if (fd_ > 0) close(fd_);
#endif
  }

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }

 private:
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
#endif
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
};

// The kernel that represents the node subset of TF Lite being run on NN API.
class NNAPIDelegateKernel {
 public:
  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  typedef ANeuralNetworksOperationType (*MappingFn)(
      const NNAPIOpMappingArgs& mapping_args);

  // Return a function that knows how to translate a node into its operands
  // when called. You can use this function to see if a node is supported
  // (i.e. if the returned MappingFn is null, then the node is not supported).
  static MappingFn Map(const TfLiteContext* context, int builtin_code,
                       int version, int android_sdk_version,
                       const TfLiteNode* node, bool is_accelerator_specified);

  // Initialize the kernel (a NN model).
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params);

  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);

  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);

 private:
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  ANeuralNetworksDevice* nnapi_device_ = nullptr;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Track the tensor indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Track the memory map.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
                                         int builtin_code,
                                         const TfLiteNode* node,
                                         NNAPIOpBuilder* builder);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors);
};

}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
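For orientation only, here is a hypothetical caller-side sketch (it is not part of this diff; the helper functions are illustrative) of how the type-conversion hooks declared in OperandMapping above could be used to flag a constant int8 tensor whose data must be rewritten as uint8 when staged for NNAPI:

    // Hypothetical usage sketch, not from this commit.
    #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"

    // Record that this tensor's data must be converted to kTfLiteUInt8 while
    // being copied into the NNAPI-owned buffer.
    void MarkTensorForUint8Conversion(
        tflite::delegate::nnapi::OperandMapping* mapping,
        int tflite_tensor_index) {
      mapping->add_type_conversion(tflite_tensor_index, kTfLiteUInt8);
    }

    // kTfLiteNoType signals that the tensor can be copied as-is.
    bool NeedsConversion(const tflite::delegate::nnapi::OperandMapping& mapping,
                         int tflite_tensor_index) {
      return mapping.lite_index_to_ann_type_conversion(tflite_tensor_index) !=
             kTfLiteNoType;
    }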