Use MMAPAllocation to allow copy-less const weights transfer for NNAPI delegate.
PiperOrigin-RevId: 255706169
This commit is contained in:
parent
52e49d7993
commit
19f417d905
@ -144,18 +144,10 @@ cc_library(
|
|||||||
copts = TFLITE_DEFAULT_COPTS,
|
copts = TFLITE_DEFAULT_COPTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts.
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "framework",
|
name = "allocation",
|
||||||
srcs = [
|
srcs = [
|
||||||
"allocation.cc",
|
"allocation.cc",
|
||||||
"core/subgraph.cc",
|
|
||||||
"graph_info.cc",
|
|
||||||
"interpreter.cc",
|
|
||||||
"model.cc",
|
|
||||||
"mutable_op_resolver.cc",
|
|
||||||
"optional_debug_tools.cc",
|
|
||||||
"stderr_reporter.cc",
|
|
||||||
] + select({
|
] + select({
|
||||||
"//tensorflow:android": [
|
"//tensorflow:android": [
|
||||||
"mmap_allocation.cc",
|
"mmap_allocation.cc",
|
||||||
@ -167,6 +159,30 @@ cc_library(
|
|||||||
"mmap_allocation.cc",
|
"mmap_allocation.cc",
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
|
hdrs = [
|
||||||
|
"allocation.h",
|
||||||
|
],
|
||||||
|
copts = TFLITE_DEFAULT_COPTS,
|
||||||
|
deps = [
|
||||||
|
":simple_memory_arena",
|
||||||
|
":string",
|
||||||
|
"//tensorflow/lite/c:c_api_internal",
|
||||||
|
"//tensorflow/lite/core/api",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO(ahentz): investigate dependency on gemm_support requiring usage of tf_copts.
|
||||||
|
cc_library(
|
||||||
|
name = "framework",
|
||||||
|
srcs = [
|
||||||
|
"core/subgraph.cc",
|
||||||
|
"graph_info.cc",
|
||||||
|
"interpreter.cc",
|
||||||
|
"model.cc",
|
||||||
|
"mutable_op_resolver.cc",
|
||||||
|
"optional_debug_tools.cc",
|
||||||
|
"stderr_reporter.cc",
|
||||||
|
],
|
||||||
hdrs = [
|
hdrs = [
|
||||||
"allocation.h",
|
"allocation.h",
|
||||||
"context.h",
|
"context.h",
|
||||||
@ -183,6 +199,7 @@ cc_library(
|
|||||||
],
|
],
|
||||||
copts = tflite_copts() + TFLITE_DEFAULT_COPTS,
|
copts = tflite_copts() + TFLITE_DEFAULT_COPTS,
|
||||||
deps = [
|
deps = [
|
||||||
|
":allocation",
|
||||||
":arena_planner",
|
":arena_planner",
|
||||||
":graph_info",
|
":graph_info",
|
||||||
":memory_planner",
|
":memory_planner",
|
||||||
|
@ -33,7 +33,7 @@ namespace tflite {
|
|||||||
#ifndef TFLITE_MCU
|
#ifndef TFLITE_MCU
|
||||||
FileCopyAllocation::FileCopyAllocation(const char* filename,
|
FileCopyAllocation::FileCopyAllocation(const char* filename,
|
||||||
ErrorReporter* error_reporter)
|
ErrorReporter* error_reporter)
|
||||||
: Allocation(error_reporter) {
|
: Allocation(error_reporter, Allocation::Type::kFileCopy) {
|
||||||
// Obtain the file size, using an alternative method that does not
|
// Obtain the file size, using an alternative method that does not
|
||||||
// require fstat for more compatibility.
|
// require fstat for more compatibility.
|
||||||
std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename, "rb"), fclose);
|
std::unique_ptr<FILE, decltype(&fclose)> file(fopen(filename, "rb"), fclose);
|
||||||
@ -86,7 +86,7 @@ bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; }
|
|||||||
|
|
||||||
MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes,
|
MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes,
|
||||||
ErrorReporter* error_reporter)
|
ErrorReporter* error_reporter)
|
||||||
: Allocation(error_reporter) {
|
: Allocation(error_reporter, Allocation::Type::kMemory) {
|
||||||
buffer_ = ptr;
|
buffer_ = ptr;
|
||||||
buffer_size_bytes_ = num_bytes;
|
buffer_size_bytes_ = num_bytes;
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/lite/c/c_api_internal.h"
|
#include "tensorflow/lite/c/c_api_internal.h"
|
||||||
#include "tensorflow/lite/core/api/error_reporter.h"
|
#include "tensorflow/lite/core/api/error_reporter.h"
|
||||||
#include "tensorflow/lite/simple_memory_arena.h"
|
#include "tensorflow/lite/simple_memory_arena.h"
|
||||||
@ -30,18 +31,30 @@ namespace tflite {
|
|||||||
// A memory allocation handle. This could be a mmap or shared memory.
|
// A memory allocation handle. This could be a mmap or shared memory.
|
||||||
class Allocation {
|
class Allocation {
|
||||||
public:
|
public:
|
||||||
Allocation(ErrorReporter* error_reporter) : error_reporter_(error_reporter) {}
|
|
||||||
virtual ~Allocation() {}
|
virtual ~Allocation() {}
|
||||||
|
|
||||||
|
enum class Type {
|
||||||
|
kMMap,
|
||||||
|
kFileCopy,
|
||||||
|
kMemory,
|
||||||
|
};
|
||||||
|
|
||||||
// Base pointer of this allocation
|
// Base pointer of this allocation
|
||||||
virtual const void* base() const = 0;
|
virtual const void* base() const = 0;
|
||||||
// Size in bytes of the allocation
|
// Size in bytes of the allocation
|
||||||
virtual size_t bytes() const = 0;
|
virtual size_t bytes() const = 0;
|
||||||
// Whether the allocation is valid
|
// Whether the allocation is valid
|
||||||
virtual bool valid() const = 0;
|
virtual bool valid() const = 0;
|
||||||
|
// Return the type of the Allocation.
|
||||||
|
Type type() const { return type_; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
Allocation(ErrorReporter* error_reporter, Type type)
|
||||||
|
: error_reporter_(error_reporter), type_(type) {}
|
||||||
ErrorReporter* error_reporter_;
|
ErrorReporter* error_reporter_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Type type_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class MMAPAllocation : public Allocation {
|
class MMAPAllocation : public Allocation {
|
||||||
@ -52,6 +65,8 @@ class MMAPAllocation : public Allocation {
|
|||||||
size_t bytes() const override;
|
size_t bytes() const override;
|
||||||
bool valid() const override;
|
bool valid() const override;
|
||||||
|
|
||||||
|
int fd() const { return mmap_fd_; }
|
||||||
|
|
||||||
static bool IsSupported();
|
static bool IsSupported();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -22,6 +22,7 @@ cc_library(
|
|||||||
}),
|
}),
|
||||||
hdrs = ["nnapi_delegate.h"],
|
hdrs = ["nnapi_delegate.h"],
|
||||||
deps = [
|
deps = [
|
||||||
|
"//tensorflow/lite:allocation",
|
||||||
"//tensorflow/lite:kernel_api",
|
"//tensorflow/lite:kernel_api",
|
||||||
"//tensorflow/lite:minimal_logging",
|
"//tensorflow/lite:minimal_logging",
|
||||||
"//tensorflow/lite:util",
|
"//tensorflow/lite:util",
|
||||||
|
@ -19,10 +19,12 @@ limitations under the License.
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "tensorflow/lite/allocation.h"
|
||||||
#include "tensorflow/lite/builtin_op_data.h"
|
#include "tensorflow/lite/builtin_op_data.h"
|
||||||
#include "tensorflow/lite/builtin_ops.h"
|
#include "tensorflow/lite/builtin_ops.h"
|
||||||
#include "tensorflow/lite/c/builtin_op_data.h"
|
#include "tensorflow/lite/c/builtin_op_data.h"
|
||||||
@ -417,11 +419,14 @@ class NNAPIOpBuilder {
|
|||||||
NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
|
NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
|
||||||
OperandMapping* tensor_mapping,
|
OperandMapping* tensor_mapping,
|
||||||
DequantizeMapping* dequantize_mapping,
|
DequantizeMapping* dequantize_mapping,
|
||||||
|
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
|
||||||
|
allocation_mapping,
|
||||||
ANeuralNetworksModel* nn_model)
|
ANeuralNetworksModel* nn_model)
|
||||||
: nnapi_(nnapi),
|
: nnapi_(nnapi),
|
||||||
context_(context),
|
context_(context),
|
||||||
operand_mapping_(tensor_mapping),
|
operand_mapping_(tensor_mapping),
|
||||||
dequantize_mapping_(dequantize_mapping),
|
dequantize_mapping_(dequantize_mapping),
|
||||||
|
allocation_memory_mapping_(allocation_mapping),
|
||||||
nn_model_(nn_model) {}
|
nn_model_(nn_model) {}
|
||||||
|
|
||||||
TfLiteStatus AddScalarBoolOperand(bool value) {
|
TfLiteStatus AddScalarBoolOperand(bool value) {
|
||||||
@ -748,12 +753,35 @@ class NNAPIOpBuilder {
|
|||||||
nn_model_, ann_tensor_index, &ann_perchannel_params));
|
nn_model_, ann_tensor_index, &ann_perchannel_params));
|
||||||
}
|
}
|
||||||
if (tensor->allocation_type == kTfLiteMmapRo) {
|
if (tensor->allocation_type == kTfLiteMmapRo) {
|
||||||
// TODO(b/80630405): Use NNAPIAllocation.
|
if (tensor->allocation &&
|
||||||
|
static_cast<const Allocation*>(tensor->allocation)->type() ==
|
||||||
|
Allocation::Type::kMMap) {
|
||||||
|
const MMAPAllocation* mmap_alloc =
|
||||||
|
static_cast<const MMAPAllocation*>(tensor->allocation);
|
||||||
|
if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
|
||||||
|
ANeuralNetworksMemory* ann_memory_handle = nullptr;
|
||||||
|
nnapi_->ANeuralNetworksMemory_createFromFd(
|
||||||
|
mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
|
||||||
|
&ann_memory_handle);
|
||||||
|
allocation_memory_mapping_->insert(
|
||||||
|
std::make_pair(mmap_alloc, ann_memory_handle));
|
||||||
|
}
|
||||||
|
ANeuralNetworksMemory* ann_memory_handle =
|
||||||
|
allocation_memory_mapping_->at(mmap_alloc);
|
||||||
|
// Compute the offset to the base pointer of the MMAPAllocation.
|
||||||
|
auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
|
||||||
|
reinterpret_cast<const uint8_t*>(mmap_alloc->base());
|
||||||
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||||
|
context_, nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
|
||||||
|
nn_model_, ann_tensor_index, ann_memory_handle,
|
||||||
|
offset, tensor->bytes));
|
||||||
|
} else {
|
||||||
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
RETURN_TFLITE_ERROR_IF_NN_ERROR(
|
||||||
context_,
|
context_,
|
||||||
nnapi_->ANeuralNetworksModel_setOperandValue(
|
nnapi_->ANeuralNetworksModel_setOperandValue(
|
||||||
nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
|
nn_model_, ann_tensor_index, tensor->data.raw, tensor->bytes));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
indices->push_back(ann_tensor_index);
|
indices->push_back(ann_tensor_index);
|
||||||
return kTfLiteOk;
|
return kTfLiteOk;
|
||||||
@ -774,6 +802,9 @@ class NNAPIOpBuilder {
|
|||||||
// tensor #4 to a FLOAT32 tensor.
|
// tensor #4 to a FLOAT32 tensor.
|
||||||
DequantizeMapping* const dequantize_mapping_;
|
DequantizeMapping* const dequantize_mapping_;
|
||||||
|
|
||||||
|
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
|
||||||
|
allocation_memory_mapping_;
|
||||||
|
|
||||||
// The NNAPI model.
|
// The NNAPI model.
|
||||||
ANeuralNetworksModel* const nn_model_;
|
ANeuralNetworksModel* const nn_model_;
|
||||||
|
|
||||||
@ -804,6 +835,11 @@ ANeuralNetworksOperationType BasicMappingFn(
|
|||||||
class NNAPIDelegateKernel {
|
class NNAPIDelegateKernel {
|
||||||
public:
|
public:
|
||||||
NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
|
NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
|
||||||
|
~NNAPIDelegateKernel() {
|
||||||
|
for (auto content : allocation_memory_mapping_) {
|
||||||
|
nnapi_->ANeuralNetworksMemory_free(content.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
typedef ANeuralNetworksOperationType (*MappingFn)(
|
typedef ANeuralNetworksOperationType (*MappingFn)(
|
||||||
const NNAPIOpMappingArgs& mapping_args);
|
const NNAPIOpMappingArgs& mapping_args);
|
||||||
@ -2079,6 +2115,8 @@ class NNAPIDelegateKernel {
|
|||||||
std::vector<int> nodes_;
|
std::vector<int> nodes_;
|
||||||
// Track indices we use
|
// Track indices we use
|
||||||
OperandMapping operand_mapping_;
|
OperandMapping operand_mapping_;
|
||||||
|
std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
|
||||||
|
allocation_memory_mapping_;
|
||||||
// Track memory map
|
// Track memory map
|
||||||
const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
|
const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
|
||||||
tensor_memory_map_;
|
tensor_memory_map_;
|
||||||
@ -2148,7 +2186,8 @@ class NNAPIDelegateKernel {
|
|||||||
// The operand builder allows creating a single op. It is created outside
|
// The operand builder allows creating a single op. It is created outside
|
||||||
// the for loop to avoid reallocating the vectors.
|
// the for loop to avoid reallocating the vectors.
|
||||||
NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
|
NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
|
||||||
&dequantize_mapping, nn_model_.get());
|
&dequantize_mapping, &allocation_memory_mapping_,
|
||||||
|
nn_model_.get());
|
||||||
// Add Tensors.
|
// Add Tensors.
|
||||||
for (auto node_index : nodes_) {
|
for (auto node_index : nodes_) {
|
||||||
// Obtain the op and registration.
|
// Obtain the op and registration.
|
||||||
|
@ -26,7 +26,8 @@ namespace tflite {
|
|||||||
|
|
||||||
MMAPAllocation::MMAPAllocation(const char* filename,
|
MMAPAllocation::MMAPAllocation(const char* filename,
|
||||||
ErrorReporter* error_reporter)
|
ErrorReporter* error_reporter)
|
||||||
: Allocation(error_reporter), mmapped_buffer_(MAP_FAILED) {
|
: Allocation(error_reporter, Allocation::Type::kMMap),
|
||||||
|
mmapped_buffer_(MAP_FAILED) {
|
||||||
mmap_fd_ = open(filename, O_RDONLY);
|
mmap_fd_ = open(filename, O_RDONLY);
|
||||||
if (mmap_fd_ == -1) {
|
if (mmap_fd_ == -1) {
|
||||||
error_reporter_->Report("Could not open '%s'.", filename);
|
error_reporter_->Report("Could not open '%s'.", filename);
|
||||||
|
@ -13,15 +13,15 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
|
|
||||||
#include "tensorflow/lite/allocation.h"
|
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "tensorflow/lite/allocation.h"
|
||||||
|
|
||||||
namespace tflite {
|
namespace tflite {
|
||||||
|
|
||||||
MMAPAllocation::MMAPAllocation(const char* filename,
|
MMAPAllocation::MMAPAllocation(const char* filename,
|
||||||
ErrorReporter* error_reporter)
|
ErrorReporter* error_reporter)
|
||||||
: Allocation(error_reporter), mmapped_buffer_(nullptr) {
|
: Allocation(error_reporter, kMMap), mmapped_buffer_(nullptr) {
|
||||||
// The disabled variant should never be created.
|
// The disabled variant should never be created.
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user