Expose option to limit the number of partitions that will be delegated to NNAPI
PiperOrigin-RevId: 295962456 Change-Id: I43e13700e23b798ce786b7f1034066961c4c3613
This commit is contained in:
parent
28046a55b7
commit
911d4a618a
@ -65,24 +65,35 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Options setAcceleratorName(String name) {
|
public Options setAcceleratorName(String name) {
|
||||||
this.accelerator_name = name;
|
this.acceleratorName = name;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Options setCacheDir(String name) {
|
public Options setCacheDir(String cacheDir) {
|
||||||
this.cache_dir = name;
|
this.cacheDir = cacheDir;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Options setModelToken(String name) {
|
public Options setModelToken(String modelToken) {
|
||||||
this.model_token = name;
|
this.modelToken = modelToken;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
|
/**
|
||||||
String accelerator_name = null;
|
* Sets the maximum number of graph partitions that the delegate will try to delegate. If more
|
||||||
String cache_dir = null;
|
* partitions could be delegated than the limit, the ones with the larger number of nodes will
|
||||||
String model_token = null;
|
* be chosen. If unset it will use the NNAPI default limit.
|
||||||
|
*/
|
||||||
|
public Options setMaxNumberOfDelegatedPartitions(int limit) {
|
||||||
|
this.maxDelegatedPartitions = limit;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
|
||||||
|
private String acceleratorName = null;
|
||||||
|
private String cacheDir = null;
|
||||||
|
private String modelToken = null;
|
||||||
|
private Integer maxDelegatedPartitions = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public NnApiDelegate(Options options) {
|
public NnApiDelegate(Options options) {
|
||||||
@ -91,9 +102,10 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
|
|||||||
delegateHandle =
|
delegateHandle =
|
||||||
createDelegate(
|
createDelegate(
|
||||||
options.executionPreference,
|
options.executionPreference,
|
||||||
options.accelerator_name,
|
options.acceleratorName,
|
||||||
options.cache_dir,
|
options.cacheDir,
|
||||||
options.model_token);
|
options.modelToken,
|
||||||
|
options.maxDelegatedPartitions != null ? options.maxDelegatedPartitions : -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public NnApiDelegate() {
|
public NnApiDelegate() {
|
||||||
@ -118,8 +130,13 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
private static native long createDelegate(
|
private static native long createDelegate(
|
||||||
int preference, String device_name, String cache_dir, String model_token);
|
int preference,
|
||||||
|
String deviceName,
|
||||||
|
String cacheDir,
|
||||||
|
String modelToken,
|
||||||
|
int maxDelegatedPartitions);
|
||||||
|
|
||||||
private static native void deleteDelegate(long delegateHandle);
|
private static native void deleteDelegate(long delegateHandle);
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ using namespace tflite;
|
|||||||
JNIEXPORT jlong JNICALL
|
JNIEXPORT jlong JNICALL
|
||||||
Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
|
Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
|
||||||
JNIEnv* env, jclass clazz, jint preference, jstring accelerator_name,
|
JNIEnv* env, jclass clazz, jint preference, jstring accelerator_name,
|
||||||
jstring cache_dir, jstring model_token) {
|
jstring cache_dir, jstring model_token, jint max_delegated_partitions) {
|
||||||
StatefulNnApiDelegate::Options options = StatefulNnApiDelegate::Options();
|
StatefulNnApiDelegate::Options options = StatefulNnApiDelegate::Options();
|
||||||
options.execution_preference =
|
options.execution_preference =
|
||||||
(StatefulNnApiDelegate::Options::ExecutionPreference)preference;
|
(StatefulNnApiDelegate::Options::ExecutionPreference)preference;
|
||||||
@ -40,6 +40,10 @@ Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
|
|||||||
options.model_token = env->GetStringUTFChars(model_token, NULL);
|
options.model_token = env->GetStringUTFChars(model_token, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (max_delegated_partitions >= 0) {
|
||||||
|
options.max_number_delegated_partitions = max_delegated_partitions;
|
||||||
|
}
|
||||||
|
|
||||||
auto delegate = new StatefulNnApiDelegate(options);
|
auto delegate = new StatefulNnApiDelegate(options);
|
||||||
|
|
||||||
if (options.accelerator_name) {
|
if (options.accelerator_name) {
|
||||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <iterator>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -3850,6 +3851,8 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
|
|||||||
delegate_data_.model_token = options.model_token;
|
delegate_data_.model_token = options.model_token;
|
||||||
}
|
}
|
||||||
delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
|
delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
|
||||||
|
delegate_data_.max_number_delegated_partitions =
|
||||||
|
options.max_number_delegated_partitions;
|
||||||
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
|
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
|
||||||
"Created TensorFlow Lite delegate for NNAPI.");
|
"Created TensorFlow Lite delegate for NNAPI.");
|
||||||
Prepare = DoPrepare;
|
Prepare = DoPrepare;
|
||||||
@ -3877,6 +3880,8 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
|
|||||||
? nullptr
|
? nullptr
|
||||||
: delegate_data->model_token.c_str();
|
: delegate_data->model_token.c_str();
|
||||||
options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
|
options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
|
||||||
|
options.max_number_delegated_partitions =
|
||||||
|
delegate_data->max_number_delegated_partitions;
|
||||||
return options;
|
return options;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3943,6 +3948,110 @@ int StatefulNnApiDelegate::GetNnApiErrno() const {
|
|||||||
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
|
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
|
||||||
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
|
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
|
||||||
|
const std::vector<int>& data) {
|
||||||
|
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
|
||||||
|
TfLiteIntArrayCreate(data.size()));
|
||||||
|
std::copy(data.begin(), data.end(), result->data);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// static
|
||||||
|
TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
|
||||||
|
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
|
||||||
|
const std::vector<int>& supported_nodes,
|
||||||
|
std::vector<int>* device_supported_nodes, int* num_partitions,
|
||||||
|
TfLiteDelegateParams** params_array, int* nnapi_errno) {
|
||||||
|
auto* delegate_data = static_cast<Data*>(delegate->data_);
|
||||||
|
// The first entry in the array is the element count
|
||||||
|
|
||||||
|
auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
|
||||||
|
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
||||||
|
context, supported_nodes_int_array.get(), params_array, num_partitions));
|
||||||
|
// For each partition check which nodes are actually supported by the
|
||||||
|
// target accelerators.
|
||||||
|
delegate_data->delegate_state_cache.clear();
|
||||||
|
for (int idx = 0; idx < *num_partitions; idx++) {
|
||||||
|
const auto& partition_params = (*params_array)[idx];
|
||||||
|
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
|
||||||
|
TfLiteDelegateParams params_with_delegate = partition_params;
|
||||||
|
params_with_delegate.delegate = delegate;
|
||||||
|
TF_LITE_ENSURE_STATUS(
|
||||||
|
kernel_state->Init(context, ¶ms_with_delegate, nnapi_errno));
|
||||||
|
std::vector<int> supported_partition_nodes;
|
||||||
|
TF_LITE_ENSURE_STATUS(
|
||||||
|
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
|
||||||
|
context, &supported_partition_nodes, nnapi_errno));
|
||||||
|
device_supported_nodes->insert(device_supported_nodes->end(),
|
||||||
|
supported_partition_nodes.begin(),
|
||||||
|
supported_partition_nodes.end());
|
||||||
|
|
||||||
|
bool model_fully_supported = (supported_partition_nodes.size() ==
|
||||||
|
partition_params.nodes_to_replace->size);
|
||||||
|
if (model_fully_supported) {
|
||||||
|
delegate_data->CacheDelegateKernel(&partition_params,
|
||||||
|
kernel_state.release());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (device_supported_nodes->size() != supported_nodes.size()) {
|
||||||
|
// We changed the set of nodes to delegate; this will create a different
|
||||||
|
// partitioning layout.
|
||||||
|
auto device_sup_nodes_int_array =
|
||||||
|
BuildTfLiteIntArray(*device_supported_nodes);
|
||||||
|
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
||||||
|
context, device_sup_nodes_int_array.get(), params_array,
|
||||||
|
num_partitions));
|
||||||
|
}
|
||||||
|
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
// static
|
||||||
|
TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
|
||||||
|
int max_partitions,
|
||||||
|
std::vector<TfLiteDelegateParams> partition_params_array,
|
||||||
|
std::vector<int>* nodes_to_delegate) {
|
||||||
|
int num_partitions = partition_params_array.size();
|
||||||
|
if (max_partitions <= 0 || num_partitions <= max_partitions) {
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
int number_delegated_partitions = std::count_if(
|
||||||
|
partition_params_array.begin(), partition_params_array.end(),
|
||||||
|
[nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
|
||||||
|
return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
|
||||||
|
partition_params.nodes_to_replace->data[0]) !=
|
||||||
|
nodes_to_delegate->end();
|
||||||
|
});
|
||||||
|
|
||||||
|
if (number_delegated_partitions > max_partitions) {
|
||||||
|
std::sort(partition_params_array.begin(), partition_params_array.end(),
|
||||||
|
[](const TfLiteDelegateParams& left,
|
||||||
|
const TfLiteDelegateParams& right) -> bool {
|
||||||
|
// Reverse sort
|
||||||
|
return left.nodes_to_replace->size >
|
||||||
|
right.nodes_to_replace->size;
|
||||||
|
});
|
||||||
|
|
||||||
|
nodes_to_delegate->clear();
|
||||||
|
|
||||||
|
for (int i = 0; i < max_partitions; i++) {
|
||||||
|
const TfLiteDelegateParams& partition_params = partition_params_array[i];
|
||||||
|
|
||||||
|
nodes_to_delegate->insert(nodes_to_delegate->end(),
|
||||||
|
partition_params.nodes_to_replace->data,
|
||||||
|
partition_params.nodes_to_replace->data +
|
||||||
|
partition_params.nodes_to_replace->size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return kTfLiteOk;
|
||||||
|
}
|
||||||
|
|
||||||
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
||||||
TfLiteDelegate* delegate) {
|
TfLiteDelegate* delegate) {
|
||||||
auto* delegate_data = static_cast<Data*>(delegate->data_);
|
auto* delegate_data = static_cast<Data*>(delegate->data_);
|
||||||
@ -3998,10 +4107,8 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Allocate one element in vector already since TensorFlow Lite uses
|
|
||||||
// the first value as the number of nodes. The actual value will be set
|
std::vector<int> supported_nodes;
|
||||||
// later, after the vector has been filled.
|
|
||||||
std::vector<int> supported_nodes(1);
|
|
||||||
// We don't care about all nodes_, we only care about ones in the
|
// We don't care about all nodes_, we only care about ones in the
|
||||||
// current plan.
|
// current plan.
|
||||||
TfLiteIntArray* plan;
|
TfLiteIntArray* plan;
|
||||||
@ -4021,11 +4128,9 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
|||||||
supported_nodes.push_back(node_index);
|
supported_nodes.push_back(node_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// First element in vector must be the number of actual nodes.
|
|
||||||
supported_nodes[0] = supported_nodes.size() - 1;
|
|
||||||
|
|
||||||
// If there are no delegated nodes, short-circuit node replacement.
|
// If there are no delegated nodes, short-circuit node replacement.
|
||||||
if (!supported_nodes[0]) {
|
if (supported_nodes.empty()) {
|
||||||
return kTfLiteOk;
|
return kTfLiteOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4082,40 +4187,20 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
|||||||
|
|
||||||
std::vector<int>& nodes_to_delegate = supported_nodes;
|
std::vector<int>& nodes_to_delegate = supported_nodes;
|
||||||
if (is_accelerator_specified) {
|
if (is_accelerator_specified) {
|
||||||
|
std::vector<int> device_supported_nodes;
|
||||||
|
int num_partitions;
|
||||||
TfLiteDelegateParams* params_array;
|
TfLiteDelegateParams* params_array;
|
||||||
int num_partitions = 0;
|
|
||||||
// The first entry in the array is the element count
|
|
||||||
std::vector<int> device_supported_nodes(1);
|
|
||||||
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
|
|
||||||
context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
|
|
||||||
¶ms_array, &num_partitions));
|
|
||||||
// For each partition check if which nodes are actually supported by the
|
|
||||||
// target accelerators.
|
|
||||||
delegate_data->delegate_state_cache.clear();
|
|
||||||
for (int idx = 0; idx < num_partitions; idx++) {
|
|
||||||
const auto& partition_params = params_array[idx];
|
|
||||||
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
|
|
||||||
TfLiteDelegateParams params_with_delegate = partition_params;
|
|
||||||
params_with_delegate.delegate = delegate;
|
|
||||||
TF_LITE_ENSURE_STATUS(
|
|
||||||
kernel_state->Init(context, ¶ms_with_delegate, nnapi_errno));
|
|
||||||
std::vector<int> supported_partition_nodes;
|
|
||||||
TF_LITE_ENSURE_STATUS(
|
|
||||||
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
|
|
||||||
context, &supported_partition_nodes, nnapi_errno));
|
|
||||||
device_supported_nodes.insert(device_supported_nodes.end(),
|
|
||||||
supported_partition_nodes.begin(),
|
|
||||||
supported_partition_nodes.end());
|
|
||||||
|
|
||||||
bool model_fully_supported = (supported_partition_nodes.size() ==
|
TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
|
||||||
partition_params.nodes_to_replace->size);
|
context, delegate, nnapi, supported_nodes, &device_supported_nodes,
|
||||||
if (model_fully_supported) {
|
&num_partitions, ¶ms_array, nnapi_errno));
|
||||||
delegate_data->CacheDelegateKernel(&partition_params,
|
|
||||||
kernel_state.release());
|
TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
|
||||||
}
|
delegate_options.max_number_delegated_partitions,
|
||||||
}
|
std::vector<TfLiteDelegateParams>(params_array,
|
||||||
|
params_array + num_partitions),
|
||||||
|
&device_supported_nodes));
|
||||||
|
|
||||||
device_supported_nodes[0] = device_supported_nodes.size() - 1;
|
|
||||||
nodes_to_delegate = device_supported_nodes;
|
nodes_to_delegate = device_supported_nodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4124,9 +4209,10 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
|
|||||||
} else {
|
} else {
|
||||||
// Request TFLite to partition the graph and make kernels
|
// Request TFLite to partition the graph and make kernels
|
||||||
// for each independent node sub set a new nnapi_delegate_kernel.
|
// for each independent node sub set a new nnapi_delegate_kernel.
|
||||||
|
auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
|
||||||
return context->ReplaceNodeSubsetsWithDelegateKernels(
|
return context->ReplaceNodeSubsetsWithDelegateKernels(
|
||||||
context, nnapi_delegate_kernel,
|
context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
|
||||||
reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
|
delegate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,6 +80,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
|
|||||||
// kernels, but allowing CPU allows partial acceleration of models. If this
|
// kernels, but allowing CPU allows partial acceleration of models. If this
|
||||||
// is set to true, NNAPI is only used if the whole model is accelerated.
|
// is set to true, NNAPI is only used if the whole model is accelerated.
|
||||||
bool disallow_nnapi_cpu = false;
|
bool disallow_nnapi_cpu = false;
|
||||||
|
|
||||||
|
// Specifies the max number of partitions to delegate. A value <= 0 means
|
||||||
|
// no limit.
|
||||||
|
// If the delegation of the full set of supported nodes would generate a
|
||||||
|
// number of partitions greater than this parameter, only
|
||||||
|
// <max_number_delegated_partitions> of them will be actually accelerated.
|
||||||
|
// The selection is currently done sorting partitions in decreasing order
|
||||||
|
// of number of nodes and selecting them until the limit is reached.
|
||||||
|
int max_number_delegated_partitions = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Uses default options.
|
// Uses default options.
|
||||||
@ -172,13 +181,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
|
|||||||
// The key is the index of the first node in the partition.
|
// The key is the index of the first node in the partition.
|
||||||
// Couldn't use unique_ptr because of problems building on gcc
|
// Couldn't use unique_ptr because of problems building on gcc
|
||||||
std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
|
std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
|
||||||
|
// Maximum number of NNAPI partitions to delegate. Zero or negative means
|
||||||
|
// no limit. Copied from StatefulNnApiDelegate::Options
|
||||||
|
int max_number_delegated_partitions;
|
||||||
|
|
||||||
~Data();
|
~Data();
|
||||||
|
|
||||||
// Caches an initialised NNAPIDelegateKernel.
|
// Caches an initialised NNAPIDelegateKernel.
|
||||||
void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
|
void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
|
||||||
NNAPIDelegateKernel* delegate_state);
|
NNAPIDelegateKernel* delegate_state);
|
||||||
// Returns a cached NNAPIDelegateKernel if available.
|
// Returns a cached NNAPIDelegateKernel if available and removes it
|
||||||
|
// from the cache transferring the ownership to the caller.
|
||||||
absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
|
absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
|
||||||
const TfLiteDelegateParams* delegate_params);
|
const TfLiteDelegateParams* delegate_params);
|
||||||
};
|
};
|
||||||
@ -211,6 +224,34 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
|
|||||||
TfLiteDelegate* delegate,
|
TfLiteDelegate* delegate,
|
||||||
TfLiteBufferHandle* handle);
|
TfLiteBufferHandle* handle);
|
||||||
|
|
||||||
|
// Returns the nodes that can be delegated via NNAPI to the accelerator
|
||||||
|
// specified in the delegate options and information about the way the
|
||||||
|
// graph will be partitioned if the supported nodes will be delegated.
|
||||||
|
// Partition information is composed by the number of partitions and
|
||||||
|
// the delegate parameters associated to each partition.
|
||||||
|
// The method also caches in delegate->data_ the NNAPIDelegateKernel instances
|
||||||
|
// that have been created during the device evaluation.
|
||||||
|
// All arguments are expected to be non-null.
|
||||||
|
static TfLiteStatus GetNodesSupportedByAccelerator(
|
||||||
|
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
|
||||||
|
const std::vector<int>& supported_nodes,
|
||||||
|
std::vector<int>* device_supported_nodes, int* num_partitions,
|
||||||
|
TfLiteDelegateParams** params_array, int* nnapi_errno);
|
||||||
|
|
||||||
|
// Alters the given array of nodes_to_delegate to limit the number of NNAPI
|
||||||
|
// owned partitions to be less than or equal to max_partitions. If max_partitions
|
||||||
|
// is less than or equal to zero the input is left unaltered.
|
||||||
|
// The nodes_to_delegate vector is expected to contain the indexes of the
|
||||||
|
// nodes that would be delegated to NNAPI if this function weren't
|
||||||
|
// called. If more partitions than the limit would be delegated, it is
|
||||||
|
// overwritten with the indexes of the nodes belonging to the
|
||||||
|
// max_partitions partitions with the largest number of nodes.
|
||||||
|
// partition_params_array is taken by value, so reordering it inside the function does not alter the caller's array.
|
||||||
|
static TfLiteStatus LimitDelegatedPartitions(
|
||||||
|
int max_partitions,
|
||||||
|
std::vector<TfLiteDelegateParams> partition_params_array,
|
||||||
|
std::vector<int>* nodes_to_delegate);
|
||||||
|
|
||||||
// Delegate data presented through TfLiteDelegate::data_.
|
// Delegate data presented through TfLiteDelegate::data_.
|
||||||
Data delegate_data_;
|
Data delegate_data_;
|
||||||
};
|
};
|
||||||
|
@ -14,6 +14,14 @@ limitations under the License.
|
|||||||
==============================================================================*/
|
==============================================================================*/
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <memory>
|
||||||
|
#include <numeric>
|
||||||
|
#include <ostream>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include "tensorflow/lite/c/common.h"
|
#include "tensorflow/lite/c/common.h"
|
||||||
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
|
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
|
||||||
@ -223,18 +231,21 @@ class AcceleratedModel {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
// build a delegate with a target accelerator name.
|
// build a delegate with a target accelerator name.
|
||||||
explicit AcceleratedModel(const NnApi* nnapi,
|
AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
|
||||||
const std::string& accelerator_name) {
|
int max_nnapi_partitions = 0) {
|
||||||
StatefulNnApiDelegate::Options options;
|
StatefulNnApiDelegate::Options options;
|
||||||
options.accelerator_name = accelerator_name.c_str();
|
options.accelerator_name = accelerator_name.c_str();
|
||||||
|
options.max_number_delegated_partitions = max_nnapi_partitions;
|
||||||
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
|
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
|
||||||
}
|
}
|
||||||
|
|
||||||
// build a delegate with no target accelerator name, can disable the NNAPI CPU
|
// build a delegate with no target accelerator name, can disable the NNAPI CPU
|
||||||
// fallback implementation using the disallow_nnapi_cpu flag.
|
// fallback implementation using the disallow_nnapi_cpu flag.
|
||||||
explicit AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu) {
|
AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
|
||||||
|
int max_nnapi_partitions = 0) {
|
||||||
StatefulNnApiDelegate::Options options;
|
StatefulNnApiDelegate::Options options;
|
||||||
options.disallow_nnapi_cpu = disallow_nnapi_cpu;
|
options.disallow_nnapi_cpu = disallow_nnapi_cpu;
|
||||||
|
options.max_number_delegated_partitions = max_nnapi_partitions;
|
||||||
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
|
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -305,8 +316,6 @@ TEST_F(UnsupportedOperationOnDeviceTest,
|
|||||||
<< "Expected Max not to be delegates since it not supported before NNAPI "
|
<< "Expected Max not to be delegates since it not supported before NNAPI "
|
||||||
"1.2 and device declares to support only NNAPI 1.1.";
|
"1.2 and device declares to support only NNAPI 1.1.";
|
||||||
|
|
||||||
TFLITE_LOG_PROD(TFLITE_LOG_INFO, "First part of test done");
|
|
||||||
|
|
||||||
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
|
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
|
||||||
|
|
||||||
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
|
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
|
||||||
@ -535,6 +544,198 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
|
|||||||
EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
|
EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Model with a chain of no-ops (add-with-zero operations)
|
||||||
|
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
|
||||||
|
public:
|
||||||
|
LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
|
||||||
|
const NnApi* nnapi, const std::string& accelerator_name,
|
||||||
|
int max_nnapi_partitions)
|
||||||
|
: MultiOpModel(),
|
||||||
|
AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
|
||||||
|
auto* delegate = GetDelegate();
|
||||||
|
this->SetApplyDelegate([delegate](Interpreter* interpreter) {
|
||||||
|
interpreter->ModifyGraphWithDelegate(delegate);
|
||||||
|
});
|
||||||
|
|
||||||
|
const TensorData tensor_data{TensorType_FLOAT32, input_shape};
|
||||||
|
|
||||||
|
input_ = AddInput(tensor_data);
|
||||||
|
zero_input_ = AddInput(tensor_data);
|
||||||
|
|
||||||
|
std::vector<int> intermediate_outputs(graph_size - 1);
|
||||||
|
std::generate(
|
||||||
|
std::begin(intermediate_outputs), std::end(intermediate_outputs),
|
||||||
|
[this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });
|
||||||
|
|
||||||
|
output_ = AddOutput(tensor_data);
|
||||||
|
|
||||||
|
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
|
||||||
|
CreateAddOptions(builder_).Union(), {input_, zero_input_},
|
||||||
|
{intermediate_outputs[0]});
|
||||||
|
|
||||||
|
for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
|
||||||
|
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
|
||||||
|
CreateAddOptions(builder_).Union(),
|
||||||
|
{intermediate_outputs[i], zero_input_},
|
||||||
|
{intermediate_outputs[i + 1]});
|
||||||
|
}
|
||||||
|
|
||||||
|
AddBuiltinOp(
|
||||||
|
BuiltinOperator_ADD, BuiltinOptions_AddOptions,
|
||||||
|
CreateAddOptions(builder_).Union(),
|
||||||
|
{intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
|
||||||
|
{output_});
|
||||||
|
|
||||||
|
BuildInterpreter({GetShape(input_), GetShape(zero_input_)});
|
||||||
|
|
||||||
|
std::vector<float> zero(GetTensorSize(input_), 0.0);
|
||||||
|
PopulateTensor(zero_input_, zero);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
|
||||||
|
|
||||||
|
int CountNnApiPartitions() {
|
||||||
|
return std::count_if(
|
||||||
|
std::begin(interpreter_->execution_plan()),
|
||||||
|
std::end(interpreter_->execution_plan()), [this](const int node_index) {
|
||||||
|
return interpreter_->node_and_registration(node_index)
|
||||||
|
->first.delegate != nullptr;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int input_;
|
||||||
|
int zero_input_;
|
||||||
|
int output_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class NodeFilter {
|
||||||
|
public:
|
||||||
|
void ConfigureSupportedNodes(
|
||||||
|
int graph_size, const std::unordered_set<int>& unsupported_indexes) {
|
||||||
|
graph_size_ = graph_size;
|
||||||
|
unsupported_indexes_ = unsupported_indexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetNodeSupport(bool* supported_ops) {
|
||||||
|
for (int i = 0; i < graph_size_; i++) {
|
||||||
|
supported_ops[i] = (unsupported_indexes_.count(i) == 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int graph_size_;
|
||||||
|
std::unordered_set<int> unsupported_indexes_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Using the same node filter for all DelegatePartitionLimitTests
|
||||||
|
// because StubGetSupportedOperationsForDevicesWith wants a C function.
|
||||||
|
NodeFilter* DelegatePartitionLimitTestNodeFilter() {
|
||||||
|
static NodeFilter* node_filter = new NodeFilter();
|
||||||
|
return node_filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
class DelegatePartitionLimitTest
|
||||||
|
: public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
|
||||||
|
protected:
|
||||||
|
// Configure the underlying graph to generate a set of NNAPI partitions
|
||||||
|
// with the sizes specified in nnapi_partition_sizes and the given
|
||||||
|
// input_shape.
|
||||||
|
void Init(int max_nnapi_partitions,
|
||||||
|
const std::vector<int>& nnapi_partition_sizes,
|
||||||
|
const std::vector<int>& input_shape) {
|
||||||
|
// The graph will have as number of nodes the sum of nodes in the NNAPI
|
||||||
|
// partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
|
||||||
|
// not supported by NNAPI and will separate the NNAPI partitions.
|
||||||
|
graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
|
||||||
|
std::end(nnapi_partition_sizes),
|
||||||
|
nnapi_partition_sizes.size() - 1);
|
||||||
|
|
||||||
|
std::unordered_set<int> unsupported_ops_idxs;
|
||||||
|
int partition_node_idx = -1;
|
||||||
|
for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
|
||||||
|
partition_node_idx += nnapi_partition_sizes[i] + 1;
|
||||||
|
unsupported_ops_idxs.insert(partition_node_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
|
||||||
|
graph_size_, unsupported_ops_idxs);
|
||||||
|
|
||||||
|
nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
|
||||||
|
[](const ANeuralNetworksModel* model,
|
||||||
|
const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
|
||||||
|
bool* supported_ops) -> int {
|
||||||
|
DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
|
||||||
|
return ANEURALNETWORKS_NO_ERROR;
|
||||||
|
});
|
||||||
|
|
||||||
|
model_ = std::make_unique<LongIdentityModel>(
|
||||||
|
input_shape, graph_size_, nnapi_mock_->GetNnApi(),
|
||||||
|
/*accelerator_name=*/"test-device", max_nnapi_partitions);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<LongIdentityModel> model_;
|
||||||
|
|
||||||
|
int OriginalGraphSize() { return graph_size_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
int graph_size_;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
|
||||||
|
Init(/*max_nnapi_partitions=*/1,
|
||||||
|
/*nnapi_partition_sizes=*/{3, 2},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest,
|
||||||
|
ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
|
||||||
|
Init(/*max_nnapi_partitions=*/0,
|
||||||
|
/*nnapi_partition_sizes=*/{3, 2},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest,
|
||||||
|
ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
|
||||||
|
Init(/*max_nnapi_partitions=*/0,
|
||||||
|
/*nnapi_partition_sizes=*/{3, 2},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest,
|
||||||
|
ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
|
||||||
|
Init(/*max_nnapi_partitions=*/3,
|
||||||
|
/*nnapi_partition_sizes=*/{3, 2},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
|
||||||
|
Init(/*max_nnapi_partitions=*/1,
|
||||||
|
/*nnapi_partition_sizes=*/{3, 2},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
|
||||||
|
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(DelegatePartitionLimitTest,
|
||||||
|
ShouldDelegatePartitionsWithHigherNodeCount) {
|
||||||
|
Init(/*max_nnapi_partitions=*/2,
|
||||||
|
/*nnapi_partition_sizes=*/{1, 5, 2, 4},
|
||||||
|
/*input_shape=*/{1, 2, 2, 1});
|
||||||
|
|
||||||
|
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
|
||||||
|
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace tflite
|
} // namespace tflite
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user