Expose option to limit the number of partitions that will be delegated to NNAPI

PiperOrigin-RevId: 295962456
Change-Id: I43e13700e23b798ce786b7f1034066961c4c3613
Stefano Galarraga 2020-02-19 07:21:11 -08:00 committed by TensorFlower Gardener
parent 28046a55b7
commit 911d4a618a
5 changed files with 409 additions and 60 deletions


@@ -65,24 +65,35 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
}
public Options setAcceleratorName(String name) {
this.accelerator_name = name;
this.acceleratorName = name;
return this;
}
public Options setCacheDir(String name) {
this.cache_dir = name;
public Options setCacheDir(String cacheDir) {
this.cacheDir = cacheDir;
return this;
}
public Options setModelToken(String name) {
this.model_token = name;
public Options setModelToken(String modelToken) {
this.modelToken = modelToken;
return this;
}
int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
String accelerator_name = null;
String cache_dir = null;
String model_token = null;
/**
* Sets the maximum number of graph partitions that the delegate will try to delegate. If more
* partitions could be delegated than the limit, the ones with the largest number of nodes will
* be chosen. If unset, the NNAPI default limit will be used.
*/
public Options setMaxNumberOfDelegatedPartitions(int limit) {
this.maxDelegatedPartitions = limit;
return this;
}
private int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
private String acceleratorName = null;
private String cacheDir = null;
private String modelToken = null;
private Integer maxDelegatedPartitions = null;
}
public NnApiDelegate(Options options) {
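The Java Options above mirror the native StatefulNnApiDelegate::Options one-to-one, so the equivalent native-side configuration looks roughly like this (a minimal sketch; "example-accelerator" is a placeholder name and MakeLimitedDelegate is a hypothetical helper, not part of this change):

#include <memory>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

// Builds a delegate targeting a named accelerator that delegates at most two
// partitions; mirrors setAcceleratorName / setMaxNumberOfDelegatedPartitions.
std::unique_ptr<tflite::StatefulNnApiDelegate> MakeLimitedDelegate() {
  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "example-accelerator";  // placeholder name
  options.max_number_delegated_partitions = 2;       // <= 0 would mean no limit
  return std::make_unique<tflite::StatefulNnApiDelegate>(options);
}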
@@ -91,9 +102,10 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
delegateHandle =
createDelegate(
options.executionPreference,
options.accelerator_name,
options.cache_dir,
options.model_token);
options.acceleratorName,
options.cacheDir,
options.modelToken,
options.maxDelegatedPartitions != null ? options.maxDelegatedPartitions : -1);
}
public NnApiDelegate() {
@@ -118,8 +130,13 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
}
}
//
private static native long createDelegate(
int preference, String device_name, String cache_dir, String model_token);
int preference,
String deviceName,
String cacheDir,
String modelToken,
int maxDelegatedPartitions);
private static native void deleteDelegate(long delegateHandle);
}


@@ -26,7 +26,7 @@ using namespace tflite;
JNIEXPORT jlong JNICALL
Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
JNIEnv* env, jclass clazz, jint preference, jstring accelerator_name,
jstring cache_dir, jstring model_token) {
jstring cache_dir, jstring model_token, jint max_delegated_partitions) {
StatefulNnApiDelegate::Options options = StatefulNnApiDelegate::Options();
options.execution_preference =
(StatefulNnApiDelegate::Options::ExecutionPreference)preference;
@@ -40,6 +40,10 @@ Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
options.model_token = env->GetStringUTFChars(model_token, NULL);
}
if (max_delegated_partitions >= 0) {
options.max_number_delegated_partitions = max_delegated_partitions;
}
auto delegate = new StatefulNnApiDelegate(options);
if (options.accelerator_name) {
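Both layers agree on -1 as the "unset" sentinel: the Java constructor passes -1 when setMaxNumberOfDelegatedPartitions was never called, and the JNI code above only overrides the native default (0, meaning no limit) for non-negative values. A standalone sketch of that resolution rule (illustration only; ResolveMaxPartitions is a hypothetical helper):

#include <iostream>

// -1 means "unset" on the Java side; any value >= 0 overrides the native
// Options default of 0 ("no limit").
int ResolveMaxPartitions(int jni_value, int native_default) {
  return jni_value >= 0 ? jni_value : native_default;
}

int main() {
  std::cout << ResolveMaxPartitions(-1, 0) << "\n";  // unset -> 0 (no limit)
  std::cout << ResolveMaxPartitions(3, 0) << "\n";   // user-set -> 3
}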


@@ -22,6 +22,7 @@ limitations under the License.
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
@@ -3850,6 +3851,8 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
delegate_data_.model_token = options.model_token;
}
delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
delegate_data_.max_number_delegated_partitions =
options.max_number_delegated_partitions;
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Created TensorFlow Lite delegate for NNAPI.");
Prepare = DoPrepare;
@@ -3877,6 +3880,8 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
? nullptr
: delegate_data->model_token.c_str();
options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
options.max_number_delegated_partitions =
delegate_data->max_number_delegated_partitions;
return options;
}
@@ -3943,6 +3948,110 @@ int StatefulNnApiDelegate::GetNnApiErrno() const {
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
namespace {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
const std::vector<int>& data) {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
TfLiteIntArrayCreate(data.size()));
std::copy(data.begin(), data.end(), result->data);
return result;
}
} // namespace
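// Usage sketch for the helper above (illustration only, not part of the
// change): the returned array owns a copy of the vector's contents and is
// released by TfLiteIntArrayDeleter when the unique_ptr is destroyed.
//
//   std::vector<int> nodes = {2, 5, 7};
//   auto node_array = BuildTfLiteIntArray(nodes);
//   // node_array->size == 3; node_array->data holds {2, 5, 7}.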
// static
TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
const std::vector<int>& supported_nodes,
std::vector<int>* device_supported_nodes, int* num_partitions,
TfLiteDelegateParams** params_array, int* nnapi_errno) {
auto* delegate_data = static_cast<Data*>(delegate->data_);
// The first entry in the array is the element count
auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, supported_nodes_int_array.get(), params_array, num_partitions));
// For each partition, check which nodes are actually supported by the
// target accelerators.
delegate_data->delegate_state_cache.clear();
for (int idx = 0; idx < *num_partitions; idx++) {
const auto& partition_params = (*params_array)[idx];
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
TfLiteDelegateParams params_with_delegate = partition_params;
params_with_delegate.delegate = delegate;
TF_LITE_ENSURE_STATUS(
kernel_state->Init(context, &params_with_delegate, nnapi_errno));
std::vector<int> supported_partition_nodes;
TF_LITE_ENSURE_STATUS(
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
context, &supported_partition_nodes, nnapi_errno));
device_supported_nodes->insert(device_supported_nodes->end(),
supported_partition_nodes.begin(),
supported_partition_nodes.end());
bool model_fully_supported = (supported_partition_nodes.size() ==
partition_params.nodes_to_replace->size);
if (model_fully_supported) {
delegate_data->CacheDelegateKernel(&partition_params,
kernel_state.release());
}
}
if (device_supported_nodes->size() != supported_nodes.size()) {
// We changed the set of nodes to delegate; this will create a different
// partitioning layout.
auto device_sup_nodes_int_array =
BuildTfLiteIntArray(*device_supported_nodes);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, device_sup_nodes_int_array.get(), params_array,
num_partitions));
}
return kTfLiteOk;
}
// static
TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
int max_partitions,
std::vector<TfLiteDelegateParams> partition_params_array,
std::vector<int>* nodes_to_delegate) {
int num_partitions = partition_params_array.size();
if (max_partitions <= 0 || num_partitions <= max_partitions) {
return kTfLiteOk;
}
int number_delegated_partitions = std::count_if(
partition_params_array.begin(), partition_params_array.end(),
[nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
partition_params.nodes_to_replace->data[0]) !=
nodes_to_delegate->end();
});
if (number_delegated_partitions > max_partitions) {
std::sort(partition_params_array.begin(), partition_params_array.end(),
[](const TfLiteDelegateParams& left,
const TfLiteDelegateParams& right) -> bool {
// Reverse sort: partitions with more nodes come first.
return left.nodes_to_replace->size >
right.nodes_to_replace->size;
});
nodes_to_delegate->clear();
for (int i = 0; i < max_partitions; i++) {
const TfLiteDelegateParams& partition_params = partition_params_array[i];
nodes_to_delegate->insert(nodes_to_delegate->end(),
partition_params.nodes_to_replace->data,
partition_params.nodes_to_replace->data +
partition_params.nodes_to_replace->size);
}
}
return kTfLiteOk;
}
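// Worked example of the selection above (illustration only): with partition
// node counts {1, 5, 2, 4} and max_partitions == 2, the reverse sort yields
// {5, 4, 2, 1}, so only the nodes of the 5-node and 4-node partitions stay
// in *nodes_to_delegate; the 1- and 2-node partitions fall back to CPU.
//
//   std::vector<int> sizes = {1, 5, 2, 4};
//   std::sort(sizes.begin(), sizes.end(), std::greater<int>());  // {5,4,2,1}
//   sizes.resize(/*max_partitions=*/2);                          // {5, 4}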
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
TfLiteDelegate* delegate) {
auto* delegate_data = static_cast<Data*>(delegate->data_);
@@ -3998,10 +4107,8 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
}
}
}
// Allocate one element in vector already since TensorFlow Lite uses
// the first value as the number of nodes. The actual value will be set
// later, after the vector has been filled.
std::vector<int> supported_nodes(1);
std::vector<int> supported_nodes;
// We don't care about all nodes_, we only care about ones in the
// current plan.
TfLiteIntArray* plan;
@@ -4021,11 +4128,9 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
supported_nodes.push_back(node_index);
}
}
// First element in vector must be the number of actual nodes.
supported_nodes[0] = supported_nodes.size() - 1;
// If there are no delegated nodes, short-circuit node replacement.
if (!supported_nodes[0]) {
if (supported_nodes.empty()) {
return kTfLiteOk;
}
@@ -4082,40 +4187,20 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
std::vector<int>& nodes_to_delegate = supported_nodes;
if (is_accelerator_specified) {
std::vector<int> device_supported_nodes;
int num_partitions;
TfLiteDelegateParams* params_array;
int num_partitions = 0;
// The first entry in the array is the element count
std::vector<int> device_supported_nodes(1);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
&params_array, &num_partitions));
// For each partition, check which nodes are actually supported by the
// target accelerators.
delegate_data->delegate_state_cache.clear();
for (int idx = 0; idx < num_partitions; idx++) {
const auto& partition_params = params_array[idx];
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
TfLiteDelegateParams params_with_delegate = partition_params;
params_with_delegate.delegate = delegate;
TF_LITE_ENSURE_STATUS(
kernel_state->Init(context, &params_with_delegate, nnapi_errno));
std::vector<int> supported_partition_nodes;
TF_LITE_ENSURE_STATUS(
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
context, &supported_partition_nodes, nnapi_errno));
device_supported_nodes.insert(device_supported_nodes.end(),
supported_partition_nodes.begin(),
supported_partition_nodes.end());
bool model_fully_supported = (supported_partition_nodes.size() ==
partition_params.nodes_to_replace->size);
if (model_fully_supported) {
delegate_data->CacheDelegateKernel(&partition_params,
kernel_state.release());
}
}
TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
context, delegate, nnapi, supported_nodes, &device_supported_nodes,
&num_partitions, &params_array, nnapi_errno));
TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
delegate_options.max_number_delegated_partitions,
std::vector<TfLiteDelegateParams>(params_array,
params_array + num_partitions),
&device_supported_nodes));
device_supported_nodes[0] = device_supported_nodes.size() - 1;
nodes_to_delegate = device_supported_nodes;
}
@@ -4124,9 +4209,10 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
} else {
// Request TFLite to partition the graph and make kernels
// for each independent node sub set a new nnapi_delegate_kernel.
auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
return context->ReplaceNodeSubsetsWithDelegateKernels(
context, nnapi_delegate_kernel,
reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
delegate);
}
}


@@ -80,6 +80,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
// kernels, but allowing CPU allows partial acceleration of models. If this
// is set to true, NNAPI is only used if the whole model is accelerated.
bool disallow_nnapi_cpu = false;
// Specifies the max number of partitions to delegate. A value <= 0 means
// no limit.
// If the delegation of the full set of supported nodes would generate a
// number of partitions greater than this parameter, only
// <max_number_delegated_partitions> of them will actually be accelerated.
// The selection is currently done by sorting partitions in decreasing order
// of node count and selecting them until the limit is reached.
int max_number_delegated_partitions = 0;
};
// Uses default options.
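Since GetOptions now copies the new field back out of delegate_data_ (see the .cc hunk above), callers can inspect the effective limit. A hedged sketch, assuming a live StatefulNnApiDelegate named delegate:

// Sketch: read back the effective partition limit from a delegate.
const tflite::StatefulNnApiDelegate::Options effective =
    tflite::StatefulNnApiDelegate::GetOptions(&delegate);
if (effective.max_number_delegated_partitions <= 0) {
  // No limit: every supported partition is eligible for delegation.
}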
@@ -172,13 +181,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
// The key is the index of the first node in the partition.
// Couldn't use unique_ptr because of problems building on gcc
std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
// Maximum number of NNAPI partitions to delegate. Zero or negative means
// no limit. Copied from StatefulNnApiDelegate::Options.
int max_number_delegated_partitions;
~Data();
// Caches an initialised NNAPIDelegateKernel.
void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
NNAPIDelegateKernel* delegate_state);
// Returns a cached NNAPIDelegateKernel if available.
// Returns a cached NNAPIDelegateKernel if available, removing it
// from the cache and transferring ownership to the caller.
absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
const TfLiteDelegateParams* delegate_params);
};
@@ -211,6 +224,34 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
TfLiteDelegate* delegate,
TfLiteBufferHandle* handle);
// Returns the nodes that can be delegated via NNAPI to the accelerator
// specified in the delegate options and information about the way the
// graph will be partitioned if the supported nodes are delegated.
// Partition information is composed of the number of partitions and
// the delegate parameters associated with each partition.
// The method also caches in delegate->data_ the NNAPIDelegateKernel
// instances created during the device evaluation.
// All arguments are expected to be non-null.
static TfLiteStatus GetNodesSupportedByAccelerator(
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
const std::vector<int>& supported_nodes,
std::vector<int>* device_supported_nodes, int* num_partitions,
TfLiteDelegateParams** params_array, int* nnapi_errno);
// Alters the given array of nodes_to_delegate to limit the number of
// NNAPI-owned partitions to be less than or equal to max_partitions. If
// max_partitions is less than or equal to zero the input is left unaltered.
// The nodes_to_delegate array is expected to contain the indexes of the
// nodes that would be delegated to NNAPI if this function were not called;
// it is altered to contain only the indexes of the nodes belonging to the
// partitions that are kept.
// The partition_params_array might be altered during the function's
// execution.
static TfLiteStatus LimitDelegatedPartitions(
int max_partitions,
std::vector<TfLiteDelegateParams> partition_params_array,
std::vector<int>* nodes_to_delegate);
// Delegate data presented through TfLiteDelegate::data_.
Data delegate_data_;
};


@@ -14,6 +14,14 @@ limitations under the License.
==============================================================================*/
#include <sys/mman.h>
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <ostream>
#include <unordered_set>
#include <vector>
#include <gtest/gtest.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
@@ -223,18 +231,21 @@ class AcceleratedModel {
protected:
// Builds a delegate with a target accelerator name.
explicit AcceleratedModel(const NnApi* nnapi,
const std::string& accelerator_name) {
AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
int max_nnapi_partitions = 0) {
StatefulNnApiDelegate::Options options;
options.accelerator_name = accelerator_name.c_str();
options.max_number_delegated_partitions = max_nnapi_partitions;
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
}
// Builds a delegate with no target accelerator name; the NNAPI CPU fallback
// implementation can be disabled using the disallow_nnapi_cpu flag.
explicit AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu) {
AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
int max_nnapi_partitions = 0) {
StatefulNnApiDelegate::Options options;
options.disallow_nnapi_cpu = disallow_nnapi_cpu;
options.max_number_delegated_partitions = max_nnapi_partitions;
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
}
@@ -305,8 +316,6 @@ TEST_F(UnsupportedOperationOnDeviceTest,
<< "Expected Max not to be delegates since it not supported before NNAPI "
"1.2 and device declares to support only NNAPI 1.1.";
TFLITE_LOG_PROD(TFLITE_LOG_INFO, "First part of test done");
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
@@ -535,6 +544,198 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}
// Model with a chain of no-op additions (each op adds a zero tensor).
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
public:
LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
const NnApi* nnapi, const std::string& accelerator_name,
int max_nnapi_partitions)
: MultiOpModel(),
AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
auto* delegate = GetDelegate();
this->SetApplyDelegate([delegate](Interpreter* interpreter) {
interpreter->ModifyGraphWithDelegate(delegate);
});
const TensorData tensor_data{TensorType_FLOAT32, input_shape};
input_ = AddInput(tensor_data);
zero_input_ = AddInput(tensor_data);
std::vector<int> intermediate_outputs(graph_size - 1);
std::generate(
std::begin(intermediate_outputs), std::end(intermediate_outputs),
[this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });
output_ = AddOutput(tensor_data);
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(), {input_, zero_input_},
{intermediate_outputs[0]});
for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(),
{intermediate_outputs[i], zero_input_},
{intermediate_outputs[i + 1]});
}
AddBuiltinOp(
BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(),
{intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
{output_});
BuildInterpreter({GetShape(input_), GetShape(zero_input_)});
std::vector<float> zero(GetTensorSize(input_), 0.0);
PopulateTensor(zero_input_, zero);
}
void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
int CountNnApiPartitions() {
return std::count_if(
std::begin(interpreter_->execution_plan()),
std::end(interpreter_->execution_plan()), [this](const int node_index) {
return interpreter_->node_and_registration(node_index)
->first.delegate != nullptr;
});
}
private:
int input_;
int zero_input_;
int output_;
};
class NodeFilter {
public:
void ConfigureSupportedNodes(
int graph_size, const std::unordered_set<int>& unsupported_indexes) {
graph_size_ = graph_size;
unsupported_indexes_ = unsupported_indexes;
}
void SetNodeSupport(bool* supported_ops) {
for (int i = 0; i < graph_size_; i++) {
supported_ops[i] = (unsupported_indexes_.count(i) == 0);
}
}
private:
int graph_size_;
std::unordered_set<int> unsupported_indexes_;
};
// Using the same node filter for all DelegatePartitionLimitTests
// because StubGetSupportedOperationsForDevicesWith expects a plain C
// function pointer.
NodeFilter* DelegatePartitionLimitTestNodeFilter() {
static NodeFilter* node_filter = new NodeFilter();
return node_filter;
}
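// Why a singleton rather than a test-fixture member: the stub installed in
// Init() below is a captureless lambda, which is the only kind of lambda
// that converts to the plain C function pointer the NNAPI hook expects
// (illustration only):
//
//   void (*ok)() = [] { DelegatePartitionLimitTestNodeFilter(); };  // fine
//   int id = 0;
//   // void (*bad)() = [&id] { ++id; };  // error: capturing lambda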
class DelegatePartitionLimitTest
: public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
protected:
// Configures the underlying graph to generate a set of NNAPI partitions
// with the sizes specified in nnapi_partition_sizes and the given
// input_shape.
void Init(int max_nnapi_partitions,
const std::vector<int>& nnapi_partition_sizes,
const std::vector<int>& input_shape) {
// The graph will have as its number of nodes the sum of the nodes in the
// NNAPI partitions plus nnapi_partition_sizes.size() - 1 nodes that will
// not be supported by NNAPI and will cause the graph to be partitioned.
graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
std::end(nnapi_partition_sizes),
nnapi_partition_sizes.size() - 1);
std::unordered_set<int> unsupported_ops_idxs;
int partition_node_idx = -1;
for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
partition_node_idx += nnapi_partition_sizes[i] + 1;
unsupported_ops_idxs.insert(partition_node_idx);
}
DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
graph_size_, unsupported_ops_idxs);
nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
[](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
bool* supported_ops) -> int {
DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
return ANEURALNETWORKS_NO_ERROR;
});
model_ = std::make_unique<LongIdentityModel>(
input_shape, graph_size_, nnapi_mock_->GetNnApi(),
/*accelerator_name=*/"test-device", max_nnapi_partitions);
}
std::unique_ptr<LongIdentityModel> model_;
int OriginalGraphSize() { return graph_size_; }
private:
int graph_size_;
};
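// Worked instance of the Init() arithmetic above (illustration only): for
// nnapi_partition_sizes = {3, 2}, graph_size_ = 3 + 2 + (2 - 1) = 6 and
// partition_node_idx advances to -1 + (3 + 1) = 3, so node 3 is the single
// NNAPI-unsupported node separating partitions {0, 1, 2} and {4, 5}.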
TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
Init(/*max_nnapi_partitions=*/1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
Init(/*max_nnapi_partitions=*/0,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
Init(/*max_nnapi_partitions=*/-1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
Init(/*max_nnapi_partitions=*/3,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
Init(/*max_nnapi_partitions=*/1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegatePartitionsWithHigherNodeCount) {
Init(/*max_nnapi_partitions=*/2,
/*nnapi_partition_sizes=*/{1, 5, 2, 4},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
}
} // namespace
} // namespace tflite