Expose option to limit the number of partitions that will be delegated to NNAPI

PiperOrigin-RevId: 295962456
Change-Id: I43e13700e23b798ce786b7f1034066961c4c3613
Stefano Galarraga 2020-02-19 07:21:11 -08:00 committed by TensorFlower Gardener
parent 28046a55b7
commit 911d4a618a
5 changed files with 409 additions and 60 deletions


@@ -65,24 +65,35 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
}
public Options setAcceleratorName(String name) {
this.accelerator_name = name;
this.acceleratorName = name;
return this;
}
public Options setCacheDir(String name) {
this.cache_dir = name;
public Options setCacheDir(String cacheDir) {
this.cacheDir = cacheDir;
return this;
}
public Options setModelToken(String name) {
this.model_token = name;
public Options setModelToken(String modelToken) {
this.modelToken = modelToken;
return this;
}
int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
String accelerator_name = null;
String cache_dir = null;
String model_token = null;
/**
* Sets the maximum number of graph partitions that the delegate will try to delegate. If more
* partitions could be delegated than the limit, the ones with the largest number of nodes will
* be chosen. If unset, the NNAPI default limit will be used.
*/
public Options setMaxNumberOfDelegatedPartitions(int limit) {
this.maxDelegatedPartitions = limit;
return this;
}
private int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
private String acceleratorName = null;
private String cacheDir = null;
private String modelToken = null;
private Integer maxDelegatedPartitions = null;
}
public NnApiDelegate(Options options) {
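The Java Options above mirror the native StatefulNnApiDelegate::Options one-to-one, so the equivalent native-side configuration looks roughly like this (a minimal sketch; "example-accelerator" is a placeholder name and MakeLimitedDelegate is a hypothetical helper, not part of this change):

#include <memory>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

// Builds a delegate targeting a named accelerator that delegates at most two
// partitions; mirrors setAcceleratorName / setMaxNumberOfDelegatedPartitions.
std::unique_ptr<tflite::StatefulNnApiDelegate> MakeLimitedDelegate() {
  tflite::StatefulNnApiDelegate::Options options;
  options.accelerator_name = "example-accelerator";  // placeholder name
  options.max_number_delegated_partitions = 2;       // <= 0 would mean no limit
  return std::make_unique<tflite::StatefulNnApiDelegate>(options);
}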
@@ -91,9 +102,10 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
delegateHandle =
createDelegate(
options.executionPreference,
options.accelerator_name,
options.cache_dir,
options.model_token);
options.acceleratorName,
options.cacheDir,
options.modelToken,
options.maxDelegatedPartitions != null ? options.maxDelegatedPartitions : -1);
}
public NnApiDelegate() {
@@ -118,8 +130,13 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
}
}
//
private static native long createDelegate(
int preference, String device_name, String cache_dir, String model_token);
int preference,
String deviceName,
String cacheDir,
String modelToken,
int maxDelegatedPartitions);
private static native void deleteDelegate(long delegateHandle);
}


@@ -26,7 +26,7 @@ using namespace tflite;
JNIEXPORT jlong JNICALL
Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
JNIEnv* env, jclass clazz, jint preference, jstring accelerator_name,
jstring cache_dir, jstring model_token) {
jstring cache_dir, jstring model_token, jint max_delegated_partitions) {
StatefulNnApiDelegate::Options options = StatefulNnApiDelegate::Options();
options.execution_preference =
(StatefulNnApiDelegate::Options::ExecutionPreference)preference;
@@ -40,6 +40,10 @@ Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
options.model_token = env->GetStringUTFChars(model_token, NULL);
}
if (max_delegated_partitions >= 0) {
options.max_number_delegated_partitions = max_delegated_partitions;
}
auto delegate = new StatefulNnApiDelegate(options);
if (options.accelerator_name) {
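Both layers agree on -1 as the "unset" sentinel: the Java constructor passes -1 when setMaxNumberOfDelegatedPartitions was never called, and the JNI code above only overrides the native default (0, meaning no limit) for non-negative values. A standalone sketch of that resolution rule (illustration only; ResolveMaxPartitions is a hypothetical helper):

#include <iostream>

// -1 means "unset" on the Java side; any value >= 0 overrides the native
// Options default of 0 ("no limit").
int ResolveMaxPartitions(int jni_value, int native_default) {
  return jni_value >= 0 ? jni_value : native_default;
}

int main() {
  std::cout << ResolveMaxPartitions(-1, 0) << "\n";  // unset -> 0 (no limit)
  std::cout << ResolveMaxPartitions(3, 0) << "\n";   // user-set -> 3
}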


@@ -22,6 +22,7 @@ limitations under the License.
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <string>
@@ -3850,6 +3851,8 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
delegate_data_.model_token = options.model_token;
}
delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
delegate_data_.max_number_delegated_partitions =
options.max_number_delegated_partitions;
TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
"Created TensorFlow Lite delegate for NNAPI.");
Prepare = DoPrepare;
@@ -3877,6 +3880,8 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
? nullptr
: delegate_data->model_token.c_str();
options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
options.max_number_delegated_partitions =
delegate_data->max_number_delegated_partitions;
return options;
}
@@ -3943,6 +3948,110 @@ int StatefulNnApiDelegate::GetNnApiErrno() const {
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
namespace {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
const std::vector<int>& data) {
std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
TfLiteIntArrayCreate(data.size()));
std::copy(data.begin(), data.end(), result->data);
return result;
}
} // namespace
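// Usage sketch for the helper above (illustration only, not part of the
// change): the returned array owns a copy of the vector's contents and is
// released by TfLiteIntArrayDeleter when the unique_ptr is destroyed.
//
//   std::vector<int> nodes = {2, 5, 7};
//   auto node_array = BuildTfLiteIntArray(nodes);
//   // node_array->size == 3; node_array->data holds {2, 5, 7}.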
// static
TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
const std::vector<int>& supported_nodes,
std::vector<int>* device_supported_nodes, int* num_partitions,
TfLiteDelegateParams** params_array, int* nnapi_errno) {
auto* delegate_data = static_cast<Data*>(delegate->data_);
// The first entry in the array is the element count
auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, supported_nodes_int_array.get(), params_array, num_partitions));
// For each partition, check which nodes are actually supported by the
// target accelerators.
delegate_data->delegate_state_cache.clear();
for (int idx = 0; idx < *num_partitions; idx++) {
const auto& partition_params = (*params_array)[idx];
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
TfLiteDelegateParams params_with_delegate = partition_params;
params_with_delegate.delegate = delegate;
TF_LITE_ENSURE_STATUS(
kernel_state->Init(context, &params_with_delegate, nnapi_errno));
std::vector<int> supported_partition_nodes;
TF_LITE_ENSURE_STATUS(
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
context, &supported_partition_nodes, nnapi_errno));
device_supported_nodes->insert(device_supported_nodes->end(),
supported_partition_nodes.begin(),
supported_partition_nodes.end());
bool model_fully_supported = (supported_partition_nodes.size() ==
partition_params.nodes_to_replace->size);
if (model_fully_supported) {
delegate_data->CacheDelegateKernel(&partition_params,
kernel_state.release());
}
}
if (device_supported_nodes->size() != supported_nodes.size()) {
// We changed the set of nodes to delegate; this will create a different
// partitioning layout.
auto device_sup_nodes_int_array =
BuildTfLiteIntArray(*device_supported_nodes);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, device_sup_nodes_int_array.get(), params_array,
num_partitions));
}
return kTfLiteOk;
}
// static
TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
int max_partitions,
std::vector<TfLiteDelegateParams> partition_params_array,
std::vector<int>* nodes_to_delegate) {
int num_partitions = partition_params_array.size();
if (max_partitions <= 0 || num_partitions <= max_partitions) {
return kTfLiteOk;
}
int number_delegated_partitions = std::count_if(
partition_params_array.begin(), partition_params_array.end(),
[nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
partition_params.nodes_to_replace->data[0]) !=
nodes_to_delegate->end();
});
if (number_delegated_partitions > max_partitions) {
std::sort(partition_params_array.begin(), partition_params_array.end(),
[](const TfLiteDelegateParams& left,
const TfLiteDelegateParams& right) -> bool {
// Reverse sort: partitions with more nodes come first.
return left.nodes_to_replace->size >
right.nodes_to_replace->size;
});
nodes_to_delegate->clear();
for (int i = 0; i < max_partitions; i++) {
const TfLiteDelegateParams& partition_params = partition_params_array[i];
nodes_to_delegate->insert(nodes_to_delegate->end(),
partition_params.nodes_to_replace->data,
partition_params.nodes_to_replace->data +
partition_params.nodes_to_replace->size);
}
}
return kTfLiteOk;
}
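// Worked example of the selection above (illustration only): with partition
// node counts {1, 5, 2, 4} and max_partitions == 2, the reverse sort yields
// {5, 4, 2, 1}, so only the nodes of the 5-node and 4-node partitions stay
// in *nodes_to_delegate; the 1- and 2-node partitions fall back to CPU.
//
//   std::vector<int> sizes = {1, 5, 2, 4};
//   std::sort(sizes.begin(), sizes.end(), std::greater<int>());  // {5,4,2,1}
//   sizes.resize(/*max_partitions=*/2);                          // {5, 4}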
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
TfLiteDelegate* delegate) {
auto* delegate_data = static_cast<Data*>(delegate->data_);
@@ -3998,10 +4107,8 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
}
}
}
// Allocate one element in vector already since TensorFlow Lite uses
// the first value as the number of nodes. The actual value will be set
// later, after the vector has been filled.
std::vector<int> supported_nodes(1);
std::vector<int> supported_nodes;
// We don't care about all nodes_, we only care about ones in the
// current plan.
TfLiteIntArray* plan;
@@ -4021,11 +4128,9 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
supported_nodes.push_back(node_index);
}
}
// First element in vector must be the number of actual nodes.
supported_nodes[0] = supported_nodes.size() - 1;
// If there are no delegated nodes, short-circuit node replacement.
if (!supported_nodes[0]) {
if (supported_nodes.empty()) {
return kTfLiteOk;
}
@@ -4082,40 +4187,20 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
std::vector<int>& nodes_to_delegate = supported_nodes;
if (is_accelerator_specified) {
std::vector<int> device_supported_nodes;
int num_partitions;
TfLiteDelegateParams* params_array;
int num_partitions = 0;
// The first entry in the array is the element count
std::vector<int> device_supported_nodes(1);
TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
&params_array, &num_partitions));
// For each partition, check which nodes are actually supported by the
// target accelerators.
delegate_data->delegate_state_cache.clear();
for (int idx = 0; idx < num_partitions; idx++) {
const auto& partition_params = params_array[idx];
auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
TfLiteDelegateParams params_with_delegate = partition_params;
params_with_delegate.delegate = delegate;
TF_LITE_ENSURE_STATUS(
kernel_state->Init(context, &params_with_delegate, nnapi_errno));
std::vector<int> supported_partition_nodes;
TF_LITE_ENSURE_STATUS(
kernel_state->GetOperationsSupportedByTargetNnApiDevices(
context, &supported_partition_nodes, nnapi_errno));
device_supported_nodes.insert(device_supported_nodes.end(),
supported_partition_nodes.begin(),
supported_partition_nodes.end());
bool model_fully_supported = (supported_partition_nodes.size() ==
partition_params.nodes_to_replace->size);
if (model_fully_supported) {
delegate_data->CacheDelegateKernel(&partition_params,
kernel_state.release());
}
}
TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
context, delegate, nnapi, supported_nodes, &device_supported_nodes,
&num_partitions, &params_array, nnapi_errno));
TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
delegate_options.max_number_delegated_partitions,
std::vector<TfLiteDelegateParams>(params_array,
params_array + num_partitions),
&device_supported_nodes));
device_supported_nodes[0] = device_supported_nodes.size() - 1;
nodes_to_delegate = device_supported_nodes;
}
@@ -4124,9 +4209,10 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
} else {
// Request TFLite to partition the graph and make kernels
// for each independent node sub set a new nnapi_delegate_kernel.
auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
return context->ReplaceNodeSubsetsWithDelegateKernels(
context, nnapi_delegate_kernel,
reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
delegate);
}
}


@@ -80,6 +80,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
// kernels, but allowing CPU allows partial acceleration of models. If this
// is set to true, NNAPI is only used if the whole model is accelerated.
bool disallow_nnapi_cpu = false;
// Specifies the max number of partitions to delegate. A value <= 0 means
// no limit.
// If the delegation of the full set of supported nodes would generate a
// number of partitions greater than this parameter, only
// <max_number_delegated_partitions> of them will actually be accelerated.
// The selection is currently done by sorting partitions in decreasing order
// of node count and selecting them until the limit is reached.
int max_number_delegated_partitions = 0;
};
// Uses default options.
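Since GetOptions now copies the new field back out of delegate_data_ (see the .cc hunk above), callers can inspect the effective limit. A hedged sketch, assuming a live StatefulNnApiDelegate named delegate:

// Sketch: read back the effective partition limit from a delegate.
const tflite::StatefulNnApiDelegate::Options effective =
    tflite::StatefulNnApiDelegate::GetOptions(&delegate);
if (effective.max_number_delegated_partitions <= 0) {
  // No limit: every supported partition is eligible for delegation.
}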
@@ -172,13 +181,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
// The key is the index of the first node in the partition.
// Couldn't use unique_ptr because of problems building on gcc
std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
// Maximum number of NNAPI partitions to delegate. Zero or negative means
// no limit. Copied from StatefulNnApiDelegate::Options.
int max_number_delegated_partitions;
~Data();
// Caches an initialised NNAPIDelegateKernel.
void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
NNAPIDelegateKernel* delegate_state);
// Returns a cached NNAPIDelegateKernel if available.
// Returns a cached NNAPIDelegateKernel if available, removing it
// from the cache and transferring ownership to the caller.
absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
const TfLiteDelegateParams* delegate_params);
};
@@ -211,6 +224,34 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
TfLiteDelegate* delegate,
TfLiteBufferHandle* handle);
// Returns the nodes that can be delegated via NNAPI to the accelerator
// specified in the delegate options and information about the way the
// graph will be partitioned if the supported nodes are delegated.
// Partition information is composed of the number of partitions and
// the delegate parameters associated with each partition.
// The method also caches in delegate->data_ the NNAPIDelegateKernel
// instances created during the device evaluation.
// All arguments are expected to be non-null.
static TfLiteStatus GetNodesSupportedByAccelerator(
TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
const std::vector<int>& supported_nodes,
std::vector<int>* device_supported_nodes, int* num_partitions,
TfLiteDelegateParams** params_array, int* nnapi_errno);
// Alters the given array of nodes_to_delegate to limit the number of
// NNAPI-owned partitions to be less than or equal to max_partitions. If
// max_partitions is less than or equal to zero the input is left unaltered.
// The nodes_to_delegate array is expected to contain the indexes of the
// nodes that would be delegated to NNAPI if this function were not called;
// it is altered to contain only the indexes of the nodes belonging to the
// partitions that are kept.
// The partition_params_array might be altered during the function's
// execution.
static TfLiteStatus LimitDelegatedPartitions(
int max_partitions,
std::vector<TfLiteDelegateParams> partition_params_array,
std::vector<int>* nodes_to_delegate);
// Delegate data presented through TfLiteDelegate::data_.
Data delegate_data_;
};


@@ -14,6 +14,14 @@ limitations under the License.
==============================================================================*/
#include <sys/mman.h>
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <ostream>
#include <unordered_set>
#include <vector>
#include <gtest/gtest.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
@@ -223,18 +231,21 @@ class AcceleratedModel {
protected:
// Builds a delegate with a target accelerator name.
explicit AcceleratedModel(const NnApi* nnapi,
const std::string& accelerator_name) {
AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
int max_nnapi_partitions = 0) {
StatefulNnApiDelegate::Options options;
options.accelerator_name = accelerator_name.c_str();
options.max_number_delegated_partitions = max_nnapi_partitions;
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
}
// Builds a delegate with no target accelerator name; the NNAPI CPU fallback
// implementation can be disabled using the disallow_nnapi_cpu flag.
explicit AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu) {
AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
int max_nnapi_partitions = 0) {
StatefulNnApiDelegate::Options options;
options.disallow_nnapi_cpu = disallow_nnapi_cpu;
options.max_number_delegated_partitions = max_nnapi_partitions;
stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
}
@@ -305,8 +316,6 @@ TEST_F(UnsupportedOperationOnDeviceTest,
<< "Expected Max not to be delegates since it not supported before NNAPI "
"1.2 and device declares to support only NNAPI 1.1.";
TFLITE_LOG_PROD(TFLITE_LOG_INFO, "First part of test done");
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
@@ -535,6 +544,198 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
}
// Model with a chain of no-op additions (each op adds a zero tensor).
class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
public:
LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
const NnApi* nnapi, const std::string& accelerator_name,
int max_nnapi_partitions)
: MultiOpModel(),
AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
auto* delegate = GetDelegate();
this->SetApplyDelegate([delegate](Interpreter* interpreter) {
interpreter->ModifyGraphWithDelegate(delegate);
});
const TensorData tensor_data{TensorType_FLOAT32, input_shape};
input_ = AddInput(tensor_data);
zero_input_ = AddInput(tensor_data);
std::vector<int> intermediate_outputs(graph_size - 1);
std::generate(
std::begin(intermediate_outputs), std::end(intermediate_outputs),
[this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });
output_ = AddOutput(tensor_data);
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(), {input_, zero_input_},
{intermediate_outputs[0]});
for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(),
{intermediate_outputs[i], zero_input_},
{intermediate_outputs[i + 1]});
}
AddBuiltinOp(
BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_).Union(),
{intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
{output_});
BuildInterpreter({GetShape(input_), GetShape(zero_input_)});
std::vector<float> zero(GetTensorSize(input_), 0.0);
PopulateTensor(zero_input_, zero);
}
void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
int CountNnApiPartitions() {
return std::count_if(
std::begin(interpreter_->execution_plan()),
std::end(interpreter_->execution_plan()), [this](const int node_index) {
return interpreter_->node_and_registration(node_index)
->first.delegate != nullptr;
});
}
private:
int input_;
int zero_input_;
int output_;
};
class NodeFilter {
public:
void ConfigureSupportedNodes(
int graph_size, const std::unordered_set<int>& unsupported_indexes) {
graph_size_ = graph_size;
unsupported_indexes_ = unsupported_indexes;
}
void SetNodeSupport(bool* supported_ops) {
for (int i = 0; i < graph_size_; i++) {
supported_ops[i] = (unsupported_indexes_.count(i) == 0);
}
}
private:
int graph_size_;
std::unordered_set<int> unsupported_indexes_;
};
// Using the same node filter for all DelegatePartitionLimitTests
// because StubGetSupportedOperationsForDevicesWith expects a plain C
// function pointer.
NodeFilter* DelegatePartitionLimitTestNodeFilter() {
static NodeFilter* node_filter = new NodeFilter();
return node_filter;
}
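// Why a singleton rather than a test-fixture member: the stub installed in
// Init() below is a captureless lambda, which is the only kind of lambda
// that converts to the plain C function pointer the NNAPI hook expects
// (illustration only):
//
//   void (*ok)() = [] { DelegatePartitionLimitTestNodeFilter(); };  // fine
//   int id = 0;
//   // void (*bad)() = [&id] { ++id; };  // error: capturing lambda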
class DelegatePartitionLimitTest
: public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
protected:
// Configures the underlying graph to generate a set of NNAPI partitions
// with the sizes specified in nnapi_partition_sizes and the given
// input_shape.
void Init(int max_nnapi_partitions,
const std::vector<int>& nnapi_partition_sizes,
const std::vector<int>& input_shape) {
// The graph will have as its number of nodes the sum of the nodes in the
// NNAPI partitions plus nnapi_partition_sizes.size() - 1 nodes that will
// not be supported by NNAPI and will cause the graph to be partitioned.
graph_size_ = std::accumulate(std::begin(nnapi_partition_sizes),
std::end(nnapi_partition_sizes),
nnapi_partition_sizes.size() - 1);
std::unordered_set<int> unsupported_ops_idxs;
int partition_node_idx = -1;
for (int i = 0; i < nnapi_partition_sizes.size() - 1; i++) {
partition_node_idx += nnapi_partition_sizes[i] + 1;
unsupported_ops_idxs.insert(partition_node_idx);
}
DelegatePartitionLimitTestNodeFilter()->ConfigureSupportedNodes(
graph_size_, unsupported_ops_idxs);
nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
[](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
bool* supported_ops) -> int {
DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
return ANEURALNETWORKS_NO_ERROR;
});
model_ = std::make_unique<LongIdentityModel>(
input_shape, graph_size_, nnapi_mock_->GetNnApi(),
/*accelerator_name=*/"test-device", max_nnapi_partitions);
}
std::unique_ptr<LongIdentityModel> model_;
int OriginalGraphSize() { return graph_size_; }
private:
int graph_size_;
};
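// Worked instance of the Init() arithmetic above (illustration only): for
// nnapi_partition_sizes = {3, 2}, graph_size_ = 3 + 2 + (2 - 1) = 6 and
// partition_node_idx advances to -1 + (3 + 1) = 3, so node 3 is the single
// NNAPI-unsupported node separating partitions {0, 1, 2} and {4, 5}.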
TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
Init(/*max_nnapi_partitions=*/1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
Init(/*max_nnapi_partitions=*/0,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
Init(/*max_nnapi_partitions=*/-1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
Init(/*max_nnapi_partitions=*/3,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
}
TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
Init(/*max_nnapi_partitions=*/1,
/*nnapi_partition_sizes=*/{3, 2},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 1);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 3);
}
TEST_F(DelegatePartitionLimitTest,
ShouldDelegatePartitionsWithHigherNodeCount) {
Init(/*max_nnapi_partitions=*/2,
/*nnapi_partition_sizes=*/{1, 5, 2, 4},
/*input_shape=*/{1, 2, 2, 1});
EXPECT_EQ(model_->CountNnApiPartitions(), 2);
EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(), OriginalGraphSize() - 9);
}
} // namespace
} // namespace tflite