Expose option to limit the number of partitions that will be delegated to NNAPI

PiperOrigin-RevId: 295962456 Change-Id: I43e13700e23b798ce786b7f1034066961c4c3613
2020-02-19 07:21:11 -08:00 · 2020-02-19 07:21:11 -08:00 · 911d4a618a
commit 911d4a618a
parent 28046a55b7
5 changed files with 409 additions and 60 deletions
--- a/tensorflow/lite/delegates/nnapi/java/src/main/java/org/tensorflow/lite/nnapi/NnApiDelegate.java
+++ b/tensorflow/lite/delegates/nnapi/java/src/main/java/org/tensorflow/lite/nnapi/NnApiDelegate.java
@ -65,24 +65,35 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
    }
    public Options setAcceleratorName(String name) {
-      this.accelerator_name = name;
+      this.acceleratorName = name;
      return this;
    }
-    public Options setCacheDir(String name) {
+    public Options setCacheDir(String cacheDir) {
-      this.cache_dir = name;
+      this.cacheDir = cacheDir;
      return this;
    }
-    public Options setModelToken(String name) {
+    public Options setModelToken(String modelToken) {
-      this.model_token = name;
+      this.modelToken = modelToken;
      return this;
    }
-    int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
+    /**
-    String accelerator_name = null;
+     * Sets the maximum number of graph partitions that the delegate will try to delegate. If more
-    String cache_dir = null;
+     * partitions could be delegated than the limit, the ones with the larger number of nodes will
-    String model_token = null;
+     * be chosen. If unset it will use the NNAPI default limit.
     */
    public Options setMaxNumberOfDelegatedPartitions(int limit) {
      this.maxDelegatedPartitions = limit;
      return this;
    }
    private int executionPreference = EXECUTION_PREFERENCE_UNDEFINED;
    private String acceleratorName = null;
    private String cacheDir = null;
    private String modelToken = null;
    private Integer maxDelegatedPartitions = null;
  }
  public NnApiDelegate(Options options) {
@ -91,9 +102,10 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
    delegateHandle =
        createDelegate(
            options.executionPreference,
-            options.accelerator_name,
+            options.acceleratorName,
-            options.cache_dir,
+            options.cacheDir,
-            options.model_token);
+            options.modelToken,
            options.maxDelegatedPartitions != null ? options.maxDelegatedPartitions : -1);
  }
  public NnApiDelegate() {
@ -118,8 +130,13 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
    }
  }
  //
  private static native long createDelegate(
-      int preference, String device_name, String cache_dir, String model_token);
+      int preference,
      String deviceName,
      String cacheDir,
      String modelToken,
      int maxDelegatedPartitions);
  private static native void deleteDelegate(long delegateHandle);
 }
--- a/tensorflow/lite/delegates/nnapi/java/src/main/native/nnapi_delegate_jni.cc
+++ b/tensorflow/lite/delegates/nnapi/java/src/main/native/nnapi_delegate_jni.cc
@ -26,7 +26,7 @@ using namespace tflite;
 JNIEXPORT jlong JNICALL
 Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
    JNIEnv* env, jclass clazz, jint preference, jstring accelerator_name,
-    jstring cache_dir, jstring model_token) {
+    jstring cache_dir, jstring model_token, jint max_delegated_partitions) {
  StatefulNnApiDelegate::Options options = StatefulNnApiDelegate::Options();
  options.execution_preference =
      (StatefulNnApiDelegate::Options::ExecutionPreference)preference;
@ -40,6 +40,10 @@ Java_org_tensorflow_lite_nnapi_NnApiDelegate_createDelegate(
    options.model_token = env->GetStringUTFChars(model_token, NULL);
  }
  if (max_delegated_partitions >= 0) {
    options.max_number_delegated_partitions = max_delegated_partitions;
  }
  auto delegate = new StatefulNnApiDelegate(options);
  if (options.accelerator_name) {
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@ -22,6 +22,7 @@ limitations under the License.
 #include <functional>
 #include <initializer_list>
 #include <iostream>
 #include <iterator>
 #include <map>
 #include <memory>
 #include <string>
@ -3850,6 +3851,8 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
    delegate_data_.model_token = options.model_token;
  }
  delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
  delegate_data_.max_number_delegated_partitions =
      options.max_number_delegated_partitions;
  TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                       "Created TensorFlow Lite delegate for NNAPI.");
  Prepare = DoPrepare;
@ -3877,6 +3880,8 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
                            ? nullptr
                            : delegate_data->model_token.c_str();
  options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
  options.max_number_delegated_partitions =
      delegate_data->max_number_delegated_partitions;
  return options;
 }
@ -3943,6 +3948,110 @@ int StatefulNnApiDelegate::GetNnApiErrno() const {
 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
 namespace {
 std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> BuildTfLiteIntArray(
    const std::vector<int>& data) {
  std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> result(
      TfLiteIntArrayCreate(data.size()));
  std::copy(data.begin(), data.end(), result->data);
  return result;
 }
 }  // namespace
 // Determines which of `supported_nodes` the target NNAPI devices can actually
 // run and reports how the graph would be partitioned for them.
 //
 // `supported_nodes` is first partitioned via PreviewDelegatePartitioning. For
 // every resulting partition a NNAPIDelegateKernel is initialised and asked for
 // the operations supported by the target devices; those node indexes are
 // appended to `device_supported_nodes`. Kernels of partitions that are
 // supported in full are stored in the delegate's kernel cache. If some nodes
 // were dropped, the partitioning preview is run again on the reduced set so
 // that `params_array` / `num_partitions` describe the final layout.
 //
 // Returns kTfLiteOk on success. Each fallible call is wrapped in
 // TF_LITE_ENSURE_STATUS, which is expected to return early on a non-OK status.
 // static
 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
    TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
    const std::vector<int>& supported_nodes,
    std::vector<int>* device_supported_nodes, int* num_partitions,
    TfLiteDelegateParams** params_array, int* nnapi_errno) {
  auto* delegate_data = static_cast<Data*>(delegate->data_);
  // Wrap the candidate node indexes in a TfLiteIntArray, the type expected by
  // the partitioning preview call.
  auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
  TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
      context, supported_nodes_int_array.get(), params_array, num_partitions));
  // For each partition check which nodes are actually supported by the
  // target accelerators.
  // Start from an empty kernel cache before evaluating the partitions.
  delegate_data->delegate_state_cache.clear();
  for (int idx = 0; idx < *num_partitions; idx++) {
    const auto& partition_params = (*params_array)[idx];
    auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
    // Init receives a copy of the partition parameters whose delegate field
    // is set to this delegate.
    TfLiteDelegateParams params_with_delegate = partition_params;
    params_with_delegate.delegate = delegate;
    TF_LITE_ENSURE_STATUS(
        kernel_state->Init(context, &params_with_delegate, nnapi_errno));
    std::vector<int> supported_partition_nodes;
    TF_LITE_ENSURE_STATUS(
        kernel_state->GetOperationsSupportedByTargetNnApiDevices(
            context, &supported_partition_nodes, nnapi_errno));
    device_supported_nodes->insert(device_supported_nodes->end(),
                                   supported_partition_nodes.begin(),
                                   supported_partition_nodes.end());
    bool model_fully_supported = (supported_partition_nodes.size() ==
                                  partition_params.nodes_to_replace->size);
    if (model_fully_supported) {
      // The kernel is released from the unique_ptr and handed to the cache;
      // otherwise it is destroyed at the end of this iteration.
      delegate_data->CacheDelegateKernel(&partition_params,
                                         kernel_state.release());
    }
  }
  if (device_supported_nodes->size() != supported_nodes.size()) {
    // We changed the set of nodes to delegate; this will create a different
    // partitioning layout, so refresh the partition information.
    auto device_sup_nodes_int_array =
        BuildTfLiteIntArray(*device_supported_nodes);
    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
        context, device_sup_nodes_int_array.get(), params_array,
        num_partitions));
  }
  return kTfLiteOk;
 }
 // Restricts `nodes_to_delegate` to the nodes of at most `max_partitions`
 // partitions, preferring the partitions with the most nodes.
 //
 // Nothing is changed when `max_partitions` is not positive, when there are no
 // more partitions than the limit, or when the number of partitions whose
 // first node appears in `nodes_to_delegate` does not exceed the limit.
 // `partition_params_array` is a by-value copy, so sorting it here does not
 // affect the caller's array. Always returns kTfLiteOk.
 // static
 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
    int max_partitions,
    std::vector<TfLiteDelegateParams> partition_params_array,
    std::vector<int>* nodes_to_delegate) {
  const int total_partitions = partition_params_array.size();
  const bool limit_disabled = max_partitions <= 0;
  if (limit_disabled || total_partitions <= max_partitions) {
    return kTfLiteOk;
  }

  // A partition counts as delegated when its first node is among the nodes
  // currently selected for delegation.
  const auto is_delegated =
      [nodes_to_delegate](const TfLiteDelegateParams& params) {
        const int first_node = params.nodes_to_replace->data[0];
        const auto match = std::find(nodes_to_delegate->begin(),
                                     nodes_to_delegate->end(), first_node);
        return match != nodes_to_delegate->end();
      };
  const int delegated_partitions =
      std::count_if(partition_params_array.begin(),
                    partition_params_array.end(), is_delegated);
  if (delegated_partitions <= max_partitions) {
    return kTfLiteOk;
  }

  // Order partitions from the largest to the smallest node count.
  const auto has_more_nodes = [](const TfLiteDelegateParams& left,
                                 const TfLiteDelegateParams& right) -> bool {
    return left.nodes_to_replace->size > right.nodes_to_replace->size;
  };
  std::sort(partition_params_array.begin(), partition_params_array.end(),
            has_more_nodes);

  // Rebuild the selection from the first `max_partitions` partitions only.
  nodes_to_delegate->clear();
  const auto selection_end = partition_params_array.begin() + max_partitions;
  for (auto it = partition_params_array.begin(); it != selection_end; ++it) {
    const auto* partition_nodes = it->nodes_to_replace;
    nodes_to_delegate->insert(nodes_to_delegate->end(), partition_nodes->data,
                              partition_nodes->data + partition_nodes->size);
  }
  return kTfLiteOk;
 }
 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                              TfLiteDelegate* delegate) {
  auto* delegate_data = static_cast<Data*>(delegate->data_);
@ -3998,10 +4107,8 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
      }
    }
  }
-  // Allocate one element in vector already since TensorFlow Lite uses
+
-  // the first value as the number of nodes. The actual value will be set
+  std::vector<int> supported_nodes;
  // later, after the vector has been filled.
  std::vector<int> supported_nodes(1);
  // We don't care about all nodes_, we only care about ones in the
  // current plan.
  TfLiteIntArray* plan;
@ -4021,11 +4128,9 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
      supported_nodes.push_back(node_index);
    }
  }
  // First element in vector must be the number of actual nodes.
  supported_nodes[0] = supported_nodes.size() - 1;
  // If there are no delegated nodes, short-circuit node replacement.
-  if (!supported_nodes[0]) {
+  if (supported_nodes.empty()) {
    return kTfLiteOk;
  }
@ -4082,40 +4187,20 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
  std::vector<int>& nodes_to_delegate = supported_nodes;
  if (is_accelerator_specified) {
    std::vector<int> device_supported_nodes;
    int num_partitions;
    TfLiteDelegateParams* params_array;
    int num_partitions = 0;
    // The first entry in the array is the element count
    std::vector<int> device_supported_nodes(1);
    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
        context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
        &params_array, &num_partitions));
    // For each partition check if which nodes are actually supported by the
    // target accelerators.
    delegate_data->delegate_state_cache.clear();
    for (int idx = 0; idx < num_partitions; idx++) {
      const auto& partition_params = params_array[idx];
      auto kernel_state = absl::make_unique<NNAPIDelegateKernel>(nnapi);
      TfLiteDelegateParams params_with_delegate = partition_params;
      params_with_delegate.delegate = delegate;
      TF_LITE_ENSURE_STATUS(
          kernel_state->Init(context, &params_with_delegate, nnapi_errno));
      std::vector<int> supported_partition_nodes;
      TF_LITE_ENSURE_STATUS(
          kernel_state->GetOperationsSupportedByTargetNnApiDevices(
              context, &supported_partition_nodes, nnapi_errno));
      device_supported_nodes.insert(device_supported_nodes.end(),
                                    supported_partition_nodes.begin(),
                                    supported_partition_nodes.end());
-      bool model_fully_supported = (supported_partition_nodes.size() ==
+    TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
-                                    partition_params.nodes_to_replace->size);
+        context, delegate, nnapi, supported_nodes, &device_supported_nodes,
-      if (model_fully_supported) {
+        &num_partitions, &params_array, nnapi_errno));
-        delegate_data->CacheDelegateKernel(&partition_params,
+
-                                           kernel_state.release());
+    TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
-      }
+        delegate_options.max_number_delegated_partitions,
-    }
+        std::vector<TfLiteDelegateParams>(params_array,
                                          params_array + num_partitions),
        &device_supported_nodes));
    device_supported_nodes[0] = device_supported_nodes.size() - 1;
    nodes_to_delegate = device_supported_nodes;
  }
@ -4124,9 +4209,10 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
  } else {
    // Request TFLite to partition the graph and make kernels
    // for each independent node sub set a new nnapi_delegate_kernel.
    auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
    return context->ReplaceNodeSubsetsWithDelegateKernels(
-        context, nnapi_delegate_kernel,
+        context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
-        reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
+        delegate);
  }
 }
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
@ -80,6 +80,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
    // kernels, but allowing CPU allows partial acceleration of models. If this
    // is set to true, NNAPI is only used if the whole model is accelerated.
    bool disallow_nnapi_cpu = false;
    // Specifies the max number of partitions to delegate. A value <= 0 means
    // no limit.
    // If the delegation of the full set of supported nodes would generate a
    // number of partitions greater than this parameter, only
    // <max_number_delegated_partitions> of them will be actually accelerated.
    // The selection is currently done sorting partitions in decreasing order
    // of number of nodes and selecting them until the limit is reached.
    int max_number_delegated_partitions = 0;
  };
  // Uses default options.
@ -172,13 +181,17 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
    // The key is the index of the first node in the partition.
    // Couldn't use unique_ptr because of problems building on gcc
    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
    // Maximum number of NNAPI partition to delegate. Zero or negative means
    // no limit. Copied from StatefulNnApiDelegate::Options
    int max_number_delegated_partitions;
    ~Data();
    // Caches an initialised NNAPIDelegateKernel.
    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
                             NNAPIDelegateKernel* delegate_state);
-    // Returns a cached NNAPIDelegateKernel if available.
+    // Returns a cached NNAPIDelegateKernel if available and removes it
    // from the cache transferring the ownership to the caller.
    absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
        const TfLiteDelegateParams* delegate_params);
  };
@ -211,6 +224,34 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
                                 TfLiteDelegate* delegate,
                                 TfLiteBufferHandle* handle);
  // Returns the nodes that can be delegated via NNAPI to the accelerator
  // specified in the delegate options and information about the way the
  // graph will be partitioned if the supported nodes will be delegated.
  // Partition information is composed by the number of partitions and
  // the delegate parameters associated to each partition.
  // The method also caches in delegate->data the NNApiDelegateKernel instances
  // that have been created during the device evaluation.
  // All arguments are expected to be non-null.
  static TfLiteStatus GetNodesSupportedByAccelerator(
      TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
      const std::vector<int>& supported_nodes,
      std::vector<int>* device_supported_nodes, int* num_partitions,
      TfLiteDelegateParams** params_array, int* nnapi_errno);
  // Alters the given array of nodes_to_delegate to limit the number of NNAPI
  // owned partitions to be less than or equal to max_partitions. If
  // max_partitions is less than or equal to zero the input is left unaltered.
  // The nodes_to_delegate array is expected to contain the indexes of the
  // nodes that would be delegated to NNAPI if this function weren't
  // called (there is no leading element count). When the limit applies it is
  // overwritten with the indexes of the nodes belonging to the
  // max_partitions partitions with the largest number of nodes.
  // partition_params_array is taken by value: it is sorted inside the
  // function and the caller's array is not modified.
  static TfLiteStatus LimitDelegatedPartitions(
      int max_partitions,
      std::vector<TfLiteDelegateParams> partition_params_array,
      std::vector<int>* nodes_to_delegate);
  // Delegate data presented through TfLiteDelegate::data_.
  Data delegate_data_;
 };
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
@ -14,6 +14,14 @@ limitations under the License.
 ==============================================================================*/
 #include <sys/mman.h>
 #include <algorithm>
 #include <iterator>
 #include <memory>
 #include <numeric>
 #include <ostream>
 #include <unordered_set>
 #include <vector>
 #include <gtest/gtest.h>
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
@ -223,18 +231,21 @@ class AcceleratedModel {
 protected:
  // build a delegate with a target accelerator name.
-  explicit AcceleratedModel(const NnApi* nnapi,
+  AcceleratedModel(const NnApi* nnapi, const std::string& accelerator_name,
-                            const std::string& accelerator_name) {
+                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.accelerator_name = accelerator_name.c_str();
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
  }
  // build a delegate with no target accelerator name, can disable the NNAPI CPU
  // fallback implementation using the disallow_nnapi_cpu flag.
-  explicit AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu) {
+  AcceleratedModel(const NnApi* nnapi, bool disallow_nnapi_cpu,
                   int max_nnapi_partitions = 0) {
    StatefulNnApiDelegate::Options options;
    options.disallow_nnapi_cpu = disallow_nnapi_cpu;
    options.max_number_delegated_partitions = max_nnapi_partitions;
    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options));
  }
@ -305,8 +316,6 @@ TEST_F(UnsupportedOperationOnDeviceTest,
      << "Expected Max not to be delegates since it not supported before NNAPI "
         "1.2 and device declares to support only NNAPI 1.1.";
  TFLITE_LOG_PROD(TFLITE_LOG_INFO, "First part of test done");
  nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
  ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
@ -535,6 +544,198 @@ TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
 }
 // Model with a chain of no-op (add with zero operations)
 class LongIdentityModel : public MultiOpModel, public AcceleratedModel {
 public:
  LongIdentityModel(const std::vector<int>& input_shape, int graph_size,
                    const NnApi* nnapi, const std::string& accelerator_name,
                    int max_nnapi_partitions)
      : MultiOpModel(),
        AcceleratedModel(nnapi, accelerator_name, max_nnapi_partitions) {
    auto* delegate = GetDelegate();
    this->SetApplyDelegate([delegate](Interpreter* interpreter) {
      interpreter->ModifyGraphWithDelegate(delegate);
    });
    const TensorData tensor_data{TensorType_FLOAT32, input_shape};
    input_ = AddInput(tensor_data);
    zero_input_ = AddInput(tensor_data);
    std::vector<int> intermediate_outputs(graph_size - 1);
    std::generate(
        std::begin(intermediate_outputs), std::end(intermediate_outputs),
        [this, &tensor_data]() { return AddInnerTensor<float>(tensor_data); });
    output_ = AddOutput(tensor_data);
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_).Union(), {input_, zero_input_},
                 {intermediate_outputs[0]});
    for (int i = 0; i < intermediate_outputs.size() - 1; i++) {
      AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                   CreateAddOptions(builder_).Union(),
                   {intermediate_outputs[i], zero_input_},
                   {intermediate_outputs[i + 1]});
    }
    AddBuiltinOp(
        BuiltinOperator_ADD, BuiltinOptions_AddOptions,
        CreateAddOptions(builder_).Union(),
        {intermediate_outputs[intermediate_outputs.size() - 1], zero_input_},
        {output_});
    BuildInterpreter({GetShape(input_), GetShape(zero_input_)});
    std::vector<float> zero(GetTensorSize(input_), 0.0);
    PopulateTensor(zero_input_, zero);
  }
  void SetInput(std::vector<float> value) { PopulateTensor(input_, value); }
  int CountNnApiPartitions() {
    return std::count_if(
        std::begin(interpreter_->execution_plan()),
        std::end(interpreter_->execution_plan()), [this](const int node_index) {
          return interpreter_->node_and_registration(node_index)
                     ->first.delegate != nullptr;
        });
  }
 private:
  int input_;
  int zero_input_;
  int output_;
 };
 // Describes which nodes of a graph should be reported as supported: every
 // node index in [0, graph_size) is supported unless it was listed as
 // unsupported in ConfigureSupportedNodes.
 class NodeFilter {
 public:
  // Stores the number of nodes in the graph and the set of node indexes that
  // must be reported as unsupported.
  void ConfigureSupportedNodes(
      int graph_size, const std::unordered_set<int>& unsupported_indexes) {
    graph_size_ = graph_size;
    unsupported_indexes_ = unsupported_indexes;
  }

  // Writes one flag per configured node into `supported_ops`: true when the
  // node index is not in the unsupported set. `supported_ops` must have room
  // for at least graph_size entries. Nothing is written if the filter has not
  // been configured yet.
  void SetNodeSupport(bool* supported_ops) {
    for (int i = 0; i < graph_size_; i++) {
      supported_ops[i] = (unsupported_indexes_.count(i) == 0);
    }
  }

 private:
  // Defaults to zero so that an unconfigured filter never reads an
  // indeterminate size.
  int graph_size_ = 0;
  std::unordered_set<int> unsupported_indexes_;
 };
 // Returns the single NodeFilter instance shared by all
 // DelegatePartitionLimitTests. A shared instance is used because
 // StubGetSupportedOperationsForDevicesWith wants a C function, which cannot
 // carry per-test state.
 NodeFilter* DelegatePartitionLimitTestNodeFilter() {
  // Created on first use and intentionally never deleted.
  static NodeFilter* const shared_filter = new NodeFilter();
  return shared_filter;
 }
 // Fixture building a LongIdentityModel whose nodes form NNAPI partitions of
 // chosen sizes, delegated with a configurable partition limit.
 class DelegatePartitionLimitTest
    : public ::tflite::delegate::nnapi::NnApiDelegateMockTest {
 protected:
  // Configure the underlying graph to generate a set of nnapi partitions
  // with the sizes specified in nnapi_partition_sizes and the given
  // input_shape, delegating at most max_nnapi_partitions of them.
  void Init(int max_nnapi_partitions,
            const std::vector<int>& nnapi_partition_sizes,
            const std::vector<int>& input_shape) {
    // The graph will have as number of nodes the sum of nodes in the NNAPI
    // partitions plus nnapi_partition_sizes.size() - 1 nodes that will be
    // not supported by NNAPI and will cause the graph to be split into
    // separate NNAPI partitions.
    const auto separator_count = nnapi_partition_sizes.size() - 1;
    graph_size_ =
        std::accumulate(nnapi_partition_sizes.begin(),
                        nnapi_partition_sizes.end(), separator_count);

    // An unsupported node is placed right after each partition except the
    // last one.
    std::unordered_set<int> separator_node_indexes;
    int separator_index = -1;
    for (std::size_t partition = 0; partition < separator_count;
         ++partition) {
      separator_index += nnapi_partition_sizes[partition] + 1;
      separator_node_indexes.insert(separator_index);
    }

    NodeFilter* node_filter = DelegatePartitionLimitTestNodeFilter();
    node_filter->ConfigureSupportedNodes(graph_size_, separator_node_indexes);

    // The stub must be a captureless lambda, so it reaches the configuration
    // through the shared NodeFilter.
    nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
        [](const ANeuralNetworksModel* model,
           const ANeuralNetworksDevice* const* devices, uint32_t num_devices,
           bool* supported_ops) -> int {
          DelegatePartitionLimitTestNodeFilter()->SetNodeSupport(supported_ops);
          return ANEURALNETWORKS_NO_ERROR;
        });

    model_ = std::make_unique<LongIdentityModel>(
        input_shape, graph_size_, nnapi_mock_->GetNnApi(),
        /*accelerator_name=*/"test-device", max_nnapi_partitions);
  }

  std::unique_ptr<LongIdentityModel> model_;

  // Number of nodes of the graph built by Init, before delegation.
  int OriginalGraphSize() { return graph_size_; }

 private:
  int graph_size_;
 };
 // Two delegable partitions (3 and 2 nodes) with a limit of one partition:
 // only one must end up delegated.
 TEST_F(DelegatePartitionLimitTest, ShouldDelegateOnePartitionOnly) {
  const std::vector<int> partition_sizes = {3, 2};
  const std::vector<int> input_shape = {1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/1, partition_sizes, input_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
 }
 // A limit of zero means no limit: both partitions must be delegated.
 TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsZero) {
  const std::vector<int> partition_sizes = {3, 2};
  const std::vector<int> input_shape = {1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/0, partition_sizes, input_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
 }
 // A negative limit means no limit: both partitions must be delegated.
 // The limit passed here must really be negative; passing zero would only
 // repeat the zero-limit test and leave the negative case untested.
 TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfLimitIsNegative) {
  Init(/*max_nnapi_partitions=*/-1,
       /*nnapi_partition_sizes=*/{3, 2},
       /*input_shape=*/{1, 2, 2, 1});
  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
 }
 // The limit (3) exceeds the number of delegable partitions (2): all of them
 // must be delegated.
 TEST_F(DelegatePartitionLimitTest,
       ShouldDelegateAllPossiblePartitionsIfBelowLimit) {
  const std::vector<int> partition_sizes = {3, 2};
  const std::vector<int> input_shape = {1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/3, partition_sizes, input_shape);

  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
 }
 // With a limit of one, the 3-node partition must be chosen over the 2-node
 // one, leaving every other node of the graph on the CPU.
 TEST_F(DelegatePartitionLimitTest, ShouldDelegatePartitionWithHigherNodeCount) {
  const std::vector<int> partition_sizes = {3, 2};
  const std::vector<int> input_shape = {1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/1, partition_sizes, input_shape);

  const int expected_delegated_nodes = 3;
  EXPECT_EQ(model_->CountNnApiPartitions(), 1);
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - expected_delegated_nodes);
 }
 // With a limit of two, the two largest partitions (5 and 4 nodes) must be
 // chosen, leaving every other node of the graph on the CPU.
 TEST_F(DelegatePartitionLimitTest,
       ShouldDelegatePartitionsWithHigherNodeCount) {
  const std::vector<int> partition_sizes = {1, 5, 2, 4};
  const std::vector<int> input_shape = {1, 2, 2, 1};
  Init(/*max_nnapi_partitions=*/2, partition_sizes, input_shape);

  const int expected_delegated_nodes = 5 + 4;
  EXPECT_EQ(model_->CountNnApiPartitions(), 2);
  EXPECT_EQ(model_->CountOpsExecutedByCpuKernel(),
            OriginalGraphSize() - expected_delegated_nodes);
 }
 }  // namespace
 }  // namespace tflite