Limit delegated ops to actually supported ones if a device name is specified or NNAPI CPU Fallback is disabled.
PiperOrigin-RevId: 291957162 Change-Id: I3dd084818eaea0fc7f8332bd162ba30519a07c68
commit 8f9ecf6dd2
parent acf820c4bb
@@ -36,6 +36,9 @@ cc_library(
         "//tensorflow/lite/nnapi:nnapi_implementation",
         "//tensorflow/lite/nnapi:nnapi_lib",
         "//tensorflow/lite/nnapi:nnapi_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
@@ -68,6 +71,8 @@ cc_library(
         "//tensorflow/lite/kernels:kernel_util",
         "//tensorflow/lite/nnapi:nnapi_implementation",
         "//tensorflow/lite/nnapi:nnapi_util",
+        "@com_google_absl//absl/memory",
+        "@com_google_absl//absl/types:optional",
     ],
 )
 
@@ -26,6 +26,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 #include <tuple>
+#include <utility>
 #include <vector>
 
 #ifdef __ANDROID__
@@ -38,6 +39,8 @@ limitations under the License.
 #include <unistd.h>
 #endif
 
+#include "absl/memory/memory.h"
+#include "absl/types/optional.h"
 #include "tensorflow/lite/allocation.h"
 #include "tensorflow/lite/builtin_op_data.h"
 #include "tensorflow/lite/builtin_ops.h"
@@ -403,9 +406,8 @@ TfLiteStatus GetTargetSdkVersion(
 // - NNAPI CPU implementation has been explicitly disabled.
 // If exclude_nnapi_reference is true this method will return false if the
 // accelerator_name in the delegate options is equal to "nnapi-reference"
-bool ShouldUseTargetDevices(TfLiteDelegate* delegate,
+bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
                             bool exclude_nnapi_reference = false) {
-  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
   const char* device_name_ptr = delegate_options.accelerator_name;
   std::string nnapi_cpu("nnapi-reference");
   bool has_selected_accelerator = device_name_ptr != nullptr;
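The refactored helper now takes the options struct directly, so callers can fetch the options once and reuse them for every query. Below is a minimal standalone sketch of the selection predicate, assuming a simplified `Options` struct (the real `StatefulNnApiDelegate::Options` carries more fields); the `exclude_nnapi_reference` behavior follows the doc comment above.

```cpp
#include <cstring>

// Simplified stand-in for StatefulNnApiDelegate::Options (assumption: only
// the two fields the predicate needs are modeled here).
struct Options {
  const char* accelerator_name = nullptr;  // e.g. "dsp" or "nnapi-reference"
  bool disallow_nnapi_cpu = false;
};

bool ShouldUseTargetDevices(const Options& options,
                            bool exclude_nnapi_reference = false) {
  const char* device_name_ptr = options.accelerator_name;
  const bool has_selected_accelerator = device_name_ptr != nullptr;
  // Per the doc comment: with exclude_nnapi_reference set, selecting the
  // NNAPI CPU reference implementation makes the predicate return false.
  if (exclude_nnapi_reference && has_selected_accelerator &&
      std::strcmp(device_name_ptr, "nnapi-reference") == 0) {
    return false;
  }
  return has_selected_accelerator || options.disallow_nnapi_cpu;
}
```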
@@ -3046,7 +3048,7 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
   const auto delegate_options =
       StatefulNnApiDelegate::GetOptions(params->delegate);
   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-      ShouldUseTargetDevices(params->delegate)) {
+      ShouldUseTargetDevices(delegate_options)) {
     TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
                                            nnapi_errno, &nnapi_devices_));
 
@@ -3072,91 +3074,133 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
         params->output_tensors, nnapi_errno));
   }
 
-  if (!nn_compilation_) {
-    ANeuralNetworksCompilation* compilation = nullptr;
-    if (!nnapi_devices_.empty()) {
-      // Compile for the selected accelerator.
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksCompilation_createForDevices(
-              nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
-              &compilation),
-          "creating NNAPI model for given devices", nnapi_errno);
-    } else {
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
-                                      nnapi_->ANeuralNetworksCompilation_create(
-                                          nn_model_.get(), &compilation),
-                                      "creating NNAPI compilation",
-                                      nnapi_errno);
+  // Calculating model compilation cache here since the value depends on
+  // some of the TfLiteDelegateParams
+  nn_compilation_cache_token_.clear();
+  const char* cache_dir = delegate_options.cache_dir;
+  const char* model_token = delegate_options.model_token;
+  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
+      model_token) {
+    // Compilation caching could be enabled, try to construct the uint8
+    // token.
+    // TODO(b/133342794): use a generic token generator class.
+    uint64_t token_parts[4];
+    // bits from model_token.
+    token_parts[0] = std::hash<std::string>{}(model_token);
+    // bits from params->nodes_to_replace.
+    token_parts[1] = GetHash(params->nodes_to_replace);
+    // bits from params->input_tensors.
+    token_parts[2] = GetHash(params->input_tensors);
+    // bits from params->output_tensors.
+    token_parts[3] = GetHash(params->output_tensors);
+    // NNAPI requires the token to be 256bit long.
+    std::vector<uint8_t> nnapi_cache_token(32, 0);
+    // Copy the token bits.
+    uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
+    for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
+      nnapi_cache_token[i] = p[i];
     }
 
-    auto preference = delegate_options.execution_preference;
-    if (preference !=
-        StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
-      const int preference_result =
-          nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
-                                                           preference);
-      if (preference_result != ANEURALNETWORKS_NO_ERROR) {
-        nnapi_->ANeuralNetworksCompilation_free(compilation);
-        compilation = nullptr;
-      }
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
-                                      "setting compilation preferences",
-                                      nnapi_errno);
-    }
-
-    const char* cache_dir = delegate_options.cache_dir;
-    const char* model_token = delegate_options.model_token;
-    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
-        model_token) {
-      // Compilation caching could be enabled, try construct the uint8
-      // token.
-      // TODO(133342794): use a generic token generator class.
-      uint64_t token_parts[4];
-      // bits from model_token.
-      token_parts[0] = std::hash<std::string>{}(model_token);
-      // bits from params->nodes_to_replace.
-      token_parts[1] = GetHash(params->nodes_to_replace);
-      // bits from params->input_tensors.
-      token_parts[2] = GetHash(params->input_tensors);
-      // bits from params->output_tensors.
-      token_parts[3] = GetHash(params->output_tensors);
-      // NNAPI requires the token to be 256bit long.
-      std::vector<uint8_t> nnapi_cache_token(32, 0);
-      // Copy the token bits.
-      uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
-      for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
-        nnapi_cache_token[i] = p[i];
-      }
-      const int set_caching_result =
-          nnapi_->ANeuralNetworksCompilation_setCaching(
-              compilation, cache_dir, nnapi_cache_token.data());
-      if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
-        nnapi_->ANeuralNetworksCompilation_free(compilation);
-        compilation = nullptr;
-      }
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
-                                      "configuring NNAPI caching", nnapi_errno);
-    }
-    const int finish_result =
-        nnapi_->ANeuralNetworksCompilation_finish(compilation);
-    if (finish_result != ANEURALNETWORKS_NO_ERROR) {
-      nnapi_->ANeuralNetworksCompilation_free(compilation);
-      compilation = nullptr;
-    }
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(
-        context, finish_result, "completing NNAPI compilation", nnapi_errno);
-    nn_compilation_.reset(compilation);
+    nn_compilation_cache_token_ = nnapi_cache_token;
   }
 
+  initialised_ = true;
+
   return kTfLiteOk;
 }
 
 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
                                           TfLiteNode* node, int* nnapi_errno) {
-  if (!nn_compilation_) {
-    // Compilation failed earlier, return error.
+  if (!initialised_) {
     return kTfLiteError;
   }
 
+  if (nn_compilation_) {
+    return kTfLiteOk;
+  }
+
+  const auto delegate_options =
+      StatefulNnApiDelegate::GetOptions(node->delegate);
+  ANeuralNetworksCompilation* compilation = nullptr;
+  if (!nnapi_devices_.empty()) {
+    // Compile for the selected accelerator.
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context,
+        nnapi_->ANeuralNetworksCompilation_createForDevices(
+            nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
+            &compilation),
+        "creating NNAPI model for given devices", nnapi_errno);
+  } else {
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
+                                    nnapi_->ANeuralNetworksCompilation_create(
+                                        nn_model_.get(), &compilation),
+                                    "creating NNAPI compilation", nnapi_errno);
+  }
+
+  auto preference = delegate_options.execution_preference;
+  if (preference !=
+      StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
+    const int preference_result =
+        nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
+                                                         preference);
+    if (preference_result != ANEURALNETWORKS_NO_ERROR) {
+      nnapi_->ANeuralNetworksCompilation_free(compilation);
+      compilation = nullptr;
+    }
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
+                                    "setting compilation preferences",
+                                    nnapi_errno);
+  }
+
+  if (!nn_compilation_cache_token_.empty()) {
+    const char* cache_dir = delegate_options.cache_dir;
+    const int set_caching_result =
+        nnapi_->ANeuralNetworksCompilation_setCaching(
+            compilation, cache_dir, nn_compilation_cache_token_.data());
+    if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
+      nnapi_->ANeuralNetworksCompilation_free(compilation);
+      compilation = nullptr;
+    }
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
+                                    "configuring NNAPI caching", nnapi_errno);
+  }
+  const int finish_result =
+      nnapi_->ANeuralNetworksCompilation_finish(compilation);
+  if (finish_result != ANEURALNETWORKS_NO_ERROR) {
+    nnapi_->ANeuralNetworksCompilation_free(compilation);
+    compilation = nullptr;
+  }
+  RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
+                                  "completing NNAPI compilation", nnapi_errno);
+  nn_compilation_.reset(compilation);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
+    TfLiteContext* context, std::vector<int>* supported_nodes,
+    int* nnapi_errno) {
+  if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
+    return kTfLiteError;
+  }
+
+  // Determine the list of operations the device actually supports
+  auto support_flags = absl::make_unique<bool[]>(nodes_.size());
+
+  RETURN_TFLITE_ERROR_IF_NN_ERROR(
+      context,
+      nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
+          nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
+          support_flags.get()),
+      "Checking supported operations for devices", nnapi_errno);
+
+  supported_nodes->clear();
+  for (int i = 0; i < nodes_.size(); i++) {
+    if (support_flags[i]) {
+      supported_nodes->push_back(nodes_[i]);
+    }
+  }
+
   return kTfLiteOk;
 }
 
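For reference, the token construction moved into Init above can be exercised on its own: four 64-bit hash values are packed into the 32-byte (256-bit) buffer that ANeuralNetworksCompilation_setCaching expects. A standalone sketch follows; GetHash over the TfLiteIntArray members is replaced by caller-supplied hashes purely for illustration.

```cpp
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include <vector>

std::vector<uint8_t> MakeNnApiCacheToken(const std::string& model_token,
                                         uint64_t nodes_hash,
                                         uint64_t inputs_hash,
                                         uint64_t outputs_hash) {
  // Four 64-bit parts: model token hash plus the three partition hashes.
  const uint64_t token_parts[4] = {std::hash<std::string>{}(model_token),
                                   nodes_hash, inputs_hash, outputs_hash};
  // NNAPI requires the caching token to be exactly 256 bits (32 bytes).
  std::vector<uint8_t> token(32, 0);
  static_assert(sizeof(token_parts) == 32, "token must be 256 bits");
  std::memcpy(token.data(), token_parts, sizeof(token_parts));
  return token;
}
```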
@@ -3768,6 +3812,35 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(
 
 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
 
+StatefulNnApiDelegate::Data::~Data() {
+  std::for_each(std::begin(delegate_state_cache),
+                std::end(delegate_state_cache),
+                [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
+                  delete entry.second;
+                });
+}
+
+void StatefulNnApiDelegate::Data::CacheDelegateKernel(
+    const TfLiteDelegateParams* delegate_params,
+    NNAPIDelegateKernel* delegate_state) {
+  const int cache_key = delegate_params->nodes_to_replace->data[0];
+  delegate_state_cache.emplace(cache_key, delegate_state);
+}
+
+absl::optional<NNAPIDelegateKernel*>
+StatefulNnApiDelegate::Data::GetCachedDelegateKernel(
+    const TfLiteDelegateParams* delegate_params) {
+  const int cache_key = delegate_params->nodes_to_replace->data[0];
+  const auto cached_state = delegate_state_cache.find(cache_key);
+  if (cached_state != std::end(delegate_state_cache)) {
+    auto result = absl::optional<NNAPIDelegateKernel*>(cached_state->second);
+    delegate_state_cache.erase(cached_state);
+    return result;
+  } else {
+    return absl::nullopt;
+  }
+}
+
 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
     : TfLiteDelegate(TfLiteDelegateCreate()),
       delegate_data_(
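The cache added here is deliberately take-once: a hit removes the entry, so each cached kernel is handed out exactly once and anything left over is deleted in ~Data(). A compilable sketch of the pattern (KernelCache and its template parameter are illustrative stand-ins, not the TF Lite API):

```cpp
#include <unordered_map>

#include "absl/types/optional.h"

template <typename Kernel>
struct KernelCache {
  // Keyed by the first node index of the partition, as in Data above.
  std::unordered_map<int, Kernel*> state;

  void Put(int first_node_index, Kernel* kernel) {
    state.emplace(first_node_index, kernel);
  }

  // A successful lookup erases the entry, transferring ownership to the
  // caller; a miss returns absl::nullopt.
  absl::optional<Kernel*> Take(int first_node_index) {
    auto it = state.find(first_node_index);
    if (it == state.end()) return absl::nullopt;
    Kernel* kernel = it->second;
    state.erase(it);
    return kernel;
  }
};
```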
@@ -3877,7 +3950,8 @@ using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
 
 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                               TfLiteDelegate* delegate) {
-  int* nnapi_errno = &(static_cast<Data*>(delegate->data_)->nnapi_errno);
+  auto* delegate_data = static_cast<Data*>(delegate->data_);
+  int* nnapi_errno = &(delegate_data->nnapi_errno);
 
   // Resetting the error code when the delegate is initialized
   // by TFLite. This causes the error to be reset if reusing the same
@@ -3892,17 +3966,19 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
   }
 
   int target_sdk_version = nnapi->android_sdk_version;
+  const StatefulNnApiDelegate::Options delegate_options =
+      StatefulNnApiDelegate::GetOptions(delegate);
   // For NNAPI 1.2+, check if there is any accelerator available.
   // If not, don't delegate to NNAPI's CPU reference implementation unless
   // it has been specified as target accelerator.
   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
-    if (ShouldUseTargetDevices(delegate)) {
+    if (ShouldUseTargetDevices(delegate_options)) {
       std::vector<ANeuralNetworksDevice*> devices;
       TF_LITE_ENSURE_STATUS(
           GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
 
       if (devices.empty()) {
-        if (StatefulNnApiDelegate::GetOptions(delegate).accelerator_name) {
+        if (delegate_options.accelerator_name) {
           // There was a selected device and it is not available.
           return kTfLiteError;
         } else {
@@ -3937,13 +4013,13 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
   // Check for every node if it is supported
+  const bool is_accelerator_specified = ShouldUseTargetDevices(
+      delegate_options, /*exclude_nnapi_reference=*/true);
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
     TfLiteRegistration* registration;
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
-    const bool is_accelerator_specified =
-        ShouldUseTargetDevices(delegate, /*exclude_nnapi_reference=*/true);
     if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
                                       registration->version, target_sdk_version,
                                       node, is_accelerator_specified)) {
@@ -3965,10 +4041,21 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                              size_t length) -> void* {
         const TfLiteDelegateParams* params =
             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
-        int* nnapi_errno =
-            &(static_cast<Data*>(params->delegate->data_)->nnapi_errno);
-        NNAPIDelegateKernel* kernel_state = new NNAPIDelegateKernel;
-        kernel_state->Init(context, params, nnapi_errno);
+        auto* delegate_data = static_cast<Data*>(params->delegate->data_);
+        int* nnapi_errno = &(delegate_data->nnapi_errno);
+
+        auto delegate_state_maybe =
+            delegate_data->GetCachedDelegateKernel(params);
+
+        NNAPIDelegateKernel* kernel_state;
+        if (delegate_state_maybe.has_value()) {
+          kernel_state = *delegate_state_maybe;
+        } else {
+          kernel_state = new NNAPIDelegateKernel;
+          kernel_state->Init(context, params, nnapi_errno);
+        }
+
         return kernel_state;
       },
 
@@ -3998,11 +4085,55 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
       .version = 1,
   };
 
-  // Request TFLite to partition the graph and make kernels
-  // for each independent node sub set a new nnapi_delegate_kernel.
-  return context->ReplaceNodeSubsetsWithDelegateKernels(
-      context, nnapi_delegate_kernel,
-      reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()), delegate);
+  std::vector<int>& nodes_to_delegate = supported_nodes;
+  if (is_accelerator_specified) {
+    TfLiteDelegateParams* params_array;
+    int num_partitions = 0;
+    // The first entry in the array is the element count
+    std::vector<int> device_supported_nodes(1);
+    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
+        context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
+        &params_array, &num_partitions));
+    // For each partition check which nodes are actually supported by the
+    // target accelerators.
+    delegate_data->delegate_state_cache.clear();
+    for (int idx = 0; idx < num_partitions; idx++) {
+      const auto& partition_params = params_array[idx];
+      auto kernel_state = absl::make_unique<NNAPIDelegateKernel>();
+      TfLiteDelegateParams params_with_delegate = partition_params;
+      params_with_delegate.delegate = delegate;
+      TF_LITE_ENSURE_STATUS(
+          kernel_state->Init(context, &params_with_delegate, nnapi_errno));
+
+      std::vector<int> supported_partition_nodes;
+      TF_LITE_ENSURE_STATUS(
+          kernel_state->GetOperationsSupportedByTargetNnApiDevices(
+              context, &supported_partition_nodes, nnapi_errno));
+      device_supported_nodes.insert(device_supported_nodes.end(),
+                                    supported_partition_nodes.begin(),
+                                    supported_partition_nodes.end());
+
+      bool model_fully_supported = (supported_partition_nodes.size() ==
+                                    partition_params.nodes_to_replace->size);
+      if (model_fully_supported) {
+        delegate_data->CacheDelegateKernel(&partition_params,
+                                           kernel_state.release());
+      }
+    }
+
+    device_supported_nodes[0] = device_supported_nodes.size() - 1;
+    nodes_to_delegate = device_supported_nodes;
+  }
+
+  if (nodes_to_delegate.empty()) {
+    return kTfLiteOk;
+  } else {
+    // Request TFLite to partition the graph and make kernels
+    // for each independent node sub set a new nnapi_delegate_kernel.
+    return context->ReplaceNodeSubsetsWithDelegateKernels(
+        context, nnapi_delegate_kernel,
+        reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
+  }
 }
 
 // Returns a singleton NNAPI Delegate that can check for support of ops.
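The `device_supported_nodes` vector above relies on the layout trick already used for `supported_nodes`: a `std::vector<int>` whose element 0 holds the count is layout-compatible with `TfLiteIntArray`, so its `data()` can be reinterpret_cast for `ReplaceNodeSubsetsWithDelegateKernels`. A sketch of the packing step, with `MakeDelegateNodeArray` as a hypothetical helper name:

```cpp
#include <vector>

// Packs node indices into a TfLiteIntArray-compatible buffer:
// element 0 is the count, elements 1..n are the node indices.
std::vector<int> MakeDelegateNodeArray(const std::vector<int>& node_indices) {
  std::vector<int> packed(1);  // reserve slot 0 for the count
  packed.insert(packed.end(), node_indices.begin(), node_indices.end());
  packed[0] = static_cast<int>(packed.size()) - 1;
  return packed;
}
// The delegate would then pass
//   reinterpret_cast<TfLiteIntArray*>(packed.data())
// to ReplaceNodeSubsetsWithDelegateKernels.
```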
@@ -17,14 +17,22 @@ limitations under the License.
 
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
+#include "absl/types/optional.h"
 #include "tensorflow/lite/c/common.h"
 
 typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
 
 namespace tflite {
 
+namespace delegate::nnapi {
+class NNAPIDelegateKernel;
+}  // namespace delegate::nnapi
+
+using tflite::delegate::nnapi::NNAPIDelegateKernel;
+
 // TFliteDelegate to interface with NNAPI.
 class StatefulNnApiDelegate : public TfLiteDelegate {
  public:
@@ -144,6 +152,21 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
     // Contains a non zero value if any NNAPI method call
     // operation returned a non zero result code.
     int nnapi_errno;
+    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
+    // when trying to understand if all nodes are supported by the target
+    // accelerators.
+    // The key is the index of the first node in the partition.
+    // Couldn't use unique_ptr because of problems building on gcc
+    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
+
+    ~Data();
+
+    // Caches an initialised NNAPIDelegateKernel.
+    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
+                             NNAPIDelegateKernel* delegate_state);
+    // Returns a cached NNAPIDelegateKernel if available.
+    absl::optional<NNAPIDelegateKernel*> GetCachedDelegateKernel(
+        const TfLiteDelegateParams* delegate_params);
   };
 
 // Implements TfLiteDelegate::Prepare. Please refer to TFLiteDelegate
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/minimal_logging.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
@@ -86,7 +87,7 @@ struct NnApiDeviceSelectionTest
     ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp();
     nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
       *numDevices = 3;
-      return 0;
+      return ANEURALNETWORKS_NO_ERROR;
     };
     nnapi_->ANeuralNetworks_getDevice =
         [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
@@ -102,8 +103,15 @@ struct NnApiDeviceSelectionTest
           } else {
             *name = "nnapi-reference";
           }
-          return 0;
+          return ANEURALNETWORKS_NO_ERROR;
         };
+    nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
+        [](const ANeuralNetworksModel* model,
+           const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+           bool* supportedOps) -> int {
+          supportedOps[0] = true;
+          return ANEURALNETWORKS_NO_ERROR;
+        });
   }
   void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) {
     m.Init(options, {TensorType_FLOAT32, {1, 2, 2, 1}},
@@ -116,13 +124,13 @@ struct NnApiDeviceSelectionTest
 };
 
 TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) {
-  nnapi_->ANeuralNetworksCompilation_createForDevices =
+  nnapi_mock_->StubCompilationCreateForDevicesWith(
       [](ANeuralNetworksModel* model,
          const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_TRUE(false) << "Should not call createForDevices";
        return 1;
-      };
+      });
 
   tflite::StatefulNnApiDelegate::Options options;
   InitWithOptions(options);
@@ -132,20 +140,20 @@ TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) {
 
 TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
   nnapi_mock_->CompilationCreateReturns<1>();
-  nnapi_->ANeuralNetworksCompilation_createForDevices =
+  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 1);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        if (numDevices != 1 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
-          return 0;
+          return ANEURALNETWORKS_NO_ERROR;
        }
-      };
+      });
 
   tflite::StatefulNnApiDelegate::Options options;
   options.accelerator_name = "dsp";
@@ -156,22 +164,22 @@ TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) {
 
 TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
   nnapi_mock_->CompilationCreateReturns<1>();
-  nnapi_->ANeuralNetworksCompilation_createForDevices =
+  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
         ANeuralNetworksCompilation** compilation) -> int {
        EXPECT_EQ(numDevices, 2);
        EXPECT_EQ(devices[0], reinterpret_cast<ANeuralNetworksDevice*>(1));
        EXPECT_EQ(devices[1], reinterpret_cast<ANeuralNetworksDevice*>(2));
        if (numDevices != 2 ||
            devices[0] != reinterpret_cast<ANeuralNetworksDevice*>(1) ||
            devices[1] != reinterpret_cast<ANeuralNetworksDevice*>(2)) {
          return 1;
        } else {
          *compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
-          return 0;
+          return ANEURALNETWORKS_NO_ERROR;
        }
-      };
+      });
 
   tflite::StatefulNnApiDelegate::Options options;
   options.disallow_nnapi_cpu = true;
@@ -185,14 +193,14 @@ TEST_F(NnApiDeviceSelectionTest,
   // Only nnapi-reference is available on device
   nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
     *numDevices = 1;
-    return 0;
+    return ANEURALNETWORKS_NO_ERROR;
   };
   nnapi_->ANeuralNetworksDevice_getName =
       [](const ANeuralNetworksDevice* device, const char** name) -> int {
        if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
          *name = "nnapi-reference";
        }
-        return 0;
+        return ANEURALNETWORKS_NO_ERROR;
      };
 
   tflite::StatefulNnApiDelegate::Options options;
@@ -208,14 +216,14 @@ TEST_F(NnApiDeviceSelectionTest,
   // Only nnapi-reference is available on device
   nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
     *numDevices = 1;
-    return 0;
+    return ANEURALNETWORKS_NO_ERROR;
   };
   nnapi_->ANeuralNetworksDevice_getName =
       [](const ANeuralNetworksDevice* device, const char** name) -> int {
        if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
          *name = "nnapi-reference";
        }
-        return 0;
+        return ANEURALNETWORKS_NO_ERROR;
      };
 
   tflite::StatefulNnApiDelegate::Options options;
@@ -349,6 +357,172 @@ TEST_F(UnsupportedOperationOnDeviceTest,
       << "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
 }
 
+// This is a model with two ops:
+//
+//  input1 ---->
+//               ADD --
+//  input2 -->         |
+//                      -->
+//                         SUB --> output
+//  input3 ---------------->
+//
+class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel {
+ public:
+  AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2,
+                            const TensorData& input3, const TensorData& output,
+                            ActivationFunctionType activation_type,
+                            const std::string& accelerator_name,
+                            bool allow_fp32_relax_to_fp16 = false)
+      : MultiOpModel(), AcceleratedModel(accelerator_name) {
+    auto* delegate = GetDelegate();
+    this->SetApplyDelegate([delegate](Interpreter* interpreter) {
+      interpreter->ModifyGraphWithDelegate(delegate);
+    });
+    Init(input1, input2, input3, output, activation_type,
+         allow_fp32_relax_to_fp16);
+  }
+
+  int input1() { return input1_; }
+  int input2() { return input2_; }
+  int input3() { return input3_; }
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+  int input1_;
+  int input2_;
+  int input3_;
+  int output_;
+
+ private:
+  // Performs initialization logic shared across all constructors.
+  void Init(const TensorData& input1, const TensorData& input2,
+            const TensorData& input3, const TensorData& output,
+            ActivationFunctionType activation_type,
+            bool allow_fp32_relax_to_fp16 = false) {
+    input1_ = AddInput(input1);
+    input2_ = AddInput(input2);
+    input3_ = AddInput(input3);
+    const int add_output = AddInnerTensor<float>(output);
+    output_ = AddOutput(output);
+    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
+                 CreateAddOptions(builder_, activation_type).Union(),
+                 {input1_, input2_}, {add_output});
+    AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
+                 CreateSubOptions(builder_, activation_type).Union(),
+                 {add_output, input3_}, {output_});
+    BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)},
+                     allow_fp32_relax_to_fp16);
+  }
+};
+
+int should_build_model_with_sup_ops_compilation_model_create_count = 0;
+int should_build_model_with_sup_ops_add_operation_count = 0;
+TEST_F(UnsupportedOperationOnDeviceTest,
+       ShouldBuildModelWithOnlyDeviceSupportedOps) {
+  nnapi_mock_->SetNnapiSupportedDevice("test-device");
+
+  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
+      [](const ANeuralNetworksModel* model,
+         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+         bool* supportedOps) -> int {
+        // Returning the first as supported since this will leverage
+        // the assertion on caching.
+        supportedOps[0] = true;
+        supportedOps[1] = false;
+        return ANEURALNETWORKS_NO_ERROR;
+      });
+
+  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
+    ++should_build_model_with_sup_ops_compilation_model_create_count;
+    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
+    return ANEURALNETWORKS_NO_ERROR;
+  });
+
+  nnapi_mock_->StubAddOperationWith(
+      [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
+         uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
+         const uint32_t* outputs) -> int {
+        ++should_build_model_with_sup_ops_add_operation_count;
+        return ANEURALNETWORKS_NO_ERROR;
+      });
+
+  AddSubOpsAcceleratedModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
+      ActivationFunctionType_NONE, /*accelerator_name=*/"test-device");
+  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
+  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
+  m.PopulateTensor<float>(m.input1(), input1);
+  m.PopulateTensor<float>(m.input2(), input2);
+  m.PopulateTensor<float>(m.input3(), input2);
+  m.Invoke();
+
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
+  ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2)
+      << "Model with unsupported operations has been cached";
+  EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3)
+      << "The second model should contain only one operation";
+}
+
+TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) {
+  nnapi_mock_->SetNnapiSupportedDevice("test-device");
+
+  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
+      [](const ANeuralNetworksModel* model,
+         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+         bool* supportedOps) -> int {
+        std::fill(supportedOps, supportedOps + 2, false);
+        return ANEURALNETWORKS_NO_ERROR;
+      });
+
+  AddSubOpsAcceleratedModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
+      ActivationFunctionType_NONE, /*accelerator_name=*/"test-device");
+  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
+  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
+  m.PopulateTensor<float>(m.input1(), input1);
+  m.PopulateTensor<float>(m.input2(), input2);
+  m.PopulateTensor<float>(m.input3(), input2);
+  m.Invoke();
+
+  EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2);
+}
+
+int should_cache_model_compilation_model_create_count = 0;
+TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) {
+  nnapi_mock_->SetNnapiSupportedDevice("test-device");
+
+  nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
+      [](const ANeuralNetworksModel* model,
+         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+         bool* supportedOps) -> int {
+        std::fill(supportedOps, supportedOps + 2, true);
+        return ANEURALNETWORKS_NO_ERROR;
+      });
+
+  nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
+    ++should_cache_model_compilation_model_create_count;
+    *model = reinterpret_cast<ANeuralNetworksModel*>(1);
+    return ANEURALNETWORKS_NO_ERROR;
+  });
+
+  AddSubOpsAcceleratedModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}},
+      ActivationFunctionType_NONE, /*accelerator_name=*/"test-device");
+  std::vector<float> input1{-2.0, 0.2, 0.7, 0.9};
+  std::vector<float> input2{0.1, 0.2, 0.3, 0.5};
+  m.PopulateTensor<float>(m.input1(), input1);
+  m.PopulateTensor<float>(m.input2(), input2);
+  m.PopulateTensor<float>(m.input3(), input2);
+  m.Invoke();
+
+  ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0);
+  EXPECT_EQ(should_cache_model_compilation_model_create_count, 1);
+}
+
 } // namespace
 } // namespace tflite
 
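The `== 2` and `== 3` assertions above encode the new control flow: DoPrepare first builds an NNAPI model for the whole two-op partition to query device support, finds only ADD supported, and then builds a second model containing just that one op. A minimal sketch of the counting-stub technique these tests rely on (plain globals plus capture-free functions, since the NnApi entry points are C function pointers; names here are hypothetical):

```cpp
#include <cassert>

// Hypothetical miniature of the test's counting stubs.
int model_create_count = 0;

int StubModelCreate(void** model) {  // stand-in for ANeuralNetworksModel_create
  ++model_create_count;
  *model = reinterpret_cast<void*>(1);  // any non-null handle
  return 0;                             // ANEURALNETWORKS_NO_ERROR
}

int main() {
  void* m = nullptr;
  StubModelCreate(&m);  // 1st build: full partition, for the support query
  StubModelCreate(&m);  // 2nd build: only the device-supported subset
  assert(model_create_count == 2);
  return 0;
}
```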
@@ -44,4 +44,18 @@ TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
 
 int StatefulNnApiDelegate::GetNnApiErrno() const { return 0; }
 
+using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
+
+StatefulNnApiDelegate::Data::~Data() {}
+
+void StatefulNnApiDelegate::Data::CacheDelegateKernel(
+    const TfLiteDelegateParams* delegate_params,
+    NNAPIDelegateKernel* delegate_state) {}
+
+absl::optional<NNAPIDelegateKernel*>
+StatefulNnApiDelegate::Data::GetCachedDelegateKernel(
+    const TfLiteDelegateParams* delegate_params) {
+  return absl::nullopt;
+}
+
 } // namespace tflite
@@ -224,7 +224,8 @@ struct NNAPIValidationFailure {
 class NNAPIDelegateKernel {
  public:
   explicit NNAPIDelegateKernel(const NnApi* nnapi)
-      : nnapi_(nnapi),
+      : initialised_(false),
+        nnapi_(nnapi),
         nn_model_(nullptr, NNFreeModel(nnapi_)),
         nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {}
   NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
@@ -255,23 +256,41 @@ class NNAPIDelegateKernel {
       // the given node
       std::vector<NNAPIValidationFailure>* map_failures = nullptr);
 
-  // Initialize the kernel (a NN model).
+  // Initializes the kernel (a NN model) and builds the NN Model.
   // Any NNAPI Related error causing this method to fail will have the
   // associated error number stored in nnapi_errno
   TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                     int* nnapi_errno);
 
+  // Creates the NNAPI Compilation for the NN model. It assumes that Init has
+  // been called and completed successfully.
   // Any NNAPI Related error causing this method to fail will have the
   // associated error number stored in nnapi_errno
   TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                        int* nnapi_errno);
 
+  // Invoke the NN Model. Expects Init and Prepare to have been completed
+  // successfully.
   // Any NNAPI Related error causing this method to fail will have the
   // associated error number stored in nnapi_errno
   TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);
 
+  // Returns the list of operations supported by the current NNAPI model as
+  // built in Prepare. Every operation is identified by the index as provided
+  // in the delegate parameters given to the delegate during the Init call.
+  // It expects the Init method has been called and completed successfully and
+  // returns kTfLiteError if not. Returns an error if any of the NNAPI
+  // operations fails or if the
+  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
+  // available in the NnApi object.
+  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
+      TfLiteContext* context, std::vector<int>* supported_nodes,
+      int* nnapi_errno);
+
  private:
+  // True if initialization has been completed successfully
+  bool initialised_;
   // Access to NNApi.
   const NnApi* nnapi_;
   // ANN device handle.
@@ -302,6 +321,8 @@ class NNAPIDelegateKernel {
   std::unique_ptr<NNMemory> nn_input_memory_;
   std::unique_ptr<NNMemory> nn_output_memory_;
 
+  std::vector<uint8_t> nn_compilation_cache_token_;
+
   void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
                                          int builtin_code,
                                          const TfLiteNode* node,
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <gtest/gtest.h>
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/test_util.h"
 #include "tensorflow/lite/minimal_logging.h"
@@ -344,4 +344,32 @@ int SingleOpModel::CountOpsExecutedByCpuKernel() {
 
 SingleOpModel::~SingleOpModel() { ValidateAcceleration(); }
 
+void MultiOpModel::AddBuiltinOp(
+    BuiltinOperator type, BuiltinOptions builtin_options_type,
+    const flatbuffers::Offset<void>& builtin_options,
+    const std::vector<int32_t>& inputs, const std::vector<int32_t>& outputs) {
+  opcodes_.push_back(CreateOperatorCode(builder_, type, 0));
+  const int opcode_index = opcodes_.size() - 1;
+  operators_.push_back(CreateOperator(
+      builder_, opcode_index, builder_.CreateVector<int32_t>(inputs),
+      builder_.CreateVector<int32_t>(outputs), builtin_options_type,
+      builtin_options,
+      /*custom_options=*/0, CustomOptionsFormat_FLEXBUFFERS));
+}
+
+void MultiOpModel::AddCustomOp(
+    const string& name, const std::vector<uint8_t>& custom_option,
+    const std::function<TfLiteRegistration*()>& registration,
+    const std::vector<int32_t>& inputs, const std::vector<int32_t>& outputs) {
+  custom_registrations_[name] = registration;
+  opcodes_.push_back(
+      CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data()));
+  const int opcode_index = opcodes_.size() - 1;
+  operators_.push_back(CreateOperator(
+      builder_, opcode_index, builder_.CreateVector<int32_t>(inputs),
+      builder_.CreateVector<int32_t>(outputs), BuiltinOptions_NONE, 0,
+      builder_.CreateVector<uint8_t>(custom_option),
+      CustomOptionsFormat_FLEXBUFFERS));
+}
+
 } // namespace tflite
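A usage sketch of the new MultiOpModel helpers follows (a hypothetical TwoOpModel, written as a subclass because AddInput/AddOutput and builder_ are protected members of SingleOpModel): two builtin ops are chained through an intermediate tensor created with AddInnerTensor, which is exactly the pattern the AddSubOps tests use.

```cpp
#include "tensorflow/lite/kernels/test_util.h"

namespace tflite {

class TwoOpModel : public MultiOpModel {
 public:
  TwoOpModel() {
    const int in1 = AddInput({TensorType_FLOAT32, {1, 2, 2, 1}});
    const int in2 = AddInput({TensorType_FLOAT32, {1, 2, 2, 1}});
    // Intermediate tensor connecting the two ops; not a graph input/output.
    const int mid = AddInnerTensor<float>({TensorType_FLOAT32, {1, 2, 2, 1}});
    const int out = AddOutput({TensorType_FLOAT32, {1, 2, 2, 1}});
    AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                 CreateAddOptions(builder_).Union(), {in1, in2}, {mid});
    AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions,
                 CreateSubOptions(builder_).Union(), {mid, in2}, {out});
    BuildInterpreter({{1, 2, 2, 1}, {1, 2, 2, 1}});
  }
};

}  // namespace tflite
```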
@@ -423,6 +423,74 @@ class SingleOpModel {
   std::unique_ptr<tflite::Interpreter> interpreter_;
   std::unique_ptr<OpResolver> resolver_;
 
+  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
+  std::vector<flatbuffers::Offset<Operator>> operators_;
+  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
+
+  template <typename T>
+  int AddTensor(TensorData t, std::initializer_list<T> data,
+                bool is_variable = false) {
+    int id = tensors_.size();
+
+    // This is slightly different depending on whether we are adding a
+    // quantized or a regular tensor.
+    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
+
+    flatbuffers::Offset<QuantizationParameters> q_params = 0;
+
+    if (is_quantized) {
+      if (t.min != 0 || t.max != 0) {
+        if (t.type == TensorType_UINT8) {
+          std::tie(t.scale, t.zero_point) =
+              QuantizationParams<uint8_t>(t.min, t.max);
+        } else if (t.type == TensorType_INT8) {
+          std::tie(t.scale, t.zero_point) =
+              QuantizationParams<int8_t>(t.min, t.max);
+        } else if (t.type == TensorType_INT32) {
+          std::tie(t.scale, t.zero_point) =
+              QuantizationParams<int32_t>(t.min, t.max);
+        } else if (t.type == TensorType_INT16) {
+          std::tie(t.scale, t.zero_point) =
+              QuantizationParams<int16_t>(t.min, t.max);
+        } else {
+          LOG(FATAL) << "No support for the requested quantized type";
+        }
+        t.min = 0;
+        t.max = 0;
+      }
+
+      q_params = CreateQuantizationParameters(
+          builder_, /*min=*/0, /*max=*/0,
+          builder_.CreateVector<float>({t.scale}),
+          builder_.CreateVector<int64_t>({t.zero_point}));
+    }
+
+    int buffer_id = 0;
+    if (data.size()) {
+      // Initialize buffers list with empty buffer to allow for non-const
+      // tensors.
+      if (buffers_.empty()) {
+        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
+      }
+
+      // Add data as a Buffer to buffers list.
+      buffer_id = buffers_.size();
+      auto data_buffer =
+          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
+                                sizeof(T) * data.size());
+      buffers_.push_back(CreateBuffer(builder_, data_buffer));
+    }
+
+    tensors_.push_back(CreateTensor(builder_,
+                                    builder_.CreateVector<int>(t.shape), t.type,
+                                    /*buffer=*/buffer_id,
+                                    /*name=*/0, q_params, is_variable));
+
+    tensor_data_[id] = t;
+
+    return id;
+  }
+
  private:
   template <typename T>
   std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
@@ -601,70 +669,6 @@ class SingleOpModel {
     return id;
   }
 
-  template <typename T>
-  int AddTensor(TensorData t, std::initializer_list<T> data,
-                bool is_variable = false) {
-    int id = tensors_.size();
-
-    // This is slightly different depending on whether we are adding a
-    // quantized or a regular tensor.
-    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);
-
-    flatbuffers::Offset<QuantizationParameters> q_params = 0;
-
-    if (is_quantized) {
-      if (t.min != 0 || t.max != 0) {
-        if (t.type == TensorType_UINT8) {
-          std::tie(t.scale, t.zero_point) =
-              QuantizationParams<uint8_t>(t.min, t.max);
-        } else if (t.type == TensorType_INT8) {
-          std::tie(t.scale, t.zero_point) =
-              QuantizationParams<int8_t>(t.min, t.max);
-        } else if (t.type == TensorType_INT32) {
-          std::tie(t.scale, t.zero_point) =
-              QuantizationParams<int32_t>(t.min, t.max);
-        } else if (t.type == TensorType_INT16) {
-          std::tie(t.scale, t.zero_point) =
-              QuantizationParams<int16_t>(t.min, t.max);
-        } else {
-          LOG(FATAL) << "No support for the requested quantized type";
-        }
-        t.min = 0;
-        t.max = 0;
-      }
-
-      q_params = CreateQuantizationParameters(
-          builder_, /*min=*/0, /*max=*/0,
-          builder_.CreateVector<float>({t.scale}),
-          builder_.CreateVector<int64_t>({t.zero_point}));
-    }
-
-    int buffer_id = 0;
-    if (data.size()) {
-      // Initialize buffers list with empty buffer to allow for non-const
-      // tensors.
-      if (buffers_.empty()) {
-        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
-      }
-
-      // Add data as a Buffer to buffers list.
-      buffer_id = buffers_.size();
-      auto data_buffer =
-          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
-                                sizeof(T) * data.size());
-      buffers_.push_back(CreateBuffer(builder_, data_buffer));
-    }
-
-    tensors_.push_back(CreateTensor(builder_,
-                                    builder_.CreateVector<int>(t.shape), t.type,
-                                    /*buffer=*/buffer_id,
-                                    /*name=*/0, q_params, is_variable));
-
-    tensor_data_[id] = t;
-
-    return id;
-  }
-
   std::vector<int8_t> QuantizeTensor(int index,
                                      const std::vector<float>& data) {
     TfLiteTensor* t = interpreter_->tensor(index);
@@ -723,10 +727,7 @@ class SingleOpModel {
   std::vector<int32_t> intermediates_;
   std::vector<int32_t> outputs_;
   std::vector<flatbuffers::Offset<Tensor>> tensors_;
-  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
-  std::vector<flatbuffers::Offset<Operator>> operators_;
   std::vector<flatbuffers::Offset<Buffer>> buffers_;
-  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
   // A function pointer that gets called after the interpreter is created but
   // before evaluation happens. This is useful for applying a delegate.
   std::function<void(Interpreter*)> apply_delegate_fn_;
@@ -837,6 +838,28 @@ struct TypeUnion<uint8_t> {
   typedef uint8_t ScalarType;
 };
 
+class MultiOpModel : public SingleOpModel {
+ public:
+  MultiOpModel() : SingleOpModel() {}
+  ~MultiOpModel() {}
+
+  void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
+                    const flatbuffers::Offset<void>& builtin_options,
+                    const std::vector<int32_t>& inputs,
+                    const std::vector<int32_t>& outputs);
+
+  void AddCustomOp(const string& name,
+                   const std::vector<uint8_t>& custom_option,
+                   const std::function<TfLiteRegistration*()>& registration,
+                   const std::vector<int32_t>& inputs,
+                   const std::vector<int32_t>& outputs);
+
+  template <typename T>
+  int AddInnerTensor(TensorData t) {
+    return AddTensor<T>(t, {}, false);
+  }
+};
+
 } // namespace tflite
 
 #endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
@@ -16,6 +16,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_NNAPI_NNAPI_HANDLER_H_
 
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
 
 namespace tflite {
@@ -97,6 +98,10 @@ class NnApiHandler {
     };
   }
 
+  void StubModelCreateWith(int(stub)(ANeuralNetworksModel** model)) {
+    nnapi_->ANeuralNetworksModel_create = stub;
+  }
+
   template <int Value>
   void AddOperandReturns() {
     nnapi_->ANeuralNetworksModel_addOperand =
@@ -119,6 +124,13 @@ class NnApiHandler {
                 const uint32_t* outputs) { return Value; };
   }
 
+  void StubAddOperationWith(
+      int(stub)(ANeuralNetworksModel* model, ANeuralNetworksOperationType type,
+                uint32_t inputCount, const uint32_t* inputs,
+                uint32_t outputCount, const uint32_t* outputs)) {
+    nnapi_->ANeuralNetworksModel_addOperation = stub;
+  }
+
   template <int Value>
   void IdentifyInputAndOutputsReturns() {
     nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs =
@@ -171,6 +183,12 @@ class NnApiHandler {
     };
   }
 
+  void StubCompilationCreateForDevicesWith(int(stub)(
+      ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices,
+      uint32_t numDevices, ANeuralNetworksCompilation** compilation)) {
+    nnapi_->ANeuralNetworksCompilation_createForDevices = stub;
+  }
+
   template <int Value>
   void CompilationFinishReturns() {
     nnapi_->ANeuralNetworksCompilation_finish =
@@ -210,6 +228,21 @@ class NnApiHandler {
         [](ANeuralNetworksExecution* execution) { return Value; };
   }
 
+  template <int Value>
+  void GetSupportedOperationsForDevicesReturns() {
+    nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices =
+        [](const ANeuralNetworksModel* model,
+           const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
+           bool* supportedOps) { return Value; };
+  }
+
+  void StubGetSupportedOperationsForDevicesWith(
+      int(stub)(const ANeuralNetworksModel* model,
+                const ANeuralNetworksDevice* const* devices,
+                uint32_t numDevices, bool* supportedOps)) {
+    nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices = stub;
+  }
+
   void SetAndroidSdkVersion(int version);
 
  protected:
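A note on the Stub* setters added here: the parameters are declared as plain function types (`int(stub)(...)`), so they accept only free functions or capture-free lambdas, which decay to the C function pointers stored in the NnApi struct. A usage sketch inside a test fixture that owns an NnApiHandler (here `nnapi_mock_`), mirroring the stubs used by the tests in this commit:

```cpp
// Report two ops, first supported and second not, then stub model creation.
nnapi_mock_->StubGetSupportedOperationsForDevicesWith(
    [](const ANeuralNetworksModel* model,
       const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
       bool* supportedOps) -> int {
      supportedOps[0] = true;
      supportedOps[1] = false;
      return ANEURALNETWORKS_NO_ERROR;
    });
nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int {
  *model = reinterpret_cast<ANeuralNetworksModel*>(1);  // fake handle
  return ANEURALNETWORKS_NO_ERROR;
});
```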