From 73675deef7df2575117214f45c575aed206808f5 Mon Sep 17 00:00:00 2001 From: Stefano Galarraga Date: Tue, 28 Jan 2020 11:34:53 -0800 Subject: [PATCH] Limit delegated ops to actually supported ones if a device name is specified or NNAPI CPU Fallback is disabled. PiperOrigin-RevId: 291979150 Change-Id: I380d4a879e4ae009cafe41925334608206257e9c --- tensorflow/lite/delegates/nnapi/BUILD | 5 - .../lite/delegates/nnapi/nnapi_delegate.cc | 315 +++++------------- .../lite/delegates/nnapi/nnapi_delegate.h | 23 -- .../nnapi_delegate_device_selection_test.cc | 242 ++------------ .../nnapi/nnapi_delegate_disabled.cc | 14 - .../delegates/nnapi/nnapi_delegate_kernel.h | 25 +- .../delegates/nnapi/nnapi_delegate_test.cc | 1 + tensorflow/lite/kernels/test_util.cc | 28 -- tensorflow/lite/kernels/test_util.h | 157 ++++----- tensorflow/lite/nnapi/nnapi_handler.h | 33 -- 10 files changed, 196 insertions(+), 647 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD index ee47ad0e24d..3953c73f263 100644 --- a/tensorflow/lite/delegates/nnapi/BUILD +++ b/tensorflow/lite/delegates/nnapi/BUILD @@ -36,9 +36,6 @@ cc_library( "//tensorflow/lite/nnapi:nnapi_implementation", "//tensorflow/lite/nnapi:nnapi_lib", "//tensorflow/lite/nnapi:nnapi_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", ], ) @@ -71,8 +68,6 @@ cc_library( "//tensorflow/lite/kernels:kernel_util", "//tensorflow/lite/nnapi:nnapi_implementation", "//tensorflow/lite/nnapi:nnapi_util", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 6889dcce27b..017986376bd 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -26,7 +26,6 @@ limitations under the License. #include #include #include -#include #include #ifdef __ANDROID__ @@ -39,8 +38,6 @@ limitations under the License. #include #endif -#include "absl/memory/memory.h" -#include "absl/types/optional.h" #include "tensorflow/lite/allocation.h" #include "tensorflow/lite/builtin_op_data.h" #include "tensorflow/lite/builtin_ops.h" @@ -406,8 +403,9 @@ TfLiteStatus GetTargetSdkVersion( // - NNAPI CPU implementation has been explicitly disabled. 
// If exclude_nnapi_reference is true, this method will return false if the
// accelerator_name in the delegate options is equal to "nnapi-reference"
-bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
+bool ShouldUseTargetDevices(TfLiteDelegate* delegate,
                             bool exclude_nnapi_reference = false) {
+  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
   const char* device_name_ptr = delegate_options.accelerator_name;
   std::string nnapi_cpu("nnapi-reference");
   bool has_selected_accelerator = device_name_ptr != nullptr;
@@ -3048,7 +3046,7 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
   const auto delegate_options =
       StatefulNnApiDelegate::GetOptions(params->delegate);
   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-      ShouldUseTargetDevices(delegate_options)) {
+      ShouldUseTargetDevices(params->delegate)) {
     TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
                                            nnapi_errno, &nnapi_devices_));
@@ -3074,133 +3072,91 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
                                       params->output_tensors, nnapi_errno));
   }
-  // Calculating the model compilation cache token here since the value
-  // depends on some of the TfLiteDelegateParams
-  nn_compilation_cache_token_.clear();
-  const char* cache_dir = delegate_options.cache_dir;
-  const char* model_token = delegate_options.model_token;
-  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
-      model_token) {
-    // Compilation caching could be enabled; try to construct the uint8_t
-    // token.
-    // TODO(b/133342794): use a generic token generator class.
-    uint64_t token_parts[4];
-    // bits from model_token.
-    token_parts[0] = std::hash<std::string>{}(model_token);
-    // bits from params->nodes_to_replace.
-    token_parts[1] = GetHash(params->nodes_to_replace);
-    // bits from params->input_tensors.
-    token_parts[2] = GetHash(params->input_tensors);
-    // bits from params->output_tensors.
-    token_parts[3] = GetHash(params->output_tensors);
-    // NNAPI requires the token to be 256 bits long.
-    std::vector<uint8_t> nnapi_cache_token(32, 0);
-    // Copy the token bits.
-    uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
-    for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
-      nnapi_cache_token[i] = p[i];
+  if (!nn_compilation_) {
+    ANeuralNetworksCompilation* compilation = nullptr;
+    if (!nnapi_devices_.empty()) {
+      // Compile for the selected accelerator.
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(
+          context,
+          nnapi_->ANeuralNetworksCompilation_createForDevices(
+              nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
+              &compilation),
+          "creating NNAPI model for given devices", nnapi_errno);
+    } else {
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
+                                      nnapi_->ANeuralNetworksCompilation_create(
+                                          nn_model_.get(), &compilation),
+                                      "creating NNAPI compilation",
+                                      nnapi_errno);
     }
-    nn_compilation_cache_token_ = nnapi_cache_token;
+    auto preference = delegate_options.execution_preference;
+    if (preference !=
+        StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
+      const int preference_result =
+          nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
+                                                           preference);
+      if (preference_result != ANEURALNETWORKS_NO_ERROR) {
+        nnapi_->ANeuralNetworksCompilation_free(compilation);
+        compilation = nullptr;
+      }
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
+                                      "setting compilation preferences",
+                                      nnapi_errno);
+    }
+
+    const char* cache_dir = delegate_options.cache_dir;
+    const char* model_token = delegate_options.model_token;
+    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
+        model_token) {
+      // Compilation caching could be enabled; try to construct the uint8_t
+      // token.
+      // TODO(b/133342794): use a generic token generator class.
+      uint64_t token_parts[4];
+      // bits from model_token.
+      token_parts[0] = std::hash<std::string>{}(model_token);
+      // bits from params->nodes_to_replace.
+      token_parts[1] = GetHash(params->nodes_to_replace);
+      // bits from params->input_tensors.
+      token_parts[2] = GetHash(params->input_tensors);
+      // bits from params->output_tensors.
+      token_parts[3] = GetHash(params->output_tensors);
+      // NNAPI requires the token to be 256 bits long.
+      std::vector<uint8_t> nnapi_cache_token(32, 0);
+      // Copy the token bits.
+      uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
+      for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
+        nnapi_cache_token[i] = p[i];
+      }
+      const int set_caching_result =
+          nnapi_->ANeuralNetworksCompilation_setCaching(
+              compilation, cache_dir, nnapi_cache_token.data());
+      if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
+        nnapi_->ANeuralNetworksCompilation_free(compilation);
+        compilation = nullptr;
+      }
+      RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
+                                      "configuring NNAPI caching", nnapi_errno);
+    }
+    const int finish_result =
+        nnapi_->ANeuralNetworksCompilation_finish(compilation);
+    if (finish_result != ANEURALNETWORKS_NO_ERROR) {
+      nnapi_->ANeuralNetworksCompilation_free(compilation);
+      compilation = nullptr;
+    }
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context, finish_result, "completing NNAPI compilation", nnapi_errno);
+    nn_compilation_.reset(compilation);
   }
-
-  initialised_ = true;
-
   return kTfLiteOk;
 }

 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
                                           TfLiteNode* node, int* nnapi_errno) {
-  if (!initialised_) {
+  if (!nn_compilation_) {
+    // Compilation failed earlier; return an error.
     return kTfLiteError;
   }
-
-  if (nn_compilation_) {
-    return kTfLiteOk;
-  }
-
-  const auto delegate_options =
-      StatefulNnApiDelegate::GetOptions(node->delegate);
-  ANeuralNetworksCompilation* compilation = nullptr;
-  if (!nnapi_devices_.empty()) {
-    // Compile for the selected accelerator.
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(
-        context,
-        nnapi_->ANeuralNetworksCompilation_createForDevices(
-            nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
-            &compilation),
-        "creating NNAPI model for given devices", nnapi_errno);
-  } else {
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
-                                    nnapi_->ANeuralNetworksCompilation_create(
-                                        nn_model_.get(), &compilation),
-                                    "creating NNAPI compilation", nnapi_errno);
-  }
-
-  auto preference = delegate_options.execution_preference;
-  if (preference !=
-      StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
-    const int preference_result =
-        nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
-                                                         preference);
-    if (preference_result != ANEURALNETWORKS_NO_ERROR) {
-      nnapi_->ANeuralNetworksCompilation_free(compilation);
-      compilation = nullptr;
-    }
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
-                                    "setting compilation preferences",
-                                    nnapi_errno);
-  }
-
-  if (!nn_compilation_cache_token_.empty()) {
-    const char* cache_dir = delegate_options.cache_dir;
-    const int set_caching_result =
-        nnapi_->ANeuralNetworksCompilation_setCaching(
-            compilation, cache_dir, nn_compilation_cache_token_.data());
-    if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
-      nnapi_->ANeuralNetworksCompilation_free(compilation);
-      compilation = nullptr;
-    }
-    RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
-                                    "configuring NNAPI caching", nnapi_errno);
-  }
-  const int finish_result =
-      nnapi_->ANeuralNetworksCompilation_finish(compilation);
-  if (finish_result != ANEURALNETWORKS_NO_ERROR) {
-    nnapi_->ANeuralNetworksCompilation_free(compilation);
-    compilation = nullptr;
-  }
-  RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
-                                  "completing NNAPI compilation", nnapi_errno);
-  nn_compilation_.reset(compilation);
-
-  return kTfLiteOk;
-}
-
-TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
-    TfLiteContext* context, std::vector<int>* supported_nodes,
-    int* nnapi_errno) {
-  if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
-    return kTfLiteError;
-  }
-
-  // Determine the list of operations the device actually supports.
-  auto support_flags = absl::make_unique<bool[]>(nodes_.size());
-
-  RETURN_TFLITE_ERROR_IF_NN_ERROR(
-      context,
-      nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
-          nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
-          support_flags.get()),
-      "Checking supported operations for devices", nnapi_errno);
-
-  supported_nodes->clear();
-  for (int i = 0; i < nodes_.size(); i++) {
-    if (support_flags[i]) {
-      supported_nodes->push_back(nodes_[i]);
-    }
-  }
   return kTfLiteOk;
 }
@@ -3812,35 +3768,6 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(

 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;

-StatefulNnApiDelegate::Data::~Data() {
-  std::for_each(std::begin(delegate_state_cache),
-                std::end(delegate_state_cache),
-                [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
-                  delete entry.second;
-                });
-}
-
-void StatefulNnApiDelegate::Data::CacheDelegateKernel(
-    const TfLiteDelegateParams* delegate_params,
-    NNAPIDelegateKernel* delegate_state) {
-  const int cache_key = delegate_params->nodes_to_replace->data[0];
-  delegate_state_cache.emplace(cache_key, delegate_state);
-}
-
-absl::optional<NNAPIDelegateKernel*>
-StatefulNnApiDelegate::Data::GetCachedDelegateKernel(
-    const TfLiteDelegateParams* delegate_params) {
-  const int cache_key = delegate_params->nodes_to_replace->data[0];
-  const auto cached_state = delegate_state_cache.find(cache_key);
-  if (cached_state != std::end(delegate_state_cache)) {
-    auto result = absl::optional<NNAPIDelegateKernel*>(cached_state->second);
-    delegate_state_cache.erase(cached_state);
-    return result;
-  } else {
-    return absl::nullopt;
-  }
-}
-
 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
     : TfLiteDelegate(TfLiteDelegateCreate()),
       delegate_data_(
@@ -3950,8 +3877,7 @@ using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;

 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                               TfLiteDelegate* delegate) {
-  auto* delegate_data = static_cast<Data*>(delegate->data_);
-  int* nnapi_errno = &(delegate_data->nnapi_errno);
+  int* nnapi_errno = &(static_cast<Data*>(delegate->data_)->nnapi_errno);

   // Resetting the error code when the delegate is initialized
   // by TFLite. This causes the error to be reset if reusing the same
@@ -3966,19 +3892,17 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
   }

   int target_sdk_version = nnapi->android_sdk_version;
-  const StatefulNnApiDelegate::Options delegate_options =
-      StatefulNnApiDelegate::GetOptions(delegate);

   // For NNAPI 1.2+, check if there is any accelerator available.
   // If not, don't delegate to NNAPI's CPU reference implementation unless
   // it has been specified as target accelerator.
   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
-    if (ShouldUseTargetDevices(delegate_options)) {
+    if (ShouldUseTargetDevices(delegate)) {
       std::vector<ANeuralNetworksDevice*> devices;
       TF_LITE_ENSURE_STATUS(
           GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
       if (devices.empty()) {
-        if (delegate_options.accelerator_name) {
+        if (StatefulNnApiDelegate::GetOptions(delegate).accelerator_name) {
           // There was a selected device and it is not available.
           return kTfLiteError;
         } else {
@@ -4013,13 +3937,13 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

   // Check for every node whether it is supported.
-  const bool is_accelerator_specified = ShouldUseTargetDevices(
-      delegate_options, /*exclude_nnapi_reference=*/true);
   for (int node_index : TfLiteIntArrayView(plan)) {
     TfLiteNode* node;
     TfLiteRegistration* registration;
     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));
+    const bool is_accelerator_specified =
+        ShouldUseTargetDevices(delegate, /*exclude_nnapi_reference=*/true);
     if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
                                       registration->version,
                                       target_sdk_version, node,
                                       is_accelerator_specified)) {
@@ -4041,21 +3965,10 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                  size_t length) -> void* {
         const TfLiteDelegateParams* params =
             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
-
-        auto* delegate_data = static_cast<Data*>(params->delegate->data_);
-        int* nnapi_errno = &(delegate_data->nnapi_errno);
-
-        auto delegate_state_maybe =
-            delegate_data->GetCachedDelegateKernel(params);
-
-        NNAPIDelegateKernel* kernel_state;
-        if (delegate_state_maybe.has_value()) {
-          kernel_state = *delegate_state_maybe;
-        } else {
-          kernel_state = new NNAPIDelegateKernel;
-          kernel_state->Init(context, params, nnapi_errno);
-        }
-
+        int* nnapi_errno =
+            &(static_cast<Data*>(params->delegate->data_)->nnapi_errno);
+        NNAPIDelegateKernel* kernel_state = new NNAPIDelegateKernel;
+        kernel_state->Init(context, params, nnapi_errno);
         return kernel_state;
       },
@@ -4085,55 +3998,11 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
       .version = 1,
   };

-  std::vector<int>& nodes_to_delegate = supported_nodes;
-  if (is_accelerator_specified) {
-    TfLiteDelegateParams* params_array;
-    int num_partitions = 0;
-    // The first entry in the array is the element count.
-    std::vector<int> device_supported_nodes(1);
-    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
-        context, reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
-        &params_array, &num_partitions));
-    // For each partition, check which nodes are actually supported by the
-    // target accelerators.
-    delegate_data->delegate_state_cache.clear();
-    for (int idx = 0; idx < num_partitions; idx++) {
-      const auto& partition_params = params_array[idx];
-      auto kernel_state = absl::make_unique<NNAPIDelegateKernel>();
-      TfLiteDelegateParams params_with_delegate = partition_params;
-      params_with_delegate.delegate = delegate;
-      TF_LITE_ENSURE_STATUS(
-          kernel_state->Init(context, &params_with_delegate, nnapi_errno));
-
-      std::vector<int> supported_partition_nodes;
-      TF_LITE_ENSURE_STATUS(
-          kernel_state->GetOperationsSupportedByTargetNnApiDevices(
-              context, &supported_partition_nodes, nnapi_errno));
-      device_supported_nodes.insert(device_supported_nodes.end(),
                                    supported_partition_nodes.begin(),
-                                    supported_partition_nodes.end());
-
-      bool model_fully_supported = (supported_partition_nodes.size() ==
-                                    partition_params.nodes_to_replace->size);
-      if (model_fully_supported) {
-        delegate_data->CacheDelegateKernel(&partition_params,
-                                           kernel_state.release());
-      }
-    }
-
-    device_supported_nodes[0] = device_supported_nodes.size() - 1;
-    nodes_to_delegate = device_supported_nodes;
-  }
-
-  if (nodes_to_delegate.empty()) {
-    return kTfLiteOk;
-  } else {
-    // Request TFLite to partition the graph and create a new
-    // nnapi_delegate_kernel for each independent node subset.
-    return context->ReplaceNodeSubsetsWithDelegateKernels(
-        context, nnapi_delegate_kernel,
-        reinterpret_cast<TfLiteIntArray*>(nodes_to_delegate.data()), delegate);
-  }
+  // Request TFLite to partition the graph and create a new
+  // nnapi_delegate_kernel for each independent node subset.
+  return context->ReplaceNodeSubsetsWithDelegateKernels(
+      context, nnapi_delegate_kernel,
+      reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()), delegate);
 }

 // Returns a singleton NNAPI Delegate that can check for support of ops.
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
index f06e02dd793..022e9ed53ac 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
@@ -17,22 +17,14 @@ limitations under the License.

 #include
 #include
-#include
 #include

-#include "absl/types/optional.h"
 #include "tensorflow/lite/c/common.h"

 typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;

 namespace tflite {

-namespace delegate::nnapi {
-class NNAPIDelegateKernel;
-}  // namespace delegate::nnapi
-
-using tflite::delegate::nnapi::NNAPIDelegateKernel;
-
 // TfLiteDelegate to interface with NNAPI.
 class StatefulNnApiDelegate : public TfLiteDelegate {
  public:
@@ -152,21 +144,6 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
     // Contains a non-zero value if any NNAPI method call
     // operation returned a non-zero result code.
     int nnapi_errno;
-    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
-    // when trying to understand if all nodes are supported by the target
-    // accelerators.
-    // The key is the index of the first node in the partition.
-    // Couldn't use unique_ptr because of problems building on gcc.
-    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
-
-    ~Data();
-
-    // Caches an initialised NNAPIDelegateKernel.
- void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params, - NNAPIDelegateKernel* delegate_state); - // Returns a cached NNAPIDelegateKernel if available. - absl::optional GetCachedDelegateKernel( - const TfLiteDelegateParams* delegate_params); }; // Implements TfLiteDelegate::Prepare. Please refer to TFLiteDelegate diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc index 9501644f43e..6c0f239d7a3 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/minimal_logging.h" #include "tensorflow/lite/model.h" #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h" #include "tensorflow/lite/nnapi/nnapi_implementation.h" @@ -87,7 +86,7 @@ struct NnApiDeviceSelectionTest ::tflite::delegate::nnapi::NnApiDelegateMockTest::SetUp(); nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int { *numDevices = 3; - return ANEURALNETWORKS_NO_ERROR; + return 0; }; nnapi_->ANeuralNetworks_getDevice = [](uint32_t devIndex, ANeuralNetworksDevice** device) -> int { @@ -103,15 +102,8 @@ struct NnApiDeviceSelectionTest } else { *name = "nnapi-reference"; } - return ANEURALNETWORKS_NO_ERROR; + return 0; }; - nnapi_mock_->StubGetSupportedOperationsForDevicesWith( - [](const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, uint32_t numDevices, - bool* supportedOps) -> int { - supportedOps[0] = true; - return ANEURALNETWORKS_NO_ERROR; - }); } void InitWithOptions(tflite::StatefulNnApiDelegate::Options options) { m.Init(options, {TensorType_FLOAT32, {1, 2, 2, 1}}, @@ -124,13 +116,13 @@ struct NnApiDeviceSelectionTest }; TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) { - nnapi_mock_->StubCompilationCreateForDevicesWith( + nnapi_->ANeuralNetworksCompilation_createForDevices = [](ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) -> int { - EXPECT_TRUE(false) << "Should not call createForDevices"; - return 1; - }); + EXPECT_TRUE(false) << "Should not call createForDevices"; + return 1; + }; tflite::StatefulNnApiDelegate::Options options; InitWithOptions(options); @@ -140,20 +132,20 @@ TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) { TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) { nnapi_mock_->CompilationCreateReturns<1>(); - nnapi_mock_->StubCompilationCreateForDevicesWith( + nnapi_->ANeuralNetworksCompilation_createForDevices = [](ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) -> int { - EXPECT_EQ(numDevices, 1); - EXPECT_EQ(devices[0], reinterpret_cast(1)); - if (numDevices != 1 || - devices[0] != reinterpret_cast(1)) { - return 1; - } else { - *compilation = reinterpret_cast(3); - return ANEURALNETWORKS_NO_ERROR; - } - }); + EXPECT_EQ(numDevices, 1); + EXPECT_EQ(devices[0], reinterpret_cast(1)); + if (numDevices != 1 || + devices[0] != reinterpret_cast(1)) { + return 1; + } else { + *compilation = reinterpret_cast(3); + return 0; + } + }; tflite::StatefulNnApiDelegate::Options options; 
options.accelerator_name = "dsp"; @@ -164,22 +156,22 @@ TEST_F(NnApiDeviceSelectionTest, SetsDeviceBasedOnOptions) { TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) { nnapi_mock_->CompilationCreateReturns<1>(); - nnapi_mock_->StubCompilationCreateForDevicesWith( + nnapi_->ANeuralNetworksCompilation_createForDevices = [](ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) -> int { - EXPECT_EQ(numDevices, 2); - EXPECT_EQ(devices[0], reinterpret_cast(1)); - EXPECT_EQ(devices[1], reinterpret_cast(2)); - if (numDevices != 2 || - devices[0] != reinterpret_cast(1) || - devices[1] != reinterpret_cast(2)) { - return 1; - } else { - *compilation = reinterpret_cast(3); - return ANEURALNETWORKS_NO_ERROR; - } - }); + EXPECT_EQ(numDevices, 2); + EXPECT_EQ(devices[0], reinterpret_cast(1)); + EXPECT_EQ(devices[1], reinterpret_cast(2)); + if (numDevices != 2 || + devices[0] != reinterpret_cast(1) || + devices[1] != reinterpret_cast(2)) { + return 1; + } else { + *compilation = reinterpret_cast(3); + return 0; + } + }; tflite::StatefulNnApiDelegate::Options options; options.disallow_nnapi_cpu = true; @@ -193,14 +185,14 @@ TEST_F(NnApiDeviceSelectionTest, // Only nnapi-reference is available on device nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int { *numDevices = 1; - return ANEURALNETWORKS_NO_ERROR; + return 0; }; nnapi_->ANeuralNetworksDevice_getName = [](const ANeuralNetworksDevice* device, const char** name) -> int { if (device == reinterpret_cast(1)) { *name = "nnapi-reference"; } - return ANEURALNETWORKS_NO_ERROR; + return 0; }; tflite::StatefulNnApiDelegate::Options options; @@ -216,14 +208,14 @@ TEST_F(NnApiDeviceSelectionTest, // Only nnapi-reference is available on device nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int { *numDevices = 1; - return ANEURALNETWORKS_NO_ERROR; + return 0; }; nnapi_->ANeuralNetworksDevice_getName = [](const ANeuralNetworksDevice* device, const char** name) -> int { if (device == reinterpret_cast(1)) { *name = "nnapi-reference"; } - return ANEURALNETWORKS_NO_ERROR; + return 0; }; tflite::StatefulNnApiDelegate::Options options; @@ -357,172 +349,6 @@ TEST_F(UnsupportedOperationOnDeviceTest, << "Expected Max op to be delegated since it is supported in NNAPI 1.2."; } -// This is a model with two ops: -// -// input1 ----> -// ADD -- -// input2 --> | -// --> -// SUB --> output -// input3 ----------------> -// -class AddSubOpsAcceleratedModel : public MultiOpModel, public AcceleratedModel { - public: - AddSubOpsAcceleratedModel(const TensorData& input1, const TensorData& input2, - const TensorData& input3, const TensorData& output, - ActivationFunctionType activation_type, - const std::string& accelerator_name, - bool allow_fp32_relax_to_fp16 = false) - : MultiOpModel(), AcceleratedModel(accelerator_name) { - auto* delegate = GetDelegate(); - this->SetApplyDelegate([delegate](Interpreter* interpreter) { - interpreter->ModifyGraphWithDelegate(delegate); - }); - Init(input1, input2, input3, output, activation_type, - allow_fp32_relax_to_fp16); - } - - int input1() { return input1_; } - int input2() { return input2_; } - int input3() { return input3_; } - - std::vector GetOutput() { return ExtractVector(output_); } - - protected: - int input1_; - int input2_; - int input3_; - int output_; - - private: - // Performs initialization logic shared across all constructors. 
- void Init(const TensorData& input1, const TensorData& input2, - const TensorData& input3, const TensorData& output, - ActivationFunctionType activation_type, - bool allow_fp32_relax_to_fp16 = false) { - input1_ = AddInput(input1); - input2_ = AddInput(input2); - input3_ = AddInput(input3); - const int add_output = AddInnerTensor(output); - output_ = AddOutput(output); - AddBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions, - CreateAddOptions(builder_, activation_type).Union(), - {input1_, input2_}, {add_output}); - AddBuiltinOp(BuiltinOperator_SUB, BuiltinOptions_SubOptions, - CreateSubOptions(builder_, activation_type).Union(), - {add_output, input3_}, {output_}); - BuildInterpreter({GetShape(input1_), GetShape(input2_), GetShape(input3_)}, - allow_fp32_relax_to_fp16); - } -}; - -int should_build_model_with_sup_ops_compilation_model_create_count = 0; -int should_build_model_with_sup_ops_add_operation_count = 0; -TEST_F(UnsupportedOperationOnDeviceTest, - ShouldBuildModelWithOnlyDeviceSupportedOps) { - nnapi_mock_->SetNnapiSupportedDevice("test-device"); - - nnapi_mock_->StubGetSupportedOperationsForDevicesWith( - [](const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, uint32_t numDevices, - bool* supportedOps) -> int { - // Returning the first as supported since this will leverage - // the assertion on caching. - supportedOps[0] = true; - supportedOps[1] = false; - return ANEURALNETWORKS_NO_ERROR; - }); - - nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int { - ++should_build_model_with_sup_ops_compilation_model_create_count; - *model = reinterpret_cast(1); - return ANEURALNETWORKS_NO_ERROR; - }); - - nnapi_mock_->StubAddOperationWith( - [](ANeuralNetworksModel* model, ANeuralNetworksOperationType type, - uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, - const uint32_t* outputs) -> int { - ++should_build_model_with_sup_ops_add_operation_count; - return ANEURALNETWORKS_NO_ERROR; - }); - - AddSubOpsAcceleratedModel m( - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}}, - ActivationFunctionType_NONE, /*accelerator_name=*/"test-device"); - std::vector input1{-2.0, 0.2, 0.7, 0.9}; - std::vector input2{0.1, 0.2, 0.3, 0.5}; - m.PopulateTensor(m.input1(), input1); - m.PopulateTensor(m.input2(), input2); - m.PopulateTensor(m.input3(), input2); - m.Invoke(); - - EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1); - ASSERT_EQ(should_build_model_with_sup_ops_compilation_model_create_count, 2) - << "Model with unsupported operations has been cached"; - EXPECT_EQ(should_build_model_with_sup_ops_add_operation_count, 3) - << "The second model should contain only one operation"; -} - -TEST_F(UnsupportedOperationOnDeviceTest, ShouldRunOnCpuIfDeviceSupportsNoOps) { - nnapi_mock_->SetNnapiSupportedDevice("test-device"); - - nnapi_mock_->StubGetSupportedOperationsForDevicesWith( - [](const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, uint32_t numDevices, - bool* supportedOps) -> int { - std::fill(supportedOps, supportedOps + 2, false); - return ANEURALNETWORKS_NO_ERROR; - }); - - AddSubOpsAcceleratedModel m( - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}}, - ActivationFunctionType_NONE, /*accelerator_name=*/"test-device"); - std::vector input1{-2.0, 0.2, 0.7, 0.9}; - std::vector input2{0.1, 0.2, 0.3, 0.5}; - 
m.PopulateTensor(m.input1(), input1); - m.PopulateTensor(m.input2(), input2); - m.PopulateTensor(m.input3(), input2); - m.Invoke(); - - EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 2); -} - -int should_cache_model_compilation_model_create_count = 0; -TEST_F(UnsupportedOperationOnDeviceTest, ShouldCacheModelCompilation) { - nnapi_mock_->SetNnapiSupportedDevice("test-device"); - - nnapi_mock_->StubGetSupportedOperationsForDevicesWith( - [](const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, uint32_t numDevices, - bool* supportedOps) -> int { - std::fill(supportedOps, supportedOps + 2, true); - return ANEURALNETWORKS_NO_ERROR; - }); - - nnapi_mock_->StubModelCreateWith([](ANeuralNetworksModel** model) -> int { - ++should_cache_model_compilation_model_create_count; - *model = reinterpret_cast(1); - return ANEURALNETWORKS_NO_ERROR; - }); - - AddSubOpsAcceleratedModel m( - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, - {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {}}, - ActivationFunctionType_NONE, /*accelerator_name=*/"test-device"); - std::vector input1{-2.0, 0.2, 0.7, 0.9}; - std::vector input2{0.1, 0.2, 0.3, 0.5}; - m.PopulateTensor(m.input1(), input1); - m.PopulateTensor(m.input2(), input2); - m.PopulateTensor(m.input3(), input2); - m.Invoke(); - - ASSERT_EQ(m.CountOpsExecutedByCpuKernel(), 0); - EXPECT_EQ(should_cache_model_compilation_model_create_count, 1); -} - } // namespace } // namespace tflite diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_disabled.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_disabled.cc index 3c23054ea25..ef723c14cea 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_disabled.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_disabled.cc @@ -44,18 +44,4 @@ TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory( int StatefulNnApiDelegate::GetNnApiErrno() const { return 0; } -using ::tflite::delegate::nnapi::NNAPIDelegateKernel; - -StatefulNnApiDelegate::Data::~Data() {} - -void StatefulNnApiDelegate::Data::CacheDelegateKernel( - const TfLiteDelegateParams* delegate_params, - NNAPIDelegateKernel* delegate_state) {} - -absl::optional -StatefulNnApiDelegate::Data::GetCachedDelegateKernel( - const TfLiteDelegateParams* delegate_params) { - return absl::nullopt; -} - } // namespace tflite diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h index 709db8118c3..2377ea738d3 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h @@ -224,8 +224,7 @@ struct NNAPIValidationFailure { class NNAPIDelegateKernel { public: explicit NNAPIDelegateKernel(const NnApi* nnapi) - : initialised_(false), - nnapi_(nnapi), + : nnapi_(nnapi), nn_model_(nullptr, NNFreeModel(nnapi_)), nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {} NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {} @@ -256,41 +255,23 @@ class NNAPIDelegateKernel { // the given node std::vector* map_failures = nullptr); - // Initialize the kernel (a NN model) and builds the NN Model. + // Initialize the kernel (a NN model). // Any NNAPI Related error causing this method to fail will have the // associated error number stored in nnapi_errno TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params, int* nnapi_errno); - // Creates the NNAPI Compilation for the NN model. 
It assumes that Init has - // been called and completed successfully. // Any NNAPI Related error causing this method to fail will have the // associated error number stored in nnapi_errno TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node, int* nnapi_errno); - // Invoke the NN Model. Expects Init and Prepare to have been completed - // successfully. // Any NNAPI Related error causing this method to fail will have the // associated error number stored in nnapi_errno TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node, int* nnapi_errno); - // Returns the list of operations supported by the current NNAPI model as - // built in Prepare. Every operation is identified by the index as provided - // in the delegate parameters given to the delegate during the Init call. - // It expects the Init method has been called and completed successfully and - // returns kTfLiteError if not. Returns an error if any of the NNAPI - // operations fails or if the - // ANeuralNetworksModel_getSupportedOperationsForDevices function is not - // available in the NnApi object. - TfLiteStatus GetOperationsSupportedByTargetNnApiDevices( - TfLiteContext* context, std::vector* supported_nodes, - int* nnapi_errno); - private: - // True if initialization has been completed successfully - bool initialised_; // Access to NNApi. const NnApi* nnapi_; // ANN device handle. @@ -321,8 +302,6 @@ class NNAPIDelegateKernel { std::unique_ptr nn_input_memory_; std::unique_ptr nn_output_memory_; - std::vector nn_compilation_cache_token_; - void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context, int builtin_code, const TfLiteNode* node, diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index 919c1ddcc2b..dbef50a8c10 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/minimal_logging.h" diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index cbb39c27fc5..5e326c32219 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -344,32 +344,4 @@ int SingleOpModel::CountOpsExecutedByCpuKernel() { SingleOpModel::~SingleOpModel() { ValidateAcceleration(); } -void MultiOpModel::AddBuiltinOp( - BuiltinOperator type, BuiltinOptions builtin_options_type, - const flatbuffers::Offset& builtin_options, - const std::vector& inputs, const std::vector& outputs) { - opcodes_.push_back(CreateOperatorCode(builder_, type, 0)); - const int opcode_index = opcodes_.size() - 1; - operators_.push_back(CreateOperator( - builder_, opcode_index, builder_.CreateVector(inputs), - builder_.CreateVector(outputs), builtin_options_type, - builtin_options, - /*custom_options=*/0, CustomOptionsFormat_FLEXBUFFERS)); -} - -void MultiOpModel::AddCustomOp( - const string& name, const std::vector& custom_option, - const std::function& registration, - const std::vector& inputs, const std::vector& outputs) { - custom_registrations_[name] = registration; - opcodes_.push_back( - CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); - const int opcode_index = opcodes_.size() - 1; - operators_.push_back(CreateOperator( - builder_, opcode_index, builder_.CreateVector(inputs), - builder_.CreateVector(outputs), BuiltinOptions_NONE, 0, - builder_.CreateVector(custom_option), - CustomOptionsFormat_FLEXBUFFERS)); -} - } // namespace tflite diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index 9a3eddcfcb4..20ba0261267 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -423,74 +423,6 @@ class SingleOpModel { std::unique_ptr interpreter_; std::unique_ptr resolver_; - std::vector> opcodes_; - std::vector> operators_; - std::map> custom_registrations_; - - template - int AddTensor(TensorData t, std::initializer_list data, - bool is_variable = false) { - int id = tensors_.size(); - - // This is slightly different depending on whether we are adding a - // quantized or a regular tensor. - bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0); - - flatbuffers::Offset q_params = 0; - - if (is_quantized) { - if (t.min != 0 || t.max != 0) { - if (t.type == TensorType_UINT8) { - std::tie(t.scale, t.zero_point) = - QuantizationParams(t.min, t.max); - } else if (t.type == TensorType_INT8) { - std::tie(t.scale, t.zero_point) = - QuantizationParams(t.min, t.max); - } else if (t.type == TensorType_INT32) { - std::tie(t.scale, t.zero_point) = - QuantizationParams(t.min, t.max); - } else if (t.type == TensorType_INT16) { - std::tie(t.scale, t.zero_point) = - QuantizationParams(t.min, t.max); - } else { - LOG(FATAL) << "No support for the requested quantized type"; - } - t.min = 0; - t.max = 0; - } - - q_params = CreateQuantizationParameters( - builder_, /*min=*/0, /*max=*/0, - builder_.CreateVector({t.scale}), - builder_.CreateVector({t.zero_point})); - } - - int buffer_id = 0; - if (data.size()) { - // Initialize buffers list with empty buffer to allow for non-const - // tensors. 
- if (buffers_.empty()) { - buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({}))); - } - - // Add data as a Buffer to buffers list. - buffer_id = buffers_.size(); - auto data_buffer = - builder_.CreateVector(reinterpret_cast(data.begin()), - sizeof(T) * data.size()); - buffers_.push_back(CreateBuffer(builder_, data_buffer)); - } - - tensors_.push_back(CreateTensor(builder_, - builder_.CreateVector(t.shape), t.type, - /*buffer=*/buffer_id, - /*name=*/0, q_params, is_variable)); - - tensor_data_[id] = t; - - return id; - } - private: template std::pair QuantizationParams(float f_min, float f_max) { @@ -669,6 +601,70 @@ class SingleOpModel { return id; } + template + int AddTensor(TensorData t, std::initializer_list data, + bool is_variable = false) { + int id = tensors_.size(); + + // This is slightly different depending on whether we are adding a + // quantized or a regular tensor. + bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0); + + flatbuffers::Offset q_params = 0; + + if (is_quantized) { + if (t.min != 0 || t.max != 0) { + if (t.type == TensorType_UINT8) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else if (t.type == TensorType_INT8) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else if (t.type == TensorType_INT32) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else if (t.type == TensorType_INT16) { + std::tie(t.scale, t.zero_point) = + QuantizationParams(t.min, t.max); + } else { + LOG(FATAL) << "No support for the requested quantized type"; + } + t.min = 0; + t.max = 0; + } + + q_params = CreateQuantizationParameters( + builder_, /*min=*/0, /*max=*/0, + builder_.CreateVector({t.scale}), + builder_.CreateVector({t.zero_point})); + } + + int buffer_id = 0; + if (data.size()) { + // Initialize buffers list with empty buffer to allow for non-const + // tensors. + if (buffers_.empty()) { + buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({}))); + } + + // Add data as a Buffer to buffers list. + buffer_id = buffers_.size(); + auto data_buffer = + builder_.CreateVector(reinterpret_cast(data.begin()), + sizeof(T) * data.size()); + buffers_.push_back(CreateBuffer(builder_, data_buffer)); + } + + tensors_.push_back(CreateTensor(builder_, + builder_.CreateVector(t.shape), t.type, + /*buffer=*/buffer_id, + /*name=*/0, q_params, is_variable)); + + tensor_data_[id] = t; + + return id; + } + std::vector QuantizeTensor(int index, const std::vector& data) { TfLiteTensor* t = interpreter_->tensor(index); @@ -727,7 +723,10 @@ class SingleOpModel { std::vector intermediates_; std::vector outputs_; std::vector> tensors_; + std::vector> opcodes_; + std::vector> operators_; std::vector> buffers_; + std::map> custom_registrations_; // A function pointer that gets called after the interpreter is created but // before evaluation happens. This is useful for applying a delegate. 
std::function apply_delegate_fn_; @@ -838,28 +837,6 @@ struct TypeUnion { typedef uint8_t ScalarType; }; -class MultiOpModel : public SingleOpModel { - public: - MultiOpModel() : SingleOpModel() {} - ~MultiOpModel() {} - - void AddBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type, - const flatbuffers::Offset& builtin_options, - const std::vector& inputs, - const std::vector& outputs); - - void AddCustomOp(const string& name, - const std::vector& custom_option, - const std::function& registration, - const std::vector& inputs, - const std::vector& outputs); - - template - int AddInnerTensor(TensorData t) { - return AddTensor(t, {}, false); - } -}; - } // namespace tflite #endif // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_ diff --git a/tensorflow/lite/nnapi/nnapi_handler.h b/tensorflow/lite/nnapi/nnapi_handler.h index a8a1670d996..0bcdda26a46 100644 --- a/tensorflow/lite/nnapi/nnapi_handler.h +++ b/tensorflow/lite/nnapi/nnapi_handler.h @@ -16,7 +16,6 @@ limitations under the License. #define TENSORFLOW_LITE_NNAPI_NNAPI_HANDLER_H_ #include "tensorflow/core/platform/logging.h" -#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h" #include "tensorflow/lite/nnapi/nnapi_implementation.h" namespace tflite { @@ -98,10 +97,6 @@ class NnApiHandler { }; } - void StubModelCreateWith(int(stub)(ANeuralNetworksModel** model)) { - nnapi_->ANeuralNetworksModel_create = stub; - } - template void AddOperandReturns() { nnapi_->ANeuralNetworksModel_addOperand = @@ -124,13 +119,6 @@ class NnApiHandler { const uint32_t* outputs) { return Value; }; } - void StubAddOperationWith( - int(stub)(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, - uint32_t inputCount, const uint32_t* inputs, - uint32_t outputCount, const uint32_t* outputs)) { - nnapi_->ANeuralNetworksModel_addOperation = stub; - } - template void IdentifyInputAndOutputsReturns() { nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs = @@ -183,12 +171,6 @@ class NnApiHandler { }; } - void StubCompilationCreateForDevicesWith(int(stub)( - ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, - uint32_t numDevices, ANeuralNetworksCompilation** compilation)) { - nnapi_->ANeuralNetworksCompilation_createForDevices = stub; - } - template void CompilationFinishReturns() { nnapi_->ANeuralNetworksCompilation_finish = @@ -228,21 +210,6 @@ class NnApiHandler { [](ANeuralNetworksExecution* execution) { return Value; }; } - template - void GetSupportedOperationsForDevicesReturns() { - nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices = - [](const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, uint32_t numDevices, - bool* supportedOps) { return Value; }; - } - - void StubGetSupportedOperationsForDevicesWith( - int(stub)(const ANeuralNetworksModel* model, - const ANeuralNetworksDevice* const* devices, - uint32_t numDevices, bool* supportedOps)) { - nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices = stub; - } - void SetAndroidSdkVersion(int version); protected:
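
---

Note for reviewers trying the change out: a minimal sketch of how a client selects a target accelerator through the delegate options exercised above. It is not part of the patch; it assumes an already-built tflite::Interpreter, "dsp" is the device name used by the SetsDeviceBasedOnOptions test, ApplyNnApiDelegate is a hypothetical helper name, and the delegate object must outlive the interpreter that uses it.

  #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
  #include "tensorflow/lite/interpreter.h"

  // Hypothetical helper: restrict NNAPI execution to one named device.
  TfLiteStatus ApplyNnApiDelegate(tflite::Interpreter* interpreter) {
    tflite::StatefulNnApiDelegate::Options options;
    options.accelerator_name = "dsp";   // run only on this device
    options.disallow_nnapi_cpu = true;  // never fall back to nnapi-reference
    // static so the delegate outlives the interpreter it is attached to.
    static tflite::StatefulNnApiDelegate delegate(options);
    // Nodes that fail NNAPIDelegateKernel::Validate stay on the built-in
    // TFLite CPU kernels.
    return interpreter->ModifyGraphWithDelegate(&delegate);
  }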
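The 256-bit compilation-cache token assembled in NNAPIDelegateKernel::Init can also be read in isolation, as in the sketch below. It assumes GetHash reduces each TfLiteIntArray to a uint64_t, as in the delegate source; MakeCacheToken is a hypothetical standalone name for logic the patch keeps inline.

  #include <cstdint>
  #include <cstring>
  #include <functional>
  #include <string>
  #include <vector>

  // Four 64-bit hashes (model token, replaced nodes, input tensors, output
  // tensors) are packed into the 32-byte (256-bit) token NNAPI expects.
  std::vector<uint8_t> MakeCacheToken(const std::string& model_token,
                                      uint64_t nodes_hash, uint64_t inputs_hash,
                                      uint64_t outputs_hash) {
    uint64_t parts[4] = {std::hash<std::string>{}(model_token), nodes_hash,
                         inputs_hash, outputs_hash};
    std::vector<uint8_t> token(32, 0);
    std::memcpy(token.data(), parts, sizeof(parts));  // copy the token bits
    return token;
  }

Feeding the same graph partition and model token through this construction yields the same token, which is what lets ANeuralNetworksCompilation_setCaching reuse a previously compiled artifact from cache_dir.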