Restoring change: if a target accelerator is specified, use its feature level, rather than the Android SDK version, to determine which operations to delegate.

PiperOrigin-RevId: 290573620
Change-Id: I4d87331932c9ff993ec65102e2ac72a68fbbed62
Stefano Galarraga 2020-01-20 01:11:21 -08:00 committed by TensorFlower Gardener
parent 082872d859
commit ca9b58dfcf
10 changed files with 531 additions and 87 deletions
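
For context, a minimal standalone sketch of the rule this change implements (my illustration, not part of the diff below; the helper name is hypothetical): when target devices are present, operations are validated against the highest feature level those devices report, capped at the platform SDK level, since the nnapi-reference device reports a sentinel level of 1000.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical condensation of GetTargetSdkVersion in the diff below: choose
// the feature level used to decide which operations to delegate.
int64_t PickTargetFeatureLevel(int64_t android_sdk_version,
                               const std::vector<int64_t>& device_levels) {
  if (device_levels.empty()) return android_sdk_version;
  const int64_t devices_level =
      *std::max_element(device_levels.begin(), device_levels.end());
  // nnapi-reference reports feature level 1000; cap at the platform level.
  return std::min(devices_level, android_sdk_version);
}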

View File

@@ -34,6 +34,7 @@ cc_library(
"//tensorflow/lite/c:common",
"//tensorflow/lite/kernels:kernel_util",
"//tensorflow/lite/nnapi:nnapi_implementation",
"//tensorflow/lite/nnapi:nnapi_lib",
"//tensorflow/lite/nnapi:nnapi_util",
],
)
@@ -105,6 +106,7 @@ cc_library(
":nnapi_delegate",
"//tensorflow/lite/nnapi:nnapi_handler",
"//tensorflow/lite/nnapi:nnapi_implementation",
"//tensorflow/lite/nnapi:nnapi_lib",
"@com_google_absl//absl/memory",
"@com_google_googletest//:gtest",
],
@@ -122,6 +124,7 @@ cc_test(
],
deps = [
":nnapi_delegate",
":nnapi_delegate_mock_test",
"//tensorflow/lite:framework",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",

View File

@@ -28,9 +28,6 @@ limitations under the License.
#include <tuple>
#include <vector>
// This section needs to be before the import of nnapi_delegate_kernel
// because the code changes according to the definition of
// TFLITE_NNAPI_ALLOW_MMAP_SHARING
#ifdef __ANDROID__
#include <sys/system_properties.h>
#endif
@@ -299,12 +296,14 @@ static size_t getNumPaddingBytes(size_t byte_size) {
return num_padding_bytes;
}
// Return NNAPI device handle with the provided null-terminated device name. If
// no matching device could be found, nullptr will be returned.
ANeuralNetworksDevice* GetDeviceHandle(TfLiteContext* context,
const char* device_name_ptr) {
if (!device_name_ptr) return nullptr;
ANeuralNetworksDevice* device_handle = nullptr;
// Return NNAPI device handle with the provided null-terminated device name.
// Returns kTfLiteError in case of any NNAPI error or if no device with the
// given name can be found.
TfLiteStatus GetDeviceHandle(TfLiteContext* context,
const char* device_name_ptr,
ANeuralNetworksDevice** result, int* nnapi_errno) {
if (!device_name_ptr) return kTfLiteError;
*result = nullptr;
std::string device_name(device_name_ptr);
uint32_t num_devices = 0;
NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
@@ -312,21 +311,27 @@ ANeuralNetworksDevice* GetDeviceHandle(TfLiteContext* context,
for (uint32_t i = 0; i < num_devices; i++) {
ANeuralNetworksDevice* device = nullptr;
const char* buffer = nullptr;
NnApiImplementation()->ANeuralNetworks_getDevice(i, &device);
NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer);
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context, NnApiImplementation()->ANeuralNetworks_getDevice(i, &device),
"Searching for target device", nnapi_errno);
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer),
"Searching for target device", nnapi_errno);
if (device_name == buffer) {
device_handle = device;
break;
*result = device;
return kTfLiteOk;
}
}
if (!device_handle) {
context->ReportError(context,
"Could not find the specified NNAPI accelerator: %s. "
"Must be one of: {%s}.",
device_name_ptr,
nnapi::GetStringDeviceNamesList().c_str());
}
return device_handle;
context->ReportError(context,
"Could not find the specified NNAPI accelerator: %s. "
"Must be one of: {%s}.",
device_name_ptr,
nnapi::GetStringDeviceNamesList().c_str());
return kTfLiteError;
}
// Compute the hash of a TfLiteIntArray.
@@ -354,6 +359,112 @@ enum {
NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
};
// Returns the SDK level to target when delegating to the given devices.
// The returned level is the max of the feature levels supported by the
// devices, or the current Android SDK level if no device is present.
TfLiteStatus GetTargetSdkVersion(
TfLiteContext* context, const NnApi* nnapi,
const std::vector<ANeuralNetworksDevice*>& device_handles,
int* target_sdk_version, int* nnapi_errno) {
*target_sdk_version = nnapi->android_sdk_version;
int64_t devices_sdk_version = -1;
for (const auto* device_handle : device_handles) {
int64_t curr_device_sdk_version;
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
nnapi->ANeuralNetworksDevice_getFeatureLevel(device_handle,
&curr_device_sdk_version),
"Searching for target device", nnapi_errno);
devices_sdk_version =
std::max(curr_device_sdk_version, devices_sdk_version);
}
if ((devices_sdk_version > 0) &&
// This second check is necessary since if the nnapi-reference device is
// in the list of target devices the devices_sdk_version value will be
// 1000.
(devices_sdk_version < nnapi->android_sdk_version)) {
TFLITE_LOG(TFLITE_LOG_INFO,
"Changing Android NN SDK version %d to version "
"supported by target devices: %d",
nnapi->android_sdk_version, devices_sdk_version);
*target_sdk_version = devices_sdk_version;
}
return kTfLiteOk;
}
// Returns true if this delegate is configured to use a specific set of devices.
// This happens if either:
// - the accelerator_name option has been specified, or
// - the NNAPI CPU implementation has been explicitly disabled.
// If exclude_nnapi_reference is true, this method returns false when the
// accelerator_name in the delegate options is equal to "nnapi-reference".
bool ShouldUseTargetDevices(TfLiteDelegate* delegate,
bool exclude_nnapi_reference = false) {
const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
const char* device_name_ptr = delegate_options.accelerator_name;
std::string nnapi_cpu("nnapi-reference");
bool has_selected_accelerator = device_name_ptr != nullptr;
if (exclude_nnapi_reference && has_selected_accelerator) {
has_selected_accelerator = nnapi_cpu != device_name_ptr;
}
return (delegate_options.disallow_nnapi_cpu) || has_selected_accelerator;
}
// Fills the given result vector with the list of devices the given delegate
// is referring to. There are three possible results:
// - an empty vector (not the full list of available accelerators, for
//   efficiency reasons) if no accelerator is chosen and the
//   disallow_nnapi_cpu delegate option is false;
// - a single-element vector with the target accelerator, if an accelerator
//   name is specified in the delegate options;
// - the full list of available devices minus the NNAPI reference
//   implementation, if the disallow_nnapi_cpu delegate option has been
//   specified.
TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
const NnApi* nnapi, int* nnapi_errno,
std::vector<ANeuralNetworksDevice*>* result) {
if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
return kTfLiteError;
}
const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
const char* device_name_ptr = delegate_options.accelerator_name;
if (device_name_ptr != nullptr) {
// User specified an accelerator to use.
ANeuralNetworksDevice* nnapi_device = nullptr;
TF_LITE_ENSURE_STATUS(
GetDeviceHandle(context, device_name_ptr, &nnapi_device, nnapi_errno));
result->push_back(nnapi_device);
} else if (delegate_options.disallow_nnapi_cpu) {
std::string nnapi_cpu("nnapi-reference");
uint32_t num_devices = 0;
NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
for (uint32_t i = 0; i < num_devices; i++) {
ANeuralNetworksDevice* device = nullptr;
const char* buffer = nullptr;
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context, NnApiImplementation()->ANeuralNetworks_getDevice(i, &device),
"Getting list of available devices", nnapi_errno);
RETURN_TFLITE_ERROR_IF_NN_ERROR(
context,
NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer),
"Getting list of available devices", nnapi_errno);
if (nnapi_cpu != buffer) {
result->push_back(device);
}
}
}
return kTfLiteOk;
}
} // namespace
namespace delegate {
@@ -2899,35 +3010,15 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
const auto delegate_options =
StatefulNnApiDelegate::GetOptions(params->delegate);
const char* device_name_ptr = delegate_options.accelerator_name;
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12) {
if (device_name_ptr != nullptr) {
// User specified an accelerator to use.
ANeuralNetworksDevice* nnapi_device =
GetDeviceHandle(context, device_name_ptr);
if (nnapi_device == nullptr) {
return kTfLiteError;
}
nnapi_devices_.push_back(nnapi_device);
} else if (delegate_options.disallow_nnapi_cpu) {
std::string nnapi_cpu("nnapi-reference");
uint32_t num_devices = 0;
NnApiImplementation()->ANeuralNetworks_getDeviceCount(&num_devices);
if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
ShouldUseTargetDevices(params->delegate)) {
TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
nnapi_errno, &nnapi_devices_));
for (uint32_t i = 0; i < num_devices; i++) {
ANeuralNetworksDevice* device = nullptr;
const char* buffer = nullptr;
NnApiImplementation()->ANeuralNetworks_getDevice(i, &device);
NnApiImplementation()->ANeuralNetworksDevice_getName(device, &buffer);
if (nnapi_cpu != buffer) {
nnapi_devices_.push_back(device);
}
}
if (nnapi_devices_.empty()) {
context->ReportError(
context, "NNAPI delegate requested but no accelerators available.");
return kTfLiteError;
}
if (nnapi_devices_.empty()) {
context->ReportError(
context, "NNAPI delegate requested but no accelerators available.");
return kTfLiteError;
}
}
@@ -3504,11 +3595,20 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context,
builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
}
}
// If we have target accelerators, the target SDK version might differ from
// the current Android version.
int target_sdk_version = nnapi_->android_sdk_version;
if (!nnapi_devices_.empty()) {
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi_, nnapi_devices_, &target_sdk_version, nnapi_errno));
}
// Get op type and operands
// Fails if the Map function failed
// Fails if the Validate function failed
int nn_op_type;
TF_LITE_ENSURE_STATUS(Map(context, reg->builtin_code, reg->version,
nnapi_->android_sdk_version,
target_sdk_version,
{context, &builder, node, &model_state_outputs_,
&model_state_tfl_inputs_, &feedback_loops_},
&nn_op_type));
@@ -3755,20 +3855,30 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
!nnapi->nnapi_exists) {
return kTfLiteOk;
}
bool is_accelerator_specified = false;
int target_sdk_version = nnapi->android_sdk_version;
// For NNAPI 1.2+, check if there is any accelerator available.
// If not, don't delegate to NNAPI's CPU reference implementation.
// If not, don't delegate to NNAPI's CPU reference implementation unless
// it has been specified as target accelerator.
if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
// Check if user specified an accelerator to use.
const char* device_name_ptr = GetOptions(delegate).accelerator_name;
if (device_name_ptr) {
if (!GetDeviceHandle(context, device_name_ptr)) {
return kTfLiteError;
} else {
// also check if the selected device is not CPU reference impl.
const string kNnapiReferenceImplName = "nnapi-reference";
is_accelerator_specified = kNnapiReferenceImplName != device_name_ptr;
if (ShouldUseTargetDevices(delegate)) {
std::vector<ANeuralNetworksDevice*> devices;
TF_LITE_ENSURE_STATUS(
GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
if (devices.empty()) {
if (StatefulNnApiDelegate::GetOptions(delegate).accelerator_name) {
// There was a selected device and it is not available.
return kTfLiteError;
} else {
// Only nnapi-reference is available but was disabled by the delegate
// options
return kTfLiteOk;
}
}
TF_LITE_ENSURE_STATUS(GetTargetSdkVersion(
context, nnapi, devices, &target_sdk_version, nnapi_errno));
} else {
// If no accelerator is specified, only use NNAPI if an accelerator is
// available. Any available accelerator will make the device_count larger
@@ -3791,16 +3901,17 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
TfLiteIntArray* plan;
TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
int android_sdk_version = NnApiImplementation()->android_sdk_version;
// Check for every node if it is supported
for (int node_index : TfLiteIntArrayView(plan)) {
TfLiteNode* node;
TfLiteRegistration* registration;
TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
context, node_index, &node, &registration));
if (NNAPIDelegateKernel::Validate(
context, registration->builtin_code, registration->version,
android_sdk_version, node, is_accelerator_specified)) {
const bool is_accelerator_specified =
ShouldUseTargetDevices(delegate, /*exclude_nnapi_reference=*/true);
if (NNAPIDelegateKernel::Validate(context, registration->builtin_code,
registration->version, target_sdk_version,
node, is_accelerator_specified)) {
supported_nodes.push_back(node_index);
}
}
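
As a usage note (my sketch, not part of this commit; it assumes an existing tflite::Interpreter named interpreter, and "example-dsp" is a hypothetical accelerator name), a client opts into the device-based behavior by naming a target accelerator:

// With an accelerator named, the delegate now validates ops against that
// device's reported feature level instead of the Android SDK version.
tflite::StatefulNnApiDelegate::Options options;
options.accelerator_name = "example-dsp";  // hypothetical device name
// Alternatively: options.disallow_nnapi_cpu = true;  // skip nnapi-reference
tflite::StatefulNnApiDelegate delegate(options);
interpreter->ModifyGraphWithDelegate(&delegate);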

View File

@@ -180,6 +180,52 @@ TEST_F(NnApiDeviceSelectionTest, DisallowsCPUBasedOnOptions) {
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
}
TEST_F(NnApiDeviceSelectionTest,
DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuEnabled) {
// Only nnapi-reference is available on device
nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
*numDevices = 1;
return 0;
};
nnapi_->ANeuralNetworksDevice_getName =
[](const ANeuralNetworksDevice* device, const char** name) -> int {
if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
*name = "nnapi-reference";
}
return 0;
};
tflite::StatefulNnApiDelegate::Options options;
options.disallow_nnapi_cpu = false;
InitWithOptions(options);
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
TEST_F(NnApiDeviceSelectionTest,
DoesNotDelegateIfOnlyReferenceDeviceIsAvailable_CpuDisabled) {
// Only nnapi-reference is available on device
nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
*numDevices = 1;
return 0;
};
nnapi_->ANeuralNetworksDevice_getName =
[](const ANeuralNetworksDevice* device, const char** name) -> int {
if (device == reinterpret_cast<ANeuralNetworksDevice*>(1)) {
*name = "nnapi-reference";
}
return 0;
};
tflite::StatefulNnApiDelegate::Options options;
options.disallow_nnapi_cpu = true;
InitWithOptions(options);
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1);
}
} // namespace
} // namespace tflite
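
For contrast with the two tests above, a hedged sketch (hypothetical, following the same fixture) of the one configuration that still delegates when only the reference device exists: naming nnapi-reference explicitly, which ShouldUseTargetDevices treats as a selected accelerator.

// Hypothetical: explicitly selecting the reference device keeps delegation
// enabled even though no real accelerator is present.
tflite::StatefulNnApiDelegate::Options options;
options.accelerator_name = "nnapi-reference";
InitWithOptions(options);
m.Invoke();
EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);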

View File

@@ -288,6 +288,8 @@ class NNAPIDelegateKernel {
const NnApi* nnapi_;
// ANN device handle.
std::vector<ANeuralNetworksDevice*> nnapi_devices_;
// Name of the nnapi device; empty if nnapi_devices_ is empty.
std::string device_name_;
// ANN API state.
std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>

View File

@@ -28,6 +28,7 @@ limitations under the License.
#include <gtest/gtest.h>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_handler.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
@@ -52,21 +53,22 @@ class NnApiMock : public ::tflite::nnapi::NnApiHandler {
return open("/dev/zero", O_RDWR);
};
GetDeviceCountReturns<0>();
ModelCreateReturns<0>();
AddOperandReturns<0>();
SetOperandValueReturns<0>();
AddOperationReturns<0>();
IdentifyInputAndOutputsReturns<0>();
RelaxComputationFloatReturns<0>();
ModelFinishReturns<0>();
MemoryCreateFromFdReturns<0>();
CompilationCreateReturns<0>();
CompilationFinishReturns<0>();
ExecutionCreateReturns<0>();
ExecutionSetInputFromMemoryReturns<0>();
ExecutionSetOutputFromMemoryReturns<0>();
ExecutionComputeReturns<0>();
ModelCreateReturns<ANEURALNETWORKS_NO_ERROR>();
AddOperandReturns<ANEURALNETWORKS_NO_ERROR>();
SetOperandValueReturns<ANEURALNETWORKS_NO_ERROR>();
AddOperationReturns<ANEURALNETWORKS_NO_ERROR>();
IdentifyInputAndOutputsReturns<ANEURALNETWORKS_NO_ERROR>();
RelaxComputationFloatReturns<ANEURALNETWORKS_NO_ERROR>();
ModelFinishReturns<ANEURALNETWORKS_NO_ERROR>();
MemoryCreateFromFdReturns<ANEURALNETWORKS_NO_ERROR>();
CompilationCreateReturns<ANEURALNETWORKS_NO_ERROR>();
CompilationCreateForDevicesReturns<ANEURALNETWORKS_NO_ERROR>();
CompilationFinishReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionCreateReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionSetInputFromMemoryReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionSetOutputFromMemoryReturns<ANEURALNETWORKS_NO_ERROR>();
ExecutionComputeReturns<ANEURALNETWORKS_NO_ERROR>();
SetNnapiSupportedDevice("test-device", android_sdk_version);
}
~NnApiMock() { Reset(); }

View File

@@ -18,6 +18,7 @@ limitations under the License.
#include <gtest/gtest.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/test_util.h"
#include "tensorflow/lite/minimal_logging.h"
@@ -1895,7 +1896,7 @@ class BaseActivationsOpModel : public SingleOpModelWithNNAPI {
public:
// Most activations don't take any options, so this constructor works for
// them.
BaseActivationsOpModel(BuiltinOperator type, TensorData input) {
BaseActivationsOpModel(BuiltinOperator type, const TensorData& input) {
input_ = AddInput(input);
if (input.type == TensorType_UINT8) {
output_ = AddOutput({input.type, {}, 0, 0, 1. / 256});
@@ -3031,19 +3032,19 @@ class LSTMOpModel : public SingleOpModelWithNNAPI {
PopulateTensor(projection_bias_, f);
}
void SetInputLayerNormCoefficients(std::vector<float> f) {
void SetInputLayerNormCoefficients(const std::vector<float>& f) {
PopulateTensor(input_layer_norm_coefficients_, f);
}
void SetForgetLayerNormCoefficients(std::vector<float> f) {
void SetForgetLayerNormCoefficients(const std::vector<float>& f) {
PopulateTensor(forget_layer_norm_coefficients_, f);
}
void SetCellLayerNormCoefficients(std::vector<float> f) {
void SetCellLayerNormCoefficients(const std::vector<float>& f) {
PopulateTensor(cell_layer_norm_coefficients_, f);
}
void SetOutputLayerNormCoefficients(std::vector<float> f) {
void SetOutputLayerNormCoefficients(const std::vector<float>& f) {
PopulateTensor(output_layer_norm_coefficients_, f);
}
@@ -5122,6 +5123,129 @@ TEST(QuantizedPadV2OpTest, Int8AdvancedDynamicValuedTest) {
AdvancedDynamicValuedTest<int8_t, TensorType_INT8>();
}
struct UnsupportedOperationOnDeviceTest
: ::tflite::delegate::nnapi::NnApiDelegateMockTest {};
class AcceleratedModel {
public:
StatefulNnApiDelegate* GetDelegate() { return stateful_delegate_.get(); }
protected:
// Builds a delegate with a target accelerator name.
explicit AcceleratedModel(const std::string& accelerator_name) {
StatefulNnApiDelegate::Options options;
options.accelerator_name = accelerator_name.c_str();
stateful_delegate_.reset(new StatefulNnApiDelegate(options));
}
// Builds a delegate with no target accelerator name; the NNAPI CPU fallback
// implementation can be disabled through the disallow_nnapi_cpu flag.
explicit AcceleratedModel(bool disallow_nnapi_cpu) {
StatefulNnApiDelegate::Options options;
options.disallow_nnapi_cpu = disallow_nnapi_cpu;
stateful_delegate_.reset(new StatefulNnApiDelegate(options));
}
private:
std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
};
class ArgMaxOpModel : public SingleOpModel, public AcceleratedModel {
public:
ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
int axis_value, TensorType output_type, const char* device_name)
: SingleOpModel(), AcceleratedModel(device_name) {
Init(input_shape, input_type, axis_value, output_type);
}
ArgMaxOpModel(std::initializer_list<int> input_shape, TensorType input_type,
int axis_value, TensorType output_type, bool disallow_nnapi_cpu)
: SingleOpModel(), AcceleratedModel(disallow_nnapi_cpu) {
Init(input_shape, input_type, axis_value, output_type);
}
int input() const { return input_; }
protected:
int input_;
int axis_;
int output_;
void Init(std::initializer_list<int> input_shape, TensorType input_type,
int axis_value, TensorType output_type) {
auto* delegate = GetDelegate();
this->SetApplyDelegate([delegate](Interpreter* interpreter) {
interpreter->ModifyGraphWithDelegate(delegate);
});
input_ = AddInput(input_type);
axis_ = AddConstInput(TensorType_INT32, {axis_value}, {1});
output_ = AddOutput(output_type);
SetBuiltinOp(BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions,
CreateArgMaxOptions(builder_, output_type).Union());
BuildInterpreter({input_shape, {1}});
}
};
TEST_F(UnsupportedOperationOnDeviceTest,
ShouldUseDeviceFeatureLevelWhenSpecifyingTargetDevice) {
nnapi_mock_->SetAndroidSdkVersion(29);
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
TensorType_INT32, "test-device");
m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
m.Invoke();
EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
<< "Expected Max not to be delegates since it not supported before NNAPI "
"1.2 and device declares to support only NNAPI 1.1.";
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
TensorType_INT32, "test-device");
m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
m1.Invoke();
EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
<< "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
TEST_F(UnsupportedOperationOnDeviceTest,
ShouldUseDeviceFeatureLevelWhenDisablingCPU) {
nnapi_mock_->SetAndroidSdkVersion(29);
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/28);
ArgMaxOpModel m({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
TensorType_INT32, /*disallow_nnapi_cpu=*/true);
m.PopulateTensor<float>(m.input(), {0.1, 0.9, 0.7, 0.3});
m.Invoke();
EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 1)
<< "Expected Max not to be delegates since it not supported before NNAPI "
"1.2 and device declares to support only NNAPI 1.1.";
ArgMaxOpModel m1({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
TensorType_INT32, /*disallow_nnapi_cpu=*/false);
m1.PopulateTensor<float>(m1.input(), {0.1, 0.9, 0.7, 0.3});
m1.Invoke();
EXPECT_EQ(m1.CountOpsExecutedByCpuKernel(), 0)
<< "Expected Max op to be delegated since we enabled NNAPI CPU "
"implementation.";
nnapi_mock_->SetNnapiSupportedDevice("test-device", /* feature_level=*/29);
ArgMaxOpModel m2({1, 1, 1, 4}, TensorType_FLOAT32, /*axis_value=*/3,
TensorType_INT32, /*disallow_nnapi_cpu=*/true);
m2.PopulateTensor<float>(m2.input(), {0.1, 0.9, 0.7, 0.3});
m2.Invoke();
EXPECT_EQ(m2.CountOpsExecutedByCpuKernel(), 0)
<< "Expected Max op to be delegated since it is supported in NNAPI 1.2.";
}
} // namespace
} // namespace tflite

View File

@@ -295,6 +295,22 @@ int CountPartitionsDelegatedTo(Interpreter* interpreter,
return result;
}
// Returns the number of nodes that will be executed on the CPU
int CountPartitionsExecutedByCpuKernel(const Interpreter* interpreter) {
int result = 0;
for (int node_idx : interpreter->execution_plan()) {
TfLiteNode node;
TfLiteRegistration reg;
std::tie(node, reg) = *(interpreter->node_and_registration(node_idx));
if (node.delegate == nullptr) {
++result;
}
}
return result;
}
} // namespace
void SingleOpModel::ExpectOpAcceleratedWithNnapi(const std::string& test_id) {
@@ -322,6 +338,10 @@ void SingleOpModel::ValidateAcceleration() {
}
}
int SingleOpModel::CountOpsExecutedByCpuKernel() {
return CountPartitionsExecutedByCpuKernel(interpreter_.get());
}
SingleOpModel::~SingleOpModel() { ValidateAcceleration(); }
} // namespace tflite
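
A minimal usage sketch of the new helper (hypothetical model m, as in the ArgMax tests earlier in this commit):

m.Invoke();
// Counts execution-plan nodes with no delegate attached: 0 means every node
// was claimed by the NNAPI delegate; a positive value is the number of nodes
// left to TFLite's own CPU kernels.
EXPECT_EQ(m.CountOpsExecutedByCpuKernel(), 0);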

View File

@@ -377,6 +377,7 @@ class SingleOpModel {
// Enables NNAPI delegate application during interpreter creation.
static void SetForceUseNnapi(bool use_nnapi);
static bool GetForceUseNnapi();
int CountOpsExecutedByCpuKernel();
protected:
int32_t GetTensorSize(int index) const;

View File

@@ -21,6 +21,16 @@ limitations under the License.
namespace tflite {
namespace nnapi {
// static
const char NnApiHandler::kNnapiReferenceDeviceName[] = "nnapi-reference";
// static
const int NnApiHandler::kNnapiReferenceDevice = 1;
// static
const int NnApiHandler::kNnapiDevice = 2;
char* NnApiHandler::nnapi_device_name_ = nullptr;
int NnApiHandler::nnapi_device_feature_level_;
const NnApi* NnApiPassthroughInstance() {
static const NnApi orig_nnapi_copy = *NnApiImplementation();
return &orig_nnapi_copy;
@@ -40,5 +50,73 @@ void NnApiHandler::Reset() {
*nnapi_ = *NnApiPassthroughInstance();
}
void NnApiHandler::SetAndroidSdkVersion(int version) {
nnapi_->android_sdk_version = version;
}
void NnApiHandler::SetDeviceName(const std::string& name) {
delete[] nnapi_device_name_;
nnapi_device_name_ = new char[name.size() + 1];
std::strcpy(nnapi_device_name_, name.c_str()); // NOLINT
}
void NnApiHandler::GetDeviceNameReturnsName(const std::string& name) {
NnApiHandler::SetDeviceName(name);
GetDeviceNameReturns<0>();
}
void NnApiHandler::SetNnapiSupportedDevice(const std::string& name,
int feature_level) {
NnApiHandler::SetDeviceName(name);
nnapi_device_feature_level_ = feature_level;
GetDeviceCountReturnsCount<2>();
nnapi_->ANeuralNetworks_getDevice =
[](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
if (devIndex > 1) {
return ANEURALNETWORKS_BAD_DATA;
}
if (devIndex == 1) {
*device =
reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice);
} else {
*device = reinterpret_cast<ANeuralNetworksDevice*>(
NnApiHandler::kNnapiReferenceDevice);
}
return ANEURALNETWORKS_NO_ERROR;
};
nnapi_->ANeuralNetworksDevice_getName =
[](const ANeuralNetworksDevice* device, const char** name) -> int {
if (device ==
reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice)) {
*name = NnApiHandler::nnapi_device_name_;
return ANEURALNETWORKS_NO_ERROR;
}
if (device == reinterpret_cast<ANeuralNetworksDevice*>(
NnApiHandler::kNnapiReferenceDevice)) {
*name = NnApiHandler::kNnapiReferenceDeviceName;
return ANEURALNETWORKS_NO_ERROR;
}
return ANEURALNETWORKS_BAD_DATA;
};
nnapi_->ANeuralNetworksDevice_getFeatureLevel =
[](const ANeuralNetworksDevice* device, int64_t* featureLevel) -> int {
if (device ==
reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice)) {
*featureLevel = NnApiHandler::nnapi_device_feature_level_;
return ANEURALNETWORKS_NO_ERROR;
}
if (device == reinterpret_cast<ANeuralNetworksDevice*>(
NnApiHandler::kNnapiReferenceDevice)) {
*featureLevel = 1000;
return ANEURALNETWORKS_NO_ERROR;
}
return ANEURALNETWORKS_BAD_DATA;
};
}
} // namespace nnapi
} // namespace tflite
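
To make the stubbed behavior concrete, here is what a caller observes after SetNnapiSupportedDevice("test-device", 28) (my sketch; nnapi is assumed to be the stubbed NnApi instance):

uint32_t num_devices = 0;
nnapi->ANeuralNetworks_getDeviceCount(&num_devices);   // num_devices == 2
ANeuralNetworksDevice* device = nullptr;
nnapi->ANeuralNetworks_getDevice(1, &device);          // "test-device" handle
const char* name = nullptr;
nnapi->ANeuralNetworksDevice_getName(device, &name);   // name == "test-device"
int64_t level = 0;
nnapi->ANeuralNetworksDevice_getFeatureLevel(device, &level);  // level == 28
// Index 0 instead yields nnapi-reference, which reports feature level 1000.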

View File

@@ -46,15 +46,49 @@ class NnApiHandler {
template <int Value>
void GetDeviceCountReturns() {
nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
*numDevices = 2;
*numDevices = 1;
return Value;
};
}
template <int DeviceCount>
void GetDeviceCountReturnsCount() {
nnapi_->ANeuralNetworks_getDeviceCount = [](uint32_t* numDevices) -> int {
*numDevices = DeviceCount;
return ANEURALNETWORKS_NO_ERROR;
};
}
void StubGetDeviceCountWith(int(stub)(uint32_t*)) {
nnapi_->ANeuralNetworks_getDeviceCount = stub;
}
template <int Value>
void GetDeviceReturns() {
nnapi_->ANeuralNetworks_getDevice =
[](uint32_t devIndex, ANeuralNetworksDevice** device) -> int {
*device =
reinterpret_cast<ANeuralNetworksDevice*>(NnApiHandler::kNnapiDevice);
return Value;
};
}
template <int Value>
void GetDeviceNameReturns() {
nnapi_->ANeuralNetworksDevice_getName =
[](const ANeuralNetworksDevice* device, const char** name) -> int {
*name = NnApiHandler::nnapi_device_name_;
return Value;
};
}
void GetDeviceNameReturnsName(const std::string& name);
// Configures all the functions related to device browsing to expose a
// device with the given name plus the CPU fallback nnapi-reference.
// The extra device will report the specified feature level.
void SetNnapiSupportedDevice(const std::string& name, int feature_level = 29);
template <int Value>
void ModelCreateReturns() {
nnapi_->ANeuralNetworksModel_create = [](ANeuralNetworksModel** model) {
@@ -126,6 +160,17 @@ class NnApiHandler {
};
}
template <int Value>
void CompilationCreateForDevicesReturns() {
nnapi_->ANeuralNetworksCompilation_createForDevices =
[](ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
ANeuralNetworksCompilation** compilation) {
*compilation = reinterpret_cast<ANeuralNetworksCompilation*>(3);
return Value;
};
}
template <int Value>
void CompilationFinishReturns() {
nnapi_->ANeuralNetworksCompilation_finish =
@@ -165,10 +210,22 @@
[](ANeuralNetworksExecution* execution) { return Value; };
}
void SetAndroidSdkVersion(int version);
protected:
explicit NnApiHandler(NnApi* nnapi) : nnapi_(nnapi) { DCHECK(nnapi); }
NnApi* nnapi_;
static const char kNnapiReferenceDeviceName[];
static const int kNnapiReferenceDevice;
static const int kNnapiDevice;
static void SetDeviceName(const std::string& name);
private:
static char* nnapi_device_name_;
static int nnapi_device_feature_level_;
};
// Returns a pointer to an unaltered instance of NNAPI. Is intended