Disallow use of NNAPI CPU implementation by default on Android 10 and later. TFLite's own implementation typically performs better.

PiperOrigin-RevId: 339195098 Change-Id: If0d09d60c9e6fb7288abcf0b4f3f8b732c58e724
2020-10-27 00:13:05 -07:00 · 2020-10-27 00:13:05 -07:00 · 9c38a2aeb7
commit 9c38a2aeb7
parent 51e1e188a2
10 changed files with 42 additions and 17 deletions
--- a/tensorflow/lite/delegates/nnapi/java/src/main/java/org/tensorflow/lite/nnapi/NnApiDelegate.java
+++ b/tensorflow/lite/delegates/nnapi/java/src/main/java/org/tensorflow/lite/nnapi/NnApiDelegate.java
@ -76,8 +76,8 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
    }

    /**
-     * Configure the location to be used to store model compilation cache entries. If either
-     * {@code cacheDir} or {@code modelToken} parameters are unset NNAPI caching will be disabled.
+     * Configure the location to be used to store model compilation cache entries. If either {@code
+     * cacheDir} or {@code modelToken} parameters are unset NNAPI caching will be disabled.
     *
     * <p>Only effective on Android 10 (API level 29) and above.
     */
@ -151,7 +151,7 @@ public class NnApiDelegate implements Delegate, AutoCloseable {
            /*overrideDisallowCpu=*/ options.useNnapiCpu != null,
            /*disallowCpuValue=*/ options.useNnapiCpu != null
                ? !options.useNnapiCpu.booleanValue()
-                : false,
+                : true,
            options.allowFp16 != null ? options.allowFp16 : false);
  }

--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@ -565,14 +565,18 @@ TfLiteStatus GetTargetSdkVersion(
 // If exclude_nnapi_reference is true this method will return false if the
 // accelerator_name in the delegate options is equal to "nnapi-reference"
 bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
+                            const NnApi* nnapi,
                            bool exclude_nnapi_reference = false) {
  const char* device_name_ptr = delegate_options.accelerator_name;
  std::string nnapi_cpu("nnapi-reference");
  bool has_selected_accelerator = device_name_ptr != nullptr;
  if (exclude_nnapi_reference && has_selected_accelerator) {
-    has_selected_accelerator = nnapi_cpu != device_name_ptr;
+    if (nnapi_cpu == device_name_ptr) return false;
  }
-  return (delegate_options.disallow_nnapi_cpu) || has_selected_accelerator;
+  return (delegate_options.disallow_nnapi_cpu &&
+          nnapi->android_sdk_version >=
+              delegate::nnapi::kMinSdkVersionForNNAPI12) ||
+         has_selected_accelerator;
 }

 // Fills the given result vector with the list of devices the given delegate
@ -3479,7 +3483,7 @@ TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
  const auto delegate_options =
      StatefulNnApiDelegate::GetOptions(params->delegate);
  if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
-      ShouldUseTargetDevices(delegate_options)) {
+      ShouldUseTargetDevices(delegate_options, nnapi_)) {
    TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
                                           nnapi_errno, &nnapi_devices_));

@ -4809,7 +4813,7 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
  // If not, don't delegate to NNAPI's CPU reference implementation unless
  // it has been specified as target accelerator.
  if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
-    if (ShouldUseTargetDevices(delegate_options)) {
+    if (ShouldUseTargetDevices(delegate_options, nnapi)) {
      std::vector<ANeuralNetworksDevice*> devices;
      TF_LITE_ENSURE_STATUS(
          GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
@ -4849,7 +4853,7 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,

  // Check for every node if it is supported
  const bool is_accelerator_specified = ShouldUseTargetDevices(
-      delegate_options, /*exclude_nnapi_reference=*/true);
+      delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
  for (int node_index : TfLiteIntArrayView(plan)) {
    TfLiteNode* node;
    TfLiteRegistration* registration;
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
@ -79,7 +79,7 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
    // above. The NNAPI CPU typically performs less well than built-in TfLite
    // kernels, but allowing CPU allows partial acceleration of models. If this
    // is set to true, NNAPI is only used if the whole model is accelerated.
-    bool disallow_nnapi_cpu = false;
+    bool disallow_nnapi_cpu = true;

    // Specifies the max number of partitions to delegate. A value <= 0 means
    // no limit.
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_device_selection_test.cc
@ -114,7 +114,7 @@ struct NnApiDeviceSelectionTest
  FloatAddOpModel m;
 };

-TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) {
+TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWhenCpuAllowed) {
  nnapi_mock_->StubCompilationCreateForDevicesWith(
      [](ANeuralNetworksModel* model,
         const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
@ -124,6 +124,7 @@ TEST_F(NnApiDeviceSelectionTest, DoesntSetDevicesWithoutFlags) {
      });

  tflite::StatefulNnApiDelegate::Options options;
+  options.disallow_nnapi_cpu = false;
  InitWithOptions(options);
  m.Invoke();
  EXPECT_EQ(m.GetCompilationStatus(), kTfLiteOk);
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_errno_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_errno_test.cc
@ -16,6 +16,7 @@ limitations under the License.

 #include <gtest/gtest.h>
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/subgraph.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
 #include "tensorflow/lite/interpreter.h"
@ -30,7 +31,8 @@ namespace {
 class SingleOpModelWithNNAPI : public SingleOpModel {
 public:
  explicit SingleOpModelWithNNAPI(const NnApi* nnapi) {
-    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
+    options_.disallow_nnapi_cpu = false;
+    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options_));
    this->SetDelegate(stateful_delegate_.get());
  }

@ -42,6 +44,7 @@ class SingleOpModelWithNNAPI : public SingleOpModel {

 private:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
+  StatefulNnApiDelegate::Options options_;
 };

 class FloatAddOpModel : public SingleOpModelWithNNAPI {
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_signed_quantization_test.cc
@ -41,7 +41,8 @@ class SingleOpModelWithNNAPI : public SingleOpModel {
 public:
  SingleOpModelWithNNAPI() = default;
  void Init(const NnApi* nnapi) {
-    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi));
+    options_.disallow_nnapi_cpu = false;
+    stateful_delegate_.reset(new StatefulNnApiDelegate(nnapi, options_));
    SetDelegate(stateful_delegate_.get());
  }

@ -54,6 +55,7 @@ class SingleOpModelWithNNAPI : public SingleOpModel {

 protected:
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
+  StatefulNnApiDelegate::Options options_;
  TfLiteStatus compilation_status_;
 };

--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
@ -47,11 +47,17 @@ MATCHER(QuantizedNear, "") {

 class SingleOpModelWithNNAPI : public SingleOpModel {
 public:
-  SingleOpModelWithNNAPI() { SetDelegate(NnApiDelegate()); }
+  SingleOpModelWithNNAPI() {
+    options_.disallow_nnapi_cpu = false;
+    stateful_delegate_.reset(new StatefulNnApiDelegate(options_));
+    SetDelegate(stateful_delegate_.get());
+  }

  explicit SingleOpModelWithNNAPI(
      const StatefulNnApiDelegate::Options& options) {
-    stateful_delegate_.reset(new StatefulNnApiDelegate(options));
+    options_ = options;
+    options_.disallow_nnapi_cpu = false;
+    stateful_delegate_.reset(new StatefulNnApiDelegate(options_));
    SetDelegate(stateful_delegate_.get());
  }

@ -107,6 +113,7 @@ class SingleOpModelWithNNAPI : public SingleOpModel {
 private:
  // Stateful NNAPI delegate. This is valid only if the stateful constructor is
  // used.
+  StatefulNnApiDelegate::Options options_;
  std::unique_ptr<StatefulNnApiDelegate> stateful_delegate_;
 };

--- a/tensorflow/lite/kernels/test_main.cc
+++ b/tensorflow/lite/kernels/test_main.cc
@ -34,6 +34,7 @@ void InitKernelTest(int* argc, char** argv) {
    auto* params = delegate_providers->MutableParams();
    if (!params->HasValueSet<std::string>("nnapi_accelerator_name")) {
      params->Set<std::string>("nnapi_accelerator_name", "nnapi-reference");
+      params->Set("disable_nnapi_cpu", false);
    }
  }
 }
--- a/tensorflow/lite/kernels/test_util.cc
+++ b/tensorflow/lite/kernels/test_util.cc
@ -230,6 +230,13 @@ TfLiteStatus SingleOpModel::ApplyDelegate() {
    ++num_applied_delegates_;
  } else {
    auto* delegate_providers = tflite::KernelTestDelegateProviders::Get();
+    // Most TFLite NNAPI delegation tests have been written to run against the
+    // NNAPI CPU path, so we enable it for tests. However, we first need to
+    // check that the parameter is present - it will not be if the NNAPI
+    // delegate provider is not linked into the test.
+    if (delegate_providers->ConstParams().HasParam("disable_nnapi_cpu")) {
+      delegate_providers->MutableParams()->Set("disable_nnapi_cpu", false);
+    }
    for (auto& one : delegate_providers->CreateAllDelegates()) {
      // The raw ptr always points to the actual TfLiteDelegate object.
      auto* delegate_raw_ptr = one.get();
--- a/tensorflow/lite/tools/delegates/nnapi_delegate_provider.cc
+++ b/tensorflow/lite/tools/delegates/nnapi_delegate_provider.cc
@ -33,7 +33,7 @@ class NnapiDelegateProvider : public DelegateProvider {
    default_params_.AddParam("nnapi_accelerator_name",
                             ToolParam::Create<std::string>(""));
    default_params_.AddParam("disable_nnapi_cpu",
-                             ToolParam::Create<bool>(false));
+                             ToolParam::Create<bool>(true));
    default_params_.AddParam("nnapi_allow_fp16",
                             ToolParam::Create<bool>(false));
  }
@ -104,8 +104,8 @@ TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate(
        params.Get<std::string>("nnapi_accelerator_name");
    if (!accelerator_name.empty()) {
      options.accelerator_name = accelerator_name.c_str();
-    } else if (params.Get<bool>("disable_nnapi_cpu")) {
-      options.disallow_nnapi_cpu = true;
+    } else {
+      options.disallow_nnapi_cpu = params.Get<bool>("disable_nnapi_cpu");
    }

    if (params.Get<bool>("nnapi_allow_fp16")) {