Introduces a new experimental package that:

- Defines a schema for configuring delegates
- Defines a C++ plugin mechanism using the schema, so that code can support
  configuring arbitrary delegates without a build-time dependency
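
A minimal sketch of the intended client flow (mirroring nnapi_plugin_test.cc in this change; the helper name CreateNnapiDelegate is illustrative):

    #include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
    #include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"

    tflite::delegates::TfLiteDelegatePtr CreateNnapiDelegate() {
      // Build a TFLiteSettings flatbuffer that selects the NNAPI delegate.
      flatbuffers::FlatBufferBuilder fbb;
      auto nnapi_settings = tflite::CreateNNAPISettings(fbb);
      auto* settings = flatbuffers::GetTemporaryPointer(
          fbb, tflite::CreateTFLiteSettings(fbb, tflite::Delegate_NNAPI,
                                            nnapi_settings));
      // Look the plugin up by name; this call site has no build-time
      // dependency on the NNAPI delegate, only a link-time dependency on the
      // plugin target.
      auto plugin = tflite::delegates::DelegatePluginRegistry::CreateByName(
          "NnapiPlugin", *settings);
      // Return a null delegate if the plugin was not linked in.
      return plugin ? plugin->Create()
                    : tflite::delegates::TfLiteDelegatePtr(
                          nullptr, [](TfLiteDelegate*) {});
    }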

PiperOrigin-RevId: 316678829
Change-Id: I36ce8a6175b550d83dfe9cf1f237a04173fb8b16
A. Unique TensorFlower 2020-06-16 07:41:44 -07:00 committed by TensorFlower Gardener
parent 426f62af5e
commit cb60e1c14b
10 changed files with 1021 additions and 0 deletions

tensorflow/lite/experimental/acceleration/configuration/BUILD
@@ -0,0 +1,165 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library", "flatbuffer_java_library", "flatc_path")
package(
default_visibility = [
"//visibility:public",
],
licenses = ["notice"], # Apache 2.0
)
genrule(
name = "configuration_schema",
srcs = ["configuration.proto"],
outs = ["configuration.fbs"],
# We rename the namespace since otherwise the proto classes and flatbuffer
# classes would have the same names.
cmd = """
$(location {}) --proto -o $(@D) $(location :configuration.proto)
perl -p -i -e 's/tflite.proto/tflite/' $(@D)/configuration.fbs
""".format(flatc_path),
tools = [
flatc_path,
],
)
genrule(
name = "configuration_fbs_contents_cc",
srcs = ["configuration.fbs"],
outs = ["configuration_fbs_contents-inl.h"],
cmd = """
echo 'constexpr char configuration_fbs_contents[] = R"Delimiter(' > $(@)
cat < $(<) >> $(@)
echo ')Delimiter";' >> $(@)
""",
)
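# The generated header simply wraps the schema source in a raw string
# literal; a sketch of its contents (derived directly from the cmd above):
#
#   constexpr char configuration_fbs_contents[] = R"Delimiter(
#   ...contents of configuration.fbs...
#   )Delimiter";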
proto_library(
name = "configuration_proto",
srcs = [
"configuration.proto",
],
)
cc_proto_library(
name = "configuration_cc_proto",
deps = [":configuration_proto"],
)
java_lite_proto_library(
name = "configuration_java_proto_lite",
deps = [":configuration_proto"],
)
flatbuffer_cc_library(
name = "configuration_fbs",
srcs = [":configuration.fbs"],
)
flatbuffer_java_library(
name = "configuration_fbs_java",
srcs = [":configuration.fbs"],
)
cc_library(
name = "proto_to_flatbuffer",
srcs = [
"configuration_fbs_contents-inl.h",
"proto_to_flatbuffer.cc",
],
hdrs = ["proto_to_flatbuffer.h"],
deps = [
":configuration_cc_proto",
":configuration_fbs",
"//tensorflow/core/platform:protobuf",
"//tensorflow/lite:minimal_logging",
"@flatbuffers",
],
)
cc_library(
name = "delegate_registry",
srcs = ["delegate_registry.cc"],
hdrs = ["delegate_registry.h"],
deps = [
":configuration_fbs",
"//tensorflow/lite/c:common",
"@com_google_absl//absl/synchronization",
],
)
cc_library(
name = "nnapi_plugin",
srcs = ["nnapi_plugin.cc"],
deps = [
":configuration_fbs",
":delegate_registry",
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"@com_google_absl//absl/memory",
],
alwayslink = 1, # For registration to always run.
)
cc_test(
name = "nnapi_plugin_test",
srcs = ["nnapi_plugin_test.cc"],
tags = [
"no_mac",
"no_windows",
"tflite_not_portable_ios",
],
deps = [
":configuration_fbs",
":delegate_registry",
":nnapi_plugin",
"//tensorflow/lite:framework",
"//tensorflow/lite/c:common",
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"//tensorflow/lite/delegates/nnapi:nnapi_delegate_mock_test",
"//tensorflow/lite/kernels:test_util",
"@com_google_googletest//:gtest_main",
"@flatbuffers",
],
)
cc_library(
name = "hexagon_plugin",
srcs = ["hexagon_plugin.cc"],
deps = [
":configuration_fbs",
":delegate_registry",
"@com_google_absl//absl/memory",
] + select({
"//tensorflow:android": [
"//tensorflow/lite/delegates/hexagon:hexagon_delegate",
],
"//conditions:default": [],
}),
alwayslink = 1, # For registration to always run.
)
cc_library(
name = "gpu_plugin",
srcs = ["gpu_plugin.cc"],
deps = [
":configuration_fbs",
":delegate_registry",
"//tensorflow/lite/delegates/gpu:delegate",
"@com_google_absl//absl/memory",
],
alwayslink = 1, # For registration to always run.
)

tensorflow/lite/experimental/acceleration/configuration/configuration.proto
@@ -0,0 +1,208 @@
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.
syntax = "proto2";
package tflite.proto;
// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
// Match any selected preference. In the compatibility list this acts as a
// wildcard (semantically, the value is the same as on input).
ANY = 0;
// Match low latency preference. Both compatibility list and input.
LOW_LATENCY = 1;
// Match low power preference. Both compatibility list and input.
LOW_POWER = 2;
// Never accelerate. Can be used for input to compatibility list or for
// standalone Acceleration configuration.
FORCE_CPU = 3;
}
// TFLite delegate to use.
enum Delegate {
NONE = 0;
NNAPI = 1;
GPU = 2;
HEXAGON = 3;
XNNPACK = 4;
// TODO(b/157893534): Support exposing edgetpu tflite delegate creation
// options.
EDGETPU = 5;
}
enum NNAPIExecutionPreference {
// Undefined.
UNDEFINED = 0;
// Prefer executing in a way that minimizes battery drain.
NNAPI_LOW_POWER = 1;
// Prefer returning a single answer as fast as possible, even if this causes
// more power consumption.
NNAPI_FAST_SINGLE_ANSWER = 2;
// Prefer maximizing the throughput of successive frames, for example when
// processing successive frames coming from the camera.
NNAPI_SUSTAINED_SPEED = 3;
}
// One possible acceleration configuration.
message ComputeSettings {
// Which preference to use this accelerator for.
optional ExecutionPreference preference = 1;
// How to configure TFLite
optional TFLiteSettings tflite_settings = 2;
// Identifiers to use for instrumentation and telemetry.
optional string model_namespace_for_statistics = 3;
optional string model_identifier_for_statistics = 4;
}
// NNAPI delegate settings.
message NNAPISettings {
// Which instance (NNAPI accelerator) to use. One driver may provide several
// accelerators (though a driver may also hide several back-ends behind one
// name, at the choice of the driver vendor).
// Note that driver introspection is only available in Android Q and later.
optional string accelerator_name = 1;
// NNAPI model compilation caching settings to be passed to
// tflite::StatefulNnApiDelegate
optional string cache_directory = 2;
optional string model_token = 3;
// NNAPI execution preference to pass. See
// https://developer.android.com/ndk/reference/group/neural-networks.html
optional NNAPIExecutionPreference execution_preference = 4;
// Number of instances to cache for the same model (for input size
// changes). This is mandatory for getting reasonable performance in that
// case.
optional int32 no_of_nnapi_instances_to_cache = 5;
// Whether to automatically fall back to TFLite CPU path.
optional FallbackSettings fallback_settings = 6;
// Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
// 10+ when an accelerator name is not specified. The NNAPI CPU typically
// performs less well than the TfLite built-in kernels, but allowing it lets a
// model be partially accelerated, which may be a win.
optional bool allow_nnapi_cpu_on_android_10_plus = 7;
}
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and if it's not available fall back to OpenGL.
enum GPUBackend {
UNSET = 0;
OPENCL = 1;
OPENGL = 2;
// Not yet supported.
// VULKAN = 3;
// METAL = 4;
}
// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
optional bool is_precision_loss_allowed = 1;
optional bool enable_quantized_inference = 2 [default = true];
optional GPUBackend force_backend = 3;
// TODO(b/152019007): add remaining options.
}
// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
optional int32 debug_level = 1;
optional int32 powersave_level = 2;
optional bool print_graph_profile = 3;
optional bool print_graph_debug = 4;
}
// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
optional int32 num_threads = 1;
}
message CPUSettings {
optional int32 num_threads = 1;
}
// How to configure TFLite.
message TFLiteSettings {
// Which delegate to use.
optional Delegate delegate = 1;
// How to configure the chosen delegate.
// (In principle we would like to use 'oneof', but flatc turns that into a
// nested anonymous table rather than a union. See
// https://github.com/google/flatbuffers/issues/4628).
optional NNAPISettings nnapi_settings = 2;
optional GPUSettings gpu_settings = 3;
optional HexagonSettings hexagon_settings = 4;
optional XNNPackSettings xnnpack_settings = 5;
// How to configure CPU execution.
optional CPUSettings cpu_settings = 6;
// Shared delegation settings.
optional int32 max_delegated_partitions = 7;
}
// Whether to automatically fall back to the TFLite CPU path on delegation
// errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
// Whether to allow automatically falling back to TfLite CPU path on
// compilation failure. Default is not allowing automatic fallback.
//
// This is useful in naive production usecases where the caller would prefer
// for the model to run even if it's not accelerated. More advanced users will
// implement fallback themselves; e.g., by using a different model on CPU.
//
// Note that compilation errors may occur either at initial
// ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
// resizing.
optional bool allow_automatic_fallback_on_compilation_error = 7;
// Whether to allow automatically falling back to TfLite CPU path on
// execution error. Default is not allowing automatic fallback.
//
// Experimental, use with care (only when you have complete control over the
// client code).
//
// The caveat above for compilation error holds. Additionally, execution-time
// errors are harder to handle automatically as they require invalidating the
// TfLite interpreter which most client code has not been designed to deal
// with.
optional bool allow_automatic_fallback_on_execution_error = 8;
}
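
As an illustration of the message layout above, a client might populate the proto form like this (a sketch using standard protoc-generated accessors; field and enum names come from the schema):

    #include "tensorflow/lite/experimental/acceleration/configuration/configuration.pb.h"

    tflite::proto::ComputeSettings MakeExampleSettings() {
      tflite::proto::ComputeSettings settings;
      settings.set_preference(tflite::proto::LOW_LATENCY);
      auto* tflite_settings = settings.mutable_tflite_settings();
      tflite_settings->set_delegate(tflite::proto::NNAPI);
      auto* nnapi = tflite_settings->mutable_nnapi_settings();
      nnapi->set_execution_preference(tflite::proto::NNAPI_FAST_SINGLE_ANSWER);
      // Opt in to CPU fallback on compilation errors (off by default).
      nnapi->mutable_fallback_settings()
          ->set_allow_automatic_fallback_on_compilation_error(true);
      return settings;
    }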

tensorflow/lite/experimental/acceleration/configuration/delegate_registry.cc
@@ -0,0 +1,60 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
#include "absl/synchronization/mutex.h"
namespace tflite {
namespace delegates {
void DelegatePluginRegistry::RegisterImpl(
const std::string& name,
std::function<
std::unique_ptr<DelegatePluginInterface>(const TFLiteSettings&)>
creator_function) {
absl::MutexLock lock(&mutex_);
factories_[name] = creator_function;
}
std::unique_ptr<DelegatePluginInterface> DelegatePluginRegistry::CreateImpl(
const std::string& name, const TFLiteSettings& settings) {
absl::MutexLock lock(&mutex_);
auto it = factories_.find(name);
if (it != factories_.end()) {
return it->second(settings);
} else {
return nullptr;
}
}
DelegatePluginRegistry* DelegatePluginRegistry::GetSingleton() {
static auto* instance = new DelegatePluginRegistry();
return instance;
}
std::unique_ptr<DelegatePluginInterface> DelegatePluginRegistry::CreateByName(
const std::string& name, const TFLiteSettings& settings) {
auto* const instance = DelegatePluginRegistry::GetSingleton();
return instance->CreateImpl(name, settings);
}
DelegatePluginRegistry::Register::Register(const std::string& name,
CreatorFunction creator_function) {
auto* const instance = DelegatePluginRegistry::GetSingleton();
instance->RegisterImpl(name, creator_function);
}
} // namespace delegates
} // namespace tflite

tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h
@@ -0,0 +1,95 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_DELEGATE_REGISTRY_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_DELEGATE_REGISTRY_H_
#include <memory>
#include <unordered_map>
#include "absl/synchronization/mutex.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
// Defines an interface for TFLite delegate plugins.
//
// The acceleration library aims to support all TFLite delegates based on
// configuration expressed as data (flatbuffers). However, consumers tend to
// care about size and also use a subset of delegates. Hence we don't want to
// statically build against all delegates.
//
// This interface allows plugins to handle specific delegates.
//
// The goal of this interface is not to abstract away all the differences
// between delegates; the goal is only to avoid static linking.
//
// Note to implementers: this interface may change if new delegates don't fit
// into the same design.
namespace tflite {
namespace delegates {
// Same as Interpreter::TfLiteDelegatePtr, redefined here to avoid pulling in
// the tensorflow/lite/interpreter.h dependency.
using TfLiteDelegatePtr =
std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
class DelegatePluginInterface {
public:
virtual TfLiteDelegatePtr Create() = 0;
virtual int GetDelegateErrno(TfLiteDelegate* from_delegate) = 0;
virtual ~DelegatePluginInterface() = default;
};
// A stripped-down registry that allows delegate plugins to be created by name.
//
// Limitations:
// - Doesn't allow deregistration.
// - Doesn't check for duplicate registration.
//
class DelegatePluginRegistry {
public:
typedef std::function<std::unique_ptr<DelegatePluginInterface>(
const TFLiteSettings&)>
CreatorFunction;
// Returns a DelegatePluginInterface registered with `name`, or nullptr if no
// matching plugin is found.
// TFLiteSettings is per plugin, so that the lifetime of the corresponding
// delegate options data is maintained.
static std::unique_ptr<DelegatePluginInterface> CreateByName(
const std::string& name, const TFLiteSettings& settings);
// Struct to be statically allocated for registration.
struct Register {
Register(const std::string& name, CreatorFunction creator_function);
};
private:
void RegisterImpl(const std::string& name, CreatorFunction creator_function);
std::unique_ptr<DelegatePluginInterface> CreateImpl(
const std::string& name, const TFLiteSettings& settings);
static DelegatePluginRegistry* GetSingleton();
std::unordered_map<std::string, CreatorFunction> factories_;
absl::Mutex mutex_;
};
} // namespace delegates
} // namespace tflite
#define TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION_VNAME(name, f) \
static auto* g_delegate_plugin_##name##_ = \
new DelegatePluginRegistry::Register(#name, f);
#define TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION(name, f) \
TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION_VNAME(name, f);
#endif // TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_DELEGATE_REGISTRY_H_
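
For illustration, a new plugin follows the same pattern as the concrete plugins in this change. A hypothetical FooPlugin (only DelegatePluginInterface, the registry, and the macro above are real; the Foo delegate itself is made up):

    #include <memory>

    #include "absl/memory/memory.h"
    #include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
    #include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"

    namespace tflite {
    namespace delegates {

    class FooPlugin : public DelegatePluginInterface {
     public:
      explicit FooPlugin(const TFLiteSettings& settings) {
        // Read the relevant sub-table of `settings` into delegate options.
      }
      TfLiteDelegatePtr Create() override {
        // Return the delegate plus its deleter; null when unsupported.
        return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
      }
      int GetDelegateErrno(TfLiteDelegate* from_delegate) override { return 0; }
      static std::unique_ptr<DelegatePluginInterface> New(
          const TFLiteSettings& settings) {
        return absl::make_unique<FooPlugin>(settings);
      }
    };

    TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION(FooPlugin, FooPlugin::New);

    }  // namespace delegates
    }  // namespace tflite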

tensorflow/lite/experimental/acceleration/configuration/gpu_plugin.cc
@@ -0,0 +1,62 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
namespace tflite {
namespace delegates {
class GpuPlugin : public DelegatePluginInterface {
public:
TfLiteDelegatePtr Create() override {
return TfLiteDelegatePtr(TfLiteGpuDelegateV2Create(&options_),
TfLiteGpuDelegateV2Delete);
}
int GetDelegateErrno(TfLiteDelegate* from_delegate) override { return 0; }
static std::unique_ptr<DelegatePluginInterface> New(
const TFLiteSettings& acceleration) {
return absl::make_unique<GpuPlugin>(acceleration);
}
explicit GpuPlugin(const TFLiteSettings& tflite_settings)
: options_(TfLiteGpuDelegateOptionsV2Default()) {
const auto* gpu_settings = tflite_settings.gpu_settings();
if (gpu_settings) {
options_.inference_priority1 =
gpu_settings->is_precision_loss_allowed()
? TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY
: TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
if (gpu_settings->enable_quantized_inference()) {
options_.experimental_flags |=
TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT;
}
if (gpu_settings->force_backend() == GPUBackend_OPENCL) {
options_.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY;
} else if (gpu_settings->force_backend() == GPUBackend_OPENGL) {
options_.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY;
}
}
}
private:
TfLiteGpuDelegateOptionsV2 options_;
};
TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION(GpuPlugin, GpuPlugin::New);
} // namespace delegates
} // namespace tflite
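
A sketch of configuring this plugin through the flatbuffer API (same includes as the earlier NNAPI sketch; argument names follow the GPUSettings field order in the schema):

    tflite::delegates::TfLiteDelegatePtr CreateGpuDelegate() {
      flatbuffers::FlatBufferBuilder fbb;
      auto gpu_settings = tflite::CreateGPUSettings(
          fbb, /*is_precision_loss_allowed=*/true,
          /*enable_quantized_inference=*/true, tflite::GPUBackend_OPENCL);
      auto* settings = flatbuffers::GetTemporaryPointer(
          fbb, tflite::CreateTFLiteSettings(fbb, tflite::Delegate_GPU,
                                            /*nnapi_settings=*/0, gpu_settings));
      auto plugin = tflite::delegates::DelegatePluginRegistry::CreateByName(
          "GpuPlugin", *settings);
      return plugin->Create();
    }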

tensorflow/lite/experimental/acceleration/configuration/hexagon_plugin.cc
@@ -0,0 +1,73 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include "absl/memory/memory.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
#if defined(__ARM_ARCH)
#include "tensorflow/lite/delegates/hexagon/hexagon_delegate.h"
#endif
namespace tflite {
namespace delegates {
class HexagonPlugin : public DelegatePluginInterface {
public:
TfLiteDelegatePtr Create() override {
#if defined(__ARM_ARCH)
TfLiteHexagonInit();
auto* delegate_ptr = TfLiteHexagonDelegateCreate(&options_);
TfLiteDelegatePtr delegate(delegate_ptr, [](TfLiteDelegate* delegate) {
TfLiteHexagonDelegateDelete(delegate);
TfLiteHexagonTearDown();
});
return delegate;
#else // !defined(__ARM_ARCH)
return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
#endif // defined(__ARM_ARCH)
}
int GetDelegateErrno(TfLiteDelegate* /* from_delegate */) override {
return 0;
}
static std::unique_ptr<HexagonPlugin> New(
const TFLiteSettings& tflite_settings) {
return absl::make_unique<HexagonPlugin>(tflite_settings);
}
explicit HexagonPlugin(const TFLiteSettings& tflite_settings) {
const HexagonSettings* settings = tflite_settings.hexagon_settings();
#if defined(__ARM_ARCH)
options_ = TfLiteHexagonDelegateOptions({0});
if (settings) {
options_.debug_level = settings->debug_level();
options_.powersave_level = settings->powersave_level();
options_.print_graph_profile = settings->print_graph_profile();
options_.print_graph_debug = settings->print_graph_debug();
}
#else
(void)settings;
#endif
}
private:
#if defined(__ARM_ARCH)
TfLiteHexagonDelegateOptions options_;
#endif
};
TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION(HexagonPlugin, HexagonPlugin::New);
} // namespace delegates
} // namespace tflite
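
Because the delegate is only compiled in under __ARM_ARCH, Create() can return a null delegate; a hedged caller sketch (helper name is illustrative):

    bool HexagonAvailable(const tflite::TFLiteSettings& settings) {
      auto plugin = tflite::delegates::DelegatePluginRegistry::CreateByName(
          "HexagonPlugin", settings);
      auto delegate = plugin->Create();
      // On non-ARM builds the delegate pointer is null (see the __ARM_ARCH
      // branches above), so callers should fall back to CPU.
      return delegate != nullptr;
    }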

tensorflow/lite/experimental/acceleration/configuration/nnapi_plugin.cc
@@ -0,0 +1,93 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
namespace tflite {
namespace delegates {
inline tflite::StatefulNnApiDelegate::Options::ExecutionPreference
ConvertExecutionPreference(
NNAPIExecutionPreference from_compatibility_preference) {
using TflitePreference =
tflite::StatefulNnApiDelegate::Options::ExecutionPreference;
switch (from_compatibility_preference) {
case NNAPIExecutionPreference_NNAPI_LOW_POWER:
return TflitePreference::kLowPower;
case NNAPIExecutionPreference_NNAPI_FAST_SINGLE_ANSWER:
return TflitePreference::kFastSingleAnswer;
case NNAPIExecutionPreference_NNAPI_SUSTAINED_SPEED:
return TflitePreference::kSustainedSpeed;
default:
return TflitePreference::kUndefined;
}
}
class NnapiPlugin : public DelegatePluginInterface {
public:
TfLiteDelegatePtr Create() override {
auto nnapi_delegate =
absl::make_unique<tflite::StatefulNnApiDelegate>(options_);
return TfLiteDelegatePtr(
nnapi_delegate.release(), [](TfLiteDelegate* delegate) {
delete reinterpret_cast<tflite::StatefulNnApiDelegate*>(delegate);
});
}
int GetDelegateErrno(TfLiteDelegate* from_delegate) override {
auto nnapi_delegate =
reinterpret_cast<tflite::StatefulNnApiDelegate*>(from_delegate);
return nnapi_delegate->GetNnApiErrno();
}
static std::unique_ptr<NnapiPlugin> New(
const TFLiteSettings& tflite_settings) {
return absl::make_unique<NnapiPlugin>(tflite_settings);
}
explicit NnapiPlugin(const TFLiteSettings& tflite_settings) {
const NNAPISettings* nnapi_settings = tflite_settings.nnapi_settings();
if (!nnapi_settings) return;
if (nnapi_settings->accelerator_name() &&
nnapi_settings->accelerator_name()->Length() != 0) {
accelerator_ = nnapi_settings->accelerator_name()->str();
options_.accelerator_name = accelerator_.c_str();
}
if (nnapi_settings->cache_directory() &&
nnapi_settings->cache_directory()->Length() != 0) {
cache_dir_ = nnapi_settings->cache_directory()->str();
options_.cache_dir = cache_dir_.c_str();
}
if (nnapi_settings->model_token() &&
nnapi_settings->model_token()->Length() != 0) {
model_token_ = nnapi_settings->model_token()->str();
options_.model_token = model_token_.c_str();
}
options_.execution_preference =
ConvertExecutionPreference(nnapi_settings->execution_preference());
options_.disallow_nnapi_cpu =
!nnapi_settings->allow_nnapi_cpu_on_android_10_plus();
}
private:
std::string accelerator_, cache_dir_, model_token_;
tflite::StatefulNnApiDelegate::Options options_;
};
TFLITE_REGISTER_DELEGATE_FACTORY_FUNCTION(NnapiPlugin, NnapiPlugin::New);
} // namespace delegates
} // namespace tflite

tensorflow/lite/experimental/acceleration/configuration/nnapi_plugin_test.cc
@@ -0,0 +1,175 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h" // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_mock_test.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/test_util.h"
// Tests for checking that the NNAPI Delegate plugin correctly handles all the
// options from the flatbuffer.
//
// Checking done at NNAPI call level, as that is where we have a mockable
// layer.
namespace tflite {
namespace {
using delegate::nnapi::NnApiMock;
class SingleAddOpModel : tflite::SingleOpModel {
public:
void Build() {
int input = AddInput({tflite::TensorType_FLOAT32, {1, 2, 2}});
int constant = AddConstInput({tflite::TensorType_FLOAT32, {1, 2, 2}},
{1.0f, 1.0f, 1.0f, 1.0f});
AddOutput({tflite::TensorType_FLOAT32, {}});
SetBuiltinOp(tflite::BuiltinOperator_ADD, tflite::BuiltinOptions_AddOptions,
tflite::CreateAddOptions(builder_).Union());
BuildInterpreter({GetShape(input), GetShape(constant)});
}
tflite::Interpreter* Interpreter() const { return interpreter_.get(); }
};
class NNAPIPluginTest : public ::testing::Test {
protected:
NNAPIPluginTest() : delegate_(nullptr, [](TfLiteDelegate*) {}) {}
void SetUp() override {
nnapi_ = const_cast<NnApi*>(NnApiImplementation());
nnapi_mock_ = absl::make_unique<NnApiMock>(nnapi_);
nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices =
[](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
bool* supportedOps) -> int {
supportedOps[0] = true;
return 0;
};
model_.Build();
}
template <NNAPIExecutionPreference input, int output>
void CheckExecutionPreference() {
// Note - this uses a template since the NNAPI functions are C function
// pointers rather than lambdas, so they can't capture variables.
nnapi_->ANeuralNetworksCompilation_setPreference =
[](ANeuralNetworksCompilation* compilation, int32_t preference) {
return preference - output;
};
CreateDelegate(CreateNNAPISettings(fbb_, 0, 0, 0, input));
// Since delegation succeeds, the model becomes immutable and hence we can't
// reuse it.
SingleAddOpModel model;
model.Build();
EXPECT_EQ(model.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk)
<< " given input: " << input << " expected output: " << output;
}
void CreateDelegate(flatbuffers::Offset<NNAPISettings> settings) {
settings_ = flatbuffers::GetTemporaryPointer(
fbb_, CreateTFLiteSettings(fbb_, tflite::Delegate_NNAPI, settings));
plugin_ = delegates::DelegatePluginRegistry::CreateByName("NnapiPlugin",
*settings_);
delegate_ = plugin_->Create();
}
NnApi* nnapi_;
std::unique_ptr<NnApiMock> nnapi_mock_;
SingleAddOpModel model_;
flatbuffers::FlatBufferBuilder fbb_;
const TFLiteSettings* settings_ = nullptr;
delegates::TfLiteDelegatePtr delegate_;
std::unique_ptr<delegates::DelegatePluginInterface> plugin_;
};
TEST_F(NNAPIPluginTest, PassesAcceleratorName) {
// Fails with non-existent "foo".
CreateDelegate(CreateNNAPISettings(fbb_, fbb_.CreateString("foo")));
EXPECT_EQ(model_.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteDelegateError);
// Succeeds with "test-device" supported by the mock.
CreateDelegate(CreateNNAPISettings(fbb_, fbb_.CreateString("test-device")));
EXPECT_EQ(model_.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
TEST_F(NNAPIPluginTest, PassesExecutionPreference) {
CheckExecutionPreference<NNAPIExecutionPreference_UNDEFINED,
StatefulNnApiDelegate::Options::kUndefined>();
CheckExecutionPreference<NNAPIExecutionPreference_NNAPI_LOW_POWER,
StatefulNnApiDelegate::Options::kLowPower>();
CheckExecutionPreference<NNAPIExecutionPreference_NNAPI_FAST_SINGLE_ANSWER,
StatefulNnApiDelegate::Options::kFastSingleAnswer>();
CheckExecutionPreference<NNAPIExecutionPreference_NNAPI_SUSTAINED_SPEED,
StatefulNnApiDelegate::Options::kSustainedSpeed>();
}
TEST_F(NNAPIPluginTest, PassesCachingParameters) {
nnapi_->ANeuralNetworksCompilation_setCaching =
[](ANeuralNetworksCompilation* compilation, const char* cacheDir,
const uint8_t* token) -> int {
if (std::string(cacheDir) != "d") return 1;
// Token is hashed with other bits, just check that it's not empty.
if (std::string(reinterpret_cast<const char*>(token)).empty()) return 2;
return 0;
};
CreateDelegate(CreateNNAPISettings(fbb_, 0, fbb_.CreateString("d"),
fbb_.CreateString("t")));
EXPECT_EQ(model_.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
TEST_F(NNAPIPluginTest, PassesFalseNNAPICpuFlag) {
CreateDelegate(CreateNNAPISettings(fbb_, 0, 0, 0,
NNAPIExecutionPreference_UNDEFINED, 0, 0,
/* allow CPU */ false));
nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices =
[](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
bool* supportedOps) -> int {
supportedOps[0] = true;
// Since NNAPI CPU is disallowed, only one device should be passed.
return numDevices - 1;
};
EXPECT_EQ(model_.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
TEST_F(NNAPIPluginTest, PassesTrueNNAPICpuFlag) {
CreateDelegate(CreateNNAPISettings(fbb_, 0, 0, 0,
NNAPIExecutionPreference_UNDEFINED, 0, 0,
/* allow CPU */ true));
nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices =
[](const ANeuralNetworksModel* model,
const ANeuralNetworksDevice* const* devices, uint32_t numDevices,
bool* supportedOps) -> int {
supportedOps[0] = true;
// With NNAPI CPU allowed, two devices should be passed.
return numDevices - 2;
};
EXPECT_EQ(model_.Interpreter()->ModifyGraphWithDelegate(delegate_.get()),
kTfLiteOk);
}
} // namespace
} // namespace tflite

tensorflow/lite/experimental/acceleration/configuration/proto_to_flatbuffer.cc
@@ -0,0 +1,58 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/acceleration/configuration/proto_to_flatbuffer.h"
#include <string>
#include "flatbuffers/idl.h" // from @flatbuffers
#include "flatbuffers/util.h" // from @flatbuffers
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/lite/minimal_logging.h"
namespace tflite {
namespace {
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_fbs_contents-inl.h"
}  // namespace
const ComputeSettings* ConvertFromProto(
flatbuffers::Parser* parser, const proto::ComputeSettings& proto_settings) {
std::string json;
tensorflow::protobuf::util::JsonPrintOptions options;
options.preserve_proto_field_names = true;
options.always_print_primitive_fields = true; // For catching problems.
auto status = tensorflow::protobuf::util::MessageToJsonString(proto_settings,
&json, options);
if (!status.ok()) {
TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "Failed to convert to Json: %s",
status.ToString().c_str());
return nullptr;
}
if (!parser->Parse(configuration_fbs_contents)) {
TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "Failed to parse schema: %s",
parser->error_.c_str());
return nullptr;
}
parser->SetRootType("tflite.ComputeSettings");
if (!parser->Parse(json.c_str())) {
TFLITE_LOG_PROD(TFLITE_LOG_ERROR, "Failed to parse json: %s",
parser->error_.c_str());
return nullptr;
}
return flatbuffers::GetRoot<ComputeSettings>(
parser->builder_.GetBufferPointer());
}
} // namespace tflite

tensorflow/lite/experimental/acceleration/configuration/proto_to_flatbuffer.h
@@ -0,0 +1,32 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_PROTO_TO_FLATBUFFER_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_PROTO_TO_FLATBUFFER_H_
#include "flatbuffers/idl.h" // from @flatbuffers
#include "tensorflow/lite/experimental/acceleration/configuration/configuration.pb.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
namespace tflite {
// Converts the protobuf version ComputeSettings to the flatbuffer version, via
// json. The parser is used for state - the returned pointer is valid only as
// long as the parser is kept alive and unmutated.
const ComputeSettings* ConvertFromProto(
flatbuffers::Parser* parser, const proto::ComputeSettings& proto_settings);
} // namespace tflite
#endif // TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_CONFIGURATION_PROTO_TO_FLATBUFFER_H_
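
A usage sketch for the converter (the function name Example is illustrative); the key point is the lifetime rule stated in the header comment:

    #include "tensorflow/lite/experimental/acceleration/configuration/proto_to_flatbuffer.h"

    void Example() {
      tflite::proto::ComputeSettings proto_settings;
      proto_settings.mutable_tflite_settings()->set_delegate(
          tflite::proto::NNAPI);
      flatbuffers::Parser parser;
      const tflite::ComputeSettings* settings =
          tflite::ConvertFromProto(&parser, proto_settings);
      // `settings` points into the parser's buffer: it stays valid only while
      // `parser` is alive and unmodified.
    }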