Utilize weak symbol to apply XNNPACK delegate by default in TFLite.

PiperOrigin-RevId: 303043141 Change-Id: Icf30aac6717ecbec8efb9f52f9f18f31e3cd947e
2020-03-25 22:27:38 -07:00 · 2020-03-25 22:27:38 -07:00 · 47140aa69d
commit 47140aa69d
parent 268f9834b6
6 changed files with 183 additions and 19 deletions
--- a/tensorflow/lite/BUILD
+++ b/tensorflow/lite/BUILD
@ -308,6 +308,18 @@ cc_library(
    ],
 )
 # Opt-in library that enables the XNNPACK delegate by default: its single
 # source file, tflite_with_xnnpack.cc, defines tflite::AcquireXNNPACKDelegate,
 # which takes the place of the weak definition in interpreter_builder.cc once
 # this target is linked into a binary.
 cc_library(
    name = "tflite_with_xnnpack",
    srcs = ["tflite_with_xnnpack.cc"],
    copts = tflite_copts() + TFLITE_DEFAULT_COPTS,
    linkstatic = True,
    deps = [
        "//tensorflow/lite/c:common",
        "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
    ],
    # Force the object file to be linked even though no dependent references
    # its symbols through a header; otherwise the strong override could be
    # dropped by the linker.
    alwayslink = 1,
 )
 cc_test(
    name = "string_util_test",
    size = "small",
@ -435,6 +447,32 @@ tf_cc_test(
    ],
 )
 # Test model framework with the XNNPACK delegate.
 # The test builds an interpreter for testdata/multi_add.bin and, when weak
 # symbols are available, checks that the resulting execution plan consists of
 # a single XNNPACK delegate node.
 cc_test(
    name = "model_xnnpack_test",
    size = "small",
    srcs = [
        "model_xnnpack_test.cc",
    ],
    data = [
        "testdata/multi_add.bin",
    ],
    tags = [
        "no_windows",  # No weak symbols with MSVC.
        "tflite_not_portable_android",
        "tflite_not_portable_ios",
    ],
    deps = [
        ":framework",
        # Supplies the strong AcquireXNNPACKDelegate definition; the test
        # itself never references this library directly.
        ":tflite_with_xnnpack",
        ":util",
        "//tensorflow/lite/c:common",
        "//tensorflow/lite/kernels:builtin_ops",
        "//tensorflow/lite/testing:util",
        "@com_google_googletest//:gtest",
    ],
 )
 # Test OpResolver.
 cc_test(
    name = "mutable_op_resolver_test",
--- a/tensorflow/lite/core/macros.h
+++ b/tensorflow/lite/core/macros.h
@ -32,4 +32,23 @@ limitations under the License.
 #define TFLITE_EXPECT_TRUE(cond) (cond)
 #endif
 // Weak-symbol support macros.
 //
 // Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
 // we avoid the absl dependency for binary size reasons.
 //
 // TFLITE_HAS_ATTRIBUTE(x) forwards to the compiler's __has_attribute(x) when
 // that builtin exists and evaluates to 0 otherwise.
 #ifdef __has_attribute
 #define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
 #else
 #define TFLITE_HAS_ATTRIBUTE(x) 0
 #endif
 // Weak symbols are treated as available when the compiler reports the `weak`
 // attribute (or is GCC proper, i.e. __GNUC__ without __clang__), except for
 // LLVM-based toolchains targeting Windows and for MinGW.
 //
 // Results:
 //   TFLITE_ATTRIBUTE_WEAK     - expands to __attribute__((weak)) when
 //                               supported, and to nothing otherwise.
 //   TFLITE_HAS_ATTRIBUTE_WEAK - 1 when supported, 0 otherwise; intended for
 //                               use in #if conditions.
 #if (TFLITE_HAS_ATTRIBUTE(weak) ||                  \
     (defined(__GNUC__) && !defined(__clang__))) && \
    !(defined(__llvm__) && defined(_WIN32)) && !defined(__MINGW32__)
 #undef TFLITE_ATTRIBUTE_WEAK
 #define TFLITE_ATTRIBUTE_WEAK __attribute__((weak))
 #define TFLITE_HAS_ATTRIBUTE_WEAK 1
 #else
 #define TFLITE_ATTRIBUTE_WEAK
 #define TFLITE_HAS_ATTRIBUTE_WEAK 0
 #endif
 #endif  // TENSORFLOW_LITE_CORE_MACROS_H_
--- a/tensorflow/lite/interpreter_builder.cc
+++ b/tensorflow/lite/interpreter_builder.cc
@ -97,23 +97,25 @@ TfLiteStatus ParseSparseIndexVector(const DimensionMetadata* src,
 const char* kEmptyTensorName = "";
-// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
+#if TFLITE_HAS_ATTRIBUTE_WEAK
-// we avoid the absl dependency for binary size reasons.
+// Using weak symbols to create a delegate allows automatic injection of the
-#ifdef __has_attribute
+// delegate simply by adding it as a dependency.
 #define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
 #else
 #define TFLITE_HAS_ATTRIBUTE(x) 0
 #endif
-#if TFLITE_HAS_ATTRIBUTE(weak) || (defined(__GNUC__) && !defined(__clang__))
+// For flex delegate, see also the strong override in
 // Using weak symbols for the flex delegate allows automatic injection of the
 // delegate simply by adding it as a dependency. See also the strong override in
 // lite/delegates/flex/delegate.cc.
-__attribute__((weak)) Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
+TFLITE_ATTRIBUTE_WEAK Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
 }
 // For XNNPACK delegate, see also the strong override in
 // lite/tflite_with_xnnpack.cc.
 //
 // Weak default used when no strong override is linked in: it ignores
 // `num_threads` and returns a null delegate paired with a no-op deleter.
 // InterpreterBuilder::ApplyDelegates tests the returned pointer before using
 // it, so a null result means no XNNPACK delegate is applied.
 TFLITE_ATTRIBUTE_WEAK Interpreter::TfLiteDelegatePtr AcquireXNNPACKDelegate(
    int num_threads) {
  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
 }
 #else
 Interpreter::TfLiteDelegatePtr (*AcquireFlexDelegate)() = nullptr;
 Interpreter::TfLiteDelegatePtr (*AcquireXNNPACKDelegate)(int) = nullptr;
 #endif
 namespace impl {
@ -415,6 +417,7 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
    return kEmptyTensorName;
  };
  num_fp32_tensors_ = 0;
  for (int i = 0; i < tensors->size(); ++i) {
    const auto* tensor = tensors->Get(i);
    std::vector<int> dims = FlatBufferIntArrayToVector(tensor->shape());
@ -425,6 +428,9 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
      status = kTfLiteError;
      continue;
    }
    if (type == kTfLiteFloat32) {
      ++num_fp32_tensors_;
    }
    auto get_readonly_data = [&](const char** buffer_data,
                                 size_t* buffer_size) {
      // TODO(aselle): Check what happens if we have an unspecified size
@ -507,12 +513,23 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
  return status;
 }
-TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter) {
+TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter,
-  // Apply Flex delegate if applicable.
+                                                int num_threads) {
-  if (!has_flex_op_ || AcquireFlexDelegate == nullptr) {
+  // First, apply XNNPACK delegate if applicable.
-    return kTfLiteOk;
+  if (AcquireXNNPACKDelegate && num_fp32_tensors_ > 0) {
-  } else if (auto flex_delegate = AcquireFlexDelegate()) {
+    if (auto xnnpack_delegate = AcquireXNNPACKDelegate(num_threads)) {
-    return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate));
+      // The execution will fall back to default implementation if the XNNPACK
      // delegate fails to be applied. Therefore, we ignore the return status
      // here and let it fall through the rest of the code.
      interpreter->ModifyGraphWithDelegate(std::move(xnnpack_delegate));
    }
  }
  // Secondly, apply Flex delegate if applicable.
  if (has_flex_op_ && AcquireFlexDelegate) {
    if (auto flex_delegate = AcquireFlexDelegate()) {
      return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate));
    }
  }
  return kTfLiteOk;
@ -625,7 +642,7 @@ TfLiteStatus InterpreterBuilder::operator()(
    modified_subgraph->SetVariables(std::move(variables));
  }
-  if (ApplyDelegates(interpreter->get()) != kTfLiteOk)
+  if (ApplyDelegates(interpreter->get(), num_threads) != kTfLiteOk)
    return cleanup_and_error();
  return kTfLiteOk;
--- a/tensorflow/lite/interpreter_builder.h
+++ b/tensorflow/lite/interpreter_builder.h
@ -78,7 +78,7 @@ class InterpreterBuilder {
      const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
      const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors,
      Subgraph* subgraph);
-  TfLiteStatus ApplyDelegates(Interpreter* interpreter);
+  TfLiteStatus ApplyDelegates(Interpreter* interpreter, int num_threads);
  TfLiteStatus ParseQuantization(const QuantizationParameters* src_quantization,
                                 TfLiteQuantization* quantization,
                                 const std::vector<int>& dims);
@ -95,6 +95,7 @@ class InterpreterBuilder {
  const Allocation* allocation_ = nullptr;
  bool has_flex_op_ = false;
  int num_fp32_tensors_ = 0;
 };
 }  // namespace impl
--- a/tensorflow/lite/model_xnnpack_test.cc
+++ b/tensorflow/lite/model_xnnpack_test.cc
@ -0,0 +1,59 @@
 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/model.h"
 #include <string>
 #include <gtest/gtest.h>
 #include "tensorflow/lite/core/macros.h"
 #include "tensorflow/lite/kernels/register.h"
 #include "tensorflow/lite/testing/util.h"
 #include "tensorflow/lite/util.h"
 namespace tflite {
 // Builds an interpreter for the multi_add model with the built-in op resolver
 // and verifies that, when weak symbols are supported, the whole graph has been
 // replaced by a single XNNPACK delegate node.
 TEST(FloatModel, WithXnnpackDelegate) {
  // Note: this graph will be fully delegated by the XNNPACK delegate.
  auto model = FlatBufferModel::BuildFromFile(
      "tensorflow/lite/testdata/multi_add.bin");
  ASSERT_TRUE(model);
  std::unique_ptr<Interpreter> interpreter;
  ASSERT_EQ(InterpreterBuilder(*model,
                               ops::builtin::BuiltinOpResolver{})(&interpreter),
            kTfLiteOk);
  ASSERT_TRUE(interpreter);
  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
 #if TFLITE_HAS_ATTRIBUTE_WEAK
  // As the graph is fully delegated by XNNPACK delegate, we will expect the
  // following:
  // The size check must be fatal: the plan is indexed right below, and an
  // empty plan would otherwise turn a test failure into an out-of-bounds read.
  ASSERT_EQ(1, interpreter->execution_plan().size());
  int first_node_id = interpreter->execution_plan()[0];
  // Guard the lookup result before dereferencing it.
  const auto* first_node_and_reg =
      interpreter->node_and_registration(first_node_id);
  ASSERT_NE(first_node_and_reg, nullptr);
  const auto& first_node_reg = first_node_and_reg->second;
  const std::string op_name = GetOpNameByRegistration(first_node_reg);
  EXPECT_EQ("DELEGATE TfLiteXNNPackDelegate", op_name);
 #endif
 }
 }  // namespace tflite
 // Test entry point: routes TFLite logging to stderr, lets GoogleTest consume
 // its command-line flags, then runs every registered test and reports the
 // outcome through the process exit code.
 int main(int argc, char** argv) {
  ::tflite::LogToStderr();
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
 }
--- a/tensorflow/lite/tflite_with_xnnpack.cc
+++ b/tensorflow/lite/tflite_with_xnnpack.cc
@ -0,0 +1,30 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <memory>
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
 namespace tflite {
 // Corresponding weak declaration found in lite/model.cc.
 std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
 AcquireXNNPACKDelegate(int num_threads) {
  auto opts = TfLiteXNNPackDelegateOptionsDefault();
  // Note that we don't want to use the thread pool for num_threads == 1.
  opts.num_threads = num_threads > 1 ? num_threads : 0;
  return std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
      TfLiteXNNPackDelegateCreate(&opts), TfLiteXNNPackDelegateDelete);
 }
 }  // namespace tflite