diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD
index a2d8b40bbce..fa9e62186fa 100644
--- a/tensorflow/lite/BUILD
+++ b/tensorflow/lite/BUILD
@@ -308,6 +308,18 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "tflite_with_xnnpack",
+    srcs = ["tflite_with_xnnpack.cc"],
+    copts = tflite_copts() + TFLITE_DEFAULT_COPTS,
+    linkstatic = True,
+    deps = [
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate",
+    ],
+    alwayslink = 1,
+)
+
 cc_test(
     name = "string_util_test",
     size = "small",
@@ -435,6 +447,32 @@ tf_cc_test(
     ],
 )
 
+# Test model framework with the XNNPACK delegate.
+cc_test(
+    name = "model_xnnpack_test",
+    size = "small",
+    srcs = [
+        "model_xnnpack_test.cc",
+    ],
+    data = [
+        "testdata/multi_add.bin",
+    ],
+    tags = [
+        "no_windows",  # No weak symbols with MSVC.
+        "tflite_not_portable_android",
+        "tflite_not_portable_ios",
+    ],
+    deps = [
+        ":framework",
+        ":tflite_with_xnnpack",
+        ":util",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/testing:util",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 # Test OpResolver.
 cc_test(
     name = "mutable_op_resolver_test",
diff --git a/tensorflow/lite/core/macros.h b/tensorflow/lite/core/macros.h
index 5ff00e4814a..034ad8daac5 100644
--- a/tensorflow/lite/core/macros.h
+++ b/tensorflow/lite/core/macros.h
@@ -32,4 +32,23 @@ limitations under the License.
 #define TFLITE_EXPECT_TRUE(cond) (cond)
 #endif
 
+// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
+// we avoid the absl dependency for binary size reasons.
+#ifdef __has_attribute
+#define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define TFLITE_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if (TFLITE_HAS_ATTRIBUTE(weak) || \
+     (defined(__GNUC__) && !defined(__clang__))) && \
+    !(defined(__llvm__) && defined(_WIN32)) && !defined(__MINGW32__)
+#undef TFLITE_ATTRIBUTE_WEAK
+#define TFLITE_ATTRIBUTE_WEAK __attribute__((weak))
+#define TFLITE_HAS_ATTRIBUTE_WEAK 1
+#else
+#define TFLITE_ATTRIBUTE_WEAK
+#define TFLITE_HAS_ATTRIBUTE_WEAK 0
+#endif
+
 #endif  // TENSORFLOW_LITE_CORE_MACROS_H_
diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc
index ef8f5a8773a..5d7807cd291 100644
--- a/tensorflow/lite/interpreter_builder.cc
+++ b/tensorflow/lite/interpreter_builder.cc
@@ -97,23 +97,25 @@ TfLiteStatus ParseSparseIndexVector(const DimensionMetadata* src,
 
 const char* kEmptyTensorName = "";
 
-// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
-// we avoid the absl dependency for binary size reasons.
-#ifdef __has_attribute
-#define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
-#else
-#define TFLITE_HAS_ATTRIBUTE(x) 0
-#endif
+#if TFLITE_HAS_ATTRIBUTE_WEAK
+// Using weak symbols to create a delegate allows automatic injection of the
+// delegate simply by adding it as a dependency.
 
-#if TFLITE_HAS_ATTRIBUTE(weak) || (defined(__GNUC__) && !defined(__clang__))
-// Using weak symbols for the flex delegate allows automatic injection of the
-// delegate simply by adding it as a dependency. See also the strong override in
+// For the flex delegate, see also the strong override in
 // lite/delegates/flex/delegate.cc.
-__attribute__((weak)) Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
+TFLITE_ATTRIBUTE_WEAK Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() {
+  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+}
+
+// For the XNNPACK delegate, see also the strong override in
+// lite/tflite_with_xnnpack.cc.
+TFLITE_ATTRIBUTE_WEAK Interpreter::TfLiteDelegatePtr AcquireXNNPACKDelegate(
+    int num_threads) {
   return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
 }
 #else
 Interpreter::TfLiteDelegatePtr (*AcquireFlexDelegate)() = nullptr;
+Interpreter::TfLiteDelegatePtr (*AcquireXNNPACKDelegate)(int) = nullptr;
 #endif
 
 namespace impl {
@@ -415,6 +417,7 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
     return kEmptyTensorName;
   };
 
+  num_fp32_tensors_ = 0;
   for (int i = 0; i < tensors->size(); ++i) {
     const auto* tensor = tensors->Get(i);
     std::vector<int> dims = FlatBufferIntArrayToVector(tensor->shape());
@@ -425,6 +428,9 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
       status = kTfLiteError;
       continue;
     }
+    if (type == kTfLiteFloat32) {
+      ++num_fp32_tensors_;
+    }
     auto get_readonly_data = [&](const char** buffer_data,
                                  size_t* buffer_size) {
       // TODO(aselle): Check what happens if we have an unspecified size
@@ -507,12 +513,23 @@ TfLiteStatus InterpreterBuilder::ParseTensors(
   return status;
 }
 
-TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter) {
-  // Apply Flex delegate if applicable.
-  if (!has_flex_op_ || AcquireFlexDelegate == nullptr) {
-    return kTfLiteOk;
-  } else if (auto flex_delegate = AcquireFlexDelegate()) {
-    return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate));
+TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter,
+                                                int num_threads) {
+  // First, apply the XNNPACK delegate if applicable.
+  if (AcquireXNNPACKDelegate && num_fp32_tensors_ > 0) {
+    if (auto xnnpack_delegate = AcquireXNNPACKDelegate(num_threads)) {
+      // Execution falls back to the default implementation if the XNNPACK
+      // delegate fails to be applied. Therefore, we ignore the return status
+      // here and let the rest of the code proceed as usual.
+      interpreter->ModifyGraphWithDelegate(std::move(xnnpack_delegate));
+    }
+  }
+
+  // Second, apply the Flex delegate if applicable.
+  if (has_flex_op_ && AcquireFlexDelegate) {
+    if (auto flex_delegate = AcquireFlexDelegate()) {
+      return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate));
+    }
   }
 
   return kTfLiteOk;
@@ -625,7 +642,7 @@ TfLiteStatus InterpreterBuilder::operator()(
     modified_subgraph->SetVariables(std::move(variables));
   }
 
-  if (ApplyDelegates(interpreter->get()) != kTfLiteOk)
+  if (ApplyDelegates(interpreter->get(), num_threads) != kTfLiteOk)
     return cleanup_and_error();
 
   return kTfLiteOk;
diff --git a/tensorflow/lite/interpreter_builder.h b/tensorflow/lite/interpreter_builder.h
index 1d150d6f1d4..1b8ae5a8e68 100644
--- a/tensorflow/lite/interpreter_builder.h
+++ b/tensorflow/lite/interpreter_builder.h
@@ -78,7 +78,7 @@ class InterpreterBuilder {
       const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
       const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors,
       Subgraph* subgraph);
-  TfLiteStatus ApplyDelegates(Interpreter* interpreter);
+  TfLiteStatus ApplyDelegates(Interpreter* interpreter, int num_threads);
   TfLiteStatus ParseQuantization(const QuantizationParameters* src_quantization,
                                  TfLiteQuantization* quantization,
                                  const std::vector<int>& dims);
@@ -95,6 +95,7 @@ class InterpreterBuilder {
   const Allocation* allocation_ = nullptr;
 
   bool has_flex_op_ = false;
+  int num_fp32_tensors_ = 0;
 };
 
 }  // namespace impl
diff --git a/tensorflow/lite/model_xnnpack_test.cc b/tensorflow/lite/model_xnnpack_test.cc
new file mode 100644
index 00000000000..9c06147f602
--- /dev/null
+++ b/tensorflow/lite/model_xnnpack_test.cc
@@ -0,0 +1,59 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/model.h"
+
+#include <string>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/core/macros.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/testing/util.h"
+#include "tensorflow/lite/util.h"
+
+namespace tflite {
+
+TEST(FloatModel, WithXnnpackDelegate) {
+  // Note: this graph will be fully delegated by the XNNPACK delegate.
+  auto model = FlatBufferModel::BuildFromFile(
+      "tensorflow/lite/testdata/multi_add.bin");
+  ASSERT_TRUE(model);
+
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(InterpreterBuilder(*model,
+                               ops::builtin::BuiltinOpResolver{})(&interpreter),
+            kTfLiteOk);
+  ASSERT_TRUE(interpreter);
+
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
+
+#if TFLITE_HAS_ATTRIBUTE_WEAK
+  // As the graph is fully delegated by the XNNPACK delegate, we expect the
+  // following:
+  EXPECT_EQ(1, interpreter->execution_plan().size());
+  int first_node_id = interpreter->execution_plan()[0];
+  const auto& first_node_reg =
+      interpreter->node_and_registration(first_node_id)->second;
+  const std::string op_name = GetOpNameByRegistration(first_node_reg);
+  EXPECT_EQ("DELEGATE TfLiteXNNPackDelegate", op_name);
+#endif
+}
+
+}  // namespace tflite
+
+int main(int argc, char** argv) {
+  ::tflite::LogToStderr();
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/lite/tflite_with_xnnpack.cc b/tensorflow/lite/tflite_with_xnnpack.cc
new file mode 100644
index 00000000000..c8c2c2e02c1
--- /dev/null
+++ b/tensorflow/lite/tflite_with_xnnpack.cc
@@ -0,0 +1,30 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+// Corresponding weak declaration found in lite/interpreter_builder.cc.
+std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
+AcquireXNNPACKDelegate(int num_threads) {
+  auto opts = TfLiteXNNPackDelegateOptionsDefault();
+  // Note that we don't want to use the thread pool for num_threads == 1.
+  opts.num_threads = num_threads > 1 ? num_threads : 0;
+  return std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
+      TfLiteXNNPackDelegateCreate(&opts), TfLiteXNNPackDelegateDelete);
+}
+}  // namespace tflite
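
Usage note (reviewer sketch, not part of the patch): with the weak-symbol mechanism above, enabling XNNPACK by default needs only a dependency edge, no API changes. Below is a minimal client, assuming the binary links //tensorflow/lite:tflite_with_xnnpack so the strong AcquireXNNPACKDelegate() overrides the weak default; "model.tflite" and the thread count are placeholders.

  #include <cstdio>
  #include <memory>

  #include "tensorflow/lite/interpreter.h"
  #include "tensorflow/lite/kernels/register.h"
  #include "tensorflow/lite/model.h"

  int main() {
    // "model.tflite" is a placeholder path to a float32 model.
    auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
    if (!model) return 1;

    std::unique_ptr<tflite::Interpreter> interpreter;
    // InterpreterBuilder::operator() forwards num_threads to
    // ApplyDelegates(), which passes it to AcquireXNNPACKDelegate().
    if (tflite::InterpreterBuilder(
            *model, tflite::ops::builtin::BuiltinOpResolver{})(
            &interpreter, /*num_threads=*/2) != kTfLiteOk ||
        !interpreter) {
      return 1;
    }

    // If the model contains float32 tensors, the graph has already been
    // rewritten to use XNNPACK at this point; no explicit
    // ModifyGraphWithDelegate() call is needed.
    if (interpreter->AllocateTensors() != kTfLiteOk) return 1;
    std::printf("execution plan size: %zu\n",
                interpreter->execution_plan().size());
    return 0;
  }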