Support six new operators in XNNPACK delegate

- ABS
- CEIL
- FLOOR
- NEG
- ROUND
- SQUARE

PiperOrigin-RevId: 315750785
Change-Id: I382ad28b007d3364b74f50a77562e4758d6990dc
This commit is contained in:
Marat Dukhan 2020-06-10 13:01:21 -07:00 committed by TensorFlower Gardener
parent c03e49d2c2
commit ed57b30415
13 changed files with 1228 additions and 25 deletions

View File

@ -202,6 +202,21 @@ cc_library(
],
)
# Unit test for ABS operator support in the XNNPACK delegate
# (links the delegate in test mode plus the shared unary-elementwise tester).
cc_test(
name = "abs_test",
srcs = ["abs_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "add_test",
srcs = ["add_test.cc"],
@ -232,6 +247,21 @@ cc_test(
],
)
# Unit test for CEIL operator support in the XNNPACK delegate.
cc_test(
name = "ceil_test",
srcs = ["ceil_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "conv_2d_test",
srcs = ["conv_2d_test.cc"],
@ -293,6 +323,21 @@ cc_test(
],
)
# Unit test for FLOOR operator support in the XNNPACK delegate.
cc_test(
name = "floor_test",
srcs = ["floor_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "hard_swish_test",
srcs = ["hard_swish_test.cc"],
@ -308,6 +353,21 @@ cc_test(
],
)
# Unit test for LOGISTIC operator support in the XNNPACK delegate.
cc_test(
name = "logistic_test",
srcs = ["logistic_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "max_pool_2d_test",
srcs = ["max_pool_2d_test.cc"],
@ -323,21 +383,6 @@ cc_test(
],
)
# Unit test for LOGISTIC operator support in the XNNPACK delegate.
# NOTE(review): this is the pre-move location of the target; the same rule
# appears earlier in the file to keep targets in alphabetical order.
cc_test(
name = "logistic_test",
srcs = ["logistic_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "maximum_test",
srcs = ["maximum_test.cc"],
@ -383,6 +428,21 @@ cc_test(
],
)
# Unit test for NEG operator support in the XNNPACK delegate.
cc_test(
name = "neg_test",
srcs = ["neg_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "pad_test",
srcs = ["pad_test.cc"],
@ -443,6 +503,21 @@ cc_test(
],
)
# Unit test for ROUND operator support in the XNNPACK delegate.
cc_test(
name = "round_test",
srcs = ["round_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "softmax_test",
srcs = ["softmax_test.cc"],
@ -458,6 +533,21 @@ cc_test(
],
)
# Unit test for SQUARE operator support in the XNNPACK delegate.
cc_test(
name = "square_test",
srcs = ["square_test.cc"],
linkopts = select({
"//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
"//conditions:default": [],
}),
deps = [
":test_main",
":unary_elementwise_tester",
":xnnpack_delegate_test_mode",
"@com_google_googletest//:gtest",
],
)
cc_test(
name = "squared_difference_test",
srcs = ["squared_difference_test.cc"],

View File

@ -86,6 +86,10 @@ benefit from XNNPACK delegate.
Below is the list of current operators and limitations:
### `ABS`
* Inputs and outputs must be in 32-bit floating-point format.
### `ADD`
* Inputs and outputs must be in 32-bit floating-point format.
@ -100,6 +104,10 @@ Below is the list of current operators and limitations:
* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
but fused `TANH` and `SIGN_BIT` activations are not.
### `CEIL`
* Inputs and outputs must be in 32-bit floating-point format.
### `CONV_2D`
* Inputs and outputs must be in 32-bit floating-point format.
@ -130,6 +138,10 @@ Below is the list of current operators and limitations:
* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
but fused `TANH` and `SIGN_BIT` activations are not.
### `FLOOR`
* Inputs and outputs must be in 32-bit floating-point format.
### `HARD_SWISH`
* Inputs and outputs must be in 32-bit floating-point format.
@ -159,6 +171,10 @@ Below is the list of current operators and limitations:
* Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
but fused `TANH` and `SIGN_BIT` activations are not.
### `NEG`
* Inputs and outputs must be in 32-bit floating-point format.
### `PAD`
* The first input and the output must be in 32-bit floating-point format.
@ -185,11 +201,19 @@ Below is the list of current operators and limitations:
* Inputs and outputs must be in 32-bit floating-point format.
### `ROUND`
* Inputs and outputs must be in 32-bit floating-point format.
### `SOFTMAX`
* Inputs and outputs must be in 32-bit floating-point format.
* Only `beta = 1.0` is supported.
### `SQUARE`
* Inputs and outputs must be in 32-bit floating-point format.
### `SQUARED_DIFFERENCE`
* Inputs and outputs must be in 32-bit floating-point format.

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for ABS delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// ABS on a random 4D shape.
TEST(Abs, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_ABS, xnnpack_delegate.get());
}
// ABS on a random 3D shape.
TEST(Abs, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_ABS, xnnpack_delegate.get());
}
// ABS on a random 2D shape.
TEST(Abs, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_ABS, xnnpack_delegate.get());
}
// ABS on a random 1D shape.
TEST(Abs, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_ABS,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Abs, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_ABS, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for CEIL delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// CEIL on a random 4D shape.
TEST(Ceil, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
}
// CEIL on a random 3D shape.
TEST(Ceil, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
}
// CEIL on a random 2D shape.
TEST(Ceil, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
}
// CEIL on a random 1D shape.
TEST(Ceil, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_CEIL,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Ceil, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for FLOOR delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// FLOOR on a random 4D shape.
TEST(Floor, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
}
// FLOOR on a random 3D shape.
TEST(Floor, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
}
// FLOOR on a random 2D shape.
TEST(Floor, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
}
// FLOOR on a random 1D shape.
TEST(Floor, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_FLOOR,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Floor, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for NEG delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// NEG on a random 4D shape.
TEST(Neg, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_NEG, xnnpack_delegate.get());
}
// NEG on a random 3D shape.
TEST(Neg, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_NEG, xnnpack_delegate.get());
}
// NEG on a random 2D shape.
TEST(Neg, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_NEG, xnnpack_delegate.get());
}
// NEG on a random 1D shape.
TEST(Neg, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_NEG,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Neg, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_NEG, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for ROUND delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// ROUND on a random 4D shape.
TEST(Round, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
}
// ROUND on a random 3D shape.
TEST(Round, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
}
// ROUND on a random 2D shape.
TEST(Round, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
}
// ROUND on a random 1D shape.
TEST(Round, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_ROUND,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Round, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -0,0 +1,120 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
// Unit tests for SQUARE delegation to XNNPACK. Each test builds a delegate,
// draws random tensor dimensions in [2, 5], and lets UnaryElementwiseTester
// compare the delegated output against the default (non-delegated) path.
namespace tflite {
namespace xnnpack {
// SQUARE on a random 4D shape.
TEST(Square, 4D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
// Non-deterministic seed; each dimension is uniform in [2, 5].
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
}
// SQUARE on a random 3D shape.
TEST(Square, 3D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, width, channels})
.Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
}
// SQUARE on a random 2D shape.
TEST(Square, 2D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, channels})
.Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
}
// SQUARE on a random 1D shape.
TEST(Square, 1D) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_SQUARE,
xnnpack_delegate.get());
}
// Same as the 4D case, but with a delegate configured for 2 threads.
TEST(Square, MultiThreading) {
TfLiteXNNPackDelegateOptions delegate_options =
TfLiteXNNPackDelegateOptionsDefault();
delegate_options.num_threads = 2;
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
TfLiteXNNPackDelegateDelete);
std::random_device random_device;
auto rng = std::mt19937(random_device());
auto shape_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
const auto batch = shape_rng();
const auto height = shape_rng();
const auto width = shape_rng();
const auto channels = shape_rng();
UnaryElementwiseTester()
.Shape({batch, height, width, channels})
.Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
}
} // namespace xnnpack
} // namespace tflite

View File

@ -86,12 +86,29 @@ void UnaryElementwiseTester::Test(tflite::BuiltinOperator unary_op,
float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
delegate_interpreter->outputs()[0]);
for (size_t i = 0; i < Size(); i++) {
ASSERT_NEAR(
default_output_data[i], delegate_output_data[i],
std::numeric_limits<float>::epsilon() *
std::max(std::abs(default_output_data[i]) * RelativeTolerance(),
1.0f));
switch (unary_op) {
case BuiltinOperator_ABS:
case BuiltinOperator_CEIL:
case BuiltinOperator_FLOOR:
case BuiltinOperator_NEG:
case BuiltinOperator_RELU:
case BuiltinOperator_RELU_N1_TO_1:
case BuiltinOperator_RELU6:
case BuiltinOperator_ROUND:
case BuiltinOperator_SQUARE:
for (size_t i = 0; i < Size(); i++) {
ASSERT_EQ(default_output_data[i], delegate_output_data[i]);
}
break;
default:
for (size_t i = 0; i < Size(); i++) {
ASSERT_NEAR(
default_output_data[i], delegate_output_data[i],
std::numeric_limits<float>::epsilon() *
std::max(std::abs(default_output_data[i]) * RelativeTolerance(),
1.0f));
}
break;
}
}

View File

@ -766,6 +766,9 @@ class Subgraph {
// suppressed.
TfLiteContext* logging_context = subgraph == nullptr ? nullptr : context;
switch (registration->builtin_code) {
case kTfLiteBuiltinAbs:
return VisitAbsNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinAdd: {
const TfLiteAddParams* add_params =
static_cast<const TfLiteAddParams*>(node->builtin_data);
@ -781,6 +784,9 @@ class Subgraph {
node, context->tensors, pool_params,
xnnpack_tensors);
}
case kTfLiteBuiltinCeil:
return VisitCeilNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinConv2d: {
const TfLiteConvParams* conv_params =
static_cast<const TfLiteConvParams*>(node->builtin_data);
@ -812,6 +818,9 @@ class Subgraph {
node, context->tensors, fc_params,
quasi_static_tensors, xnnpack_tensors);
}
case kTfLiteBuiltinFloor:
return VisitFloorNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinHardSwish:
return VisitHardSwishNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
@ -839,6 +848,9 @@ class Subgraph {
return VisitMulNode(subgraph, logging_context, node_index, node,
context->tensors, mul_params, xnnpack_tensors);
}
case kTfLiteBuiltinNeg:
return VisitNegNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinPad:
return VisitPadNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
@ -856,6 +868,9 @@ class Subgraph {
case kTfLiteBuiltinRelu6:
return VisitReluNode(subgraph, logging_context, node_index, node,
context->tensors, 0.0f, 6.0f, xnnpack_tensors);
case kTfLiteBuiltinRound:
return VisitRoundNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinSoftmax: {
const TfLiteSoftmaxParams* softmax_params =
static_cast<const TfLiteSoftmaxParams*>(node->builtin_data);
@ -864,6 +879,9 @@ class Subgraph {
context->tensors, softmax_params,
xnnpack_tensors);
}
case kTfLiteBuiltinSquare:
return VisitSquareNode(subgraph, logging_context, node_index, node,
context->tensors, xnnpack_tensors);
case kTfLiteBuiltinSquaredDifference:
return VisitSquaredDifferenceNode(subgraph, logging_context, node_index,
node, context->tensors,
@ -910,6 +928,39 @@ class Subgraph {
}
}
// Validates a TFLite ABS node and, when `subgraph` is non-null, defines the
// corresponding XNNPACK abs operator in it. With a null `subgraph` this is a
// validation-only pass (presumably the delegate's support check — see the
// logging_context setup at the call site). Returns kTfLiteOk on success.
static TfLiteStatus VisitAbsNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
const std::vector<uint32_t>& xnnpack_tensors) {
// ABS is unary: exactly one input and one output.
TF_LITE_ENSURE_STATUS(
CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
// Input must be float and not dynamically allocated.
const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
logging_context, input_tensor, node->inputs->data[0], node_index));
TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
logging_context, input_tensor, node->inputs->data[0], node_index));
// Same constraints apply to the output tensor.
const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
logging_context, output_tensor, node->outputs->data[0], node_index));
TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
logging_context, output_tensor, node->outputs->data[0], node_index));
if (subgraph != nullptr) {
// Build phase: map the TFLite tensor indices to XNNPACK value IDs and
// define the abs operator; any failure aborts delegation of this node.
const xnn_status status = xnn_define_abs(
subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
/*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
if (status != xnn_status_success) {
TF_LITE_KERNEL_LOG(logging_context, "failed to delegate ABS node #%d",
node_index);
return kTfLiteError;
}
}
return kTfLiteOk;
}
static TfLiteStatus VisitAddNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
@ -1018,6 +1069,39 @@ class Subgraph {
return kTfLiteOk;
}
static TfLiteStatus VisitCeilNode(
    xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
    TfLiteNode* node, const TfLiteTensor* tensors,
    const std::vector<uint32_t>& xnnpack_tensors) {
  // CEIL is a unary elementwise operator: exactly one input, one output.
  TF_LITE_ENSURE_STATUS(
      CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));

  // Validate the input: must be float-typed and not dynamically allocated.
  const int input_idx = node->inputs->data[0];
  const TfLiteTensor& input = tensors[input_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, input, input_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, input, input_idx, node_index));

  // Validate the output under the same constraints.
  const int output_idx = node->outputs->data[0];
  const TfLiteTensor& output = tensors[output_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, output, output_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, output, output_idx, node_index));

  // A null subgraph means we are only checking delegatability; no XNNPACK
  // node is created in that case.
  if (subgraph != nullptr) {
    if (xnn_define_ceiling(subgraph,
                           /*input_id=*/xnnpack_tensors[input_idx],
                           /*output_id=*/xnnpack_tensors[output_idx],
                           /*flags=*/0) != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context, "failed to delegate CEIL node #%d",
                         node_index);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}
static TfLiteStatus VisitConv2DNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
@ -1396,6 +1480,39 @@ class Subgraph {
return kTfLiteOk;
}
static TfLiteStatus VisitFloorNode(
    xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
    TfLiteNode* node, const TfLiteTensor* tensors,
    const std::vector<uint32_t>& xnnpack_tensors) {
  // FLOOR is a unary elementwise operator: exactly one input, one output.
  TF_LITE_ENSURE_STATUS(
      CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));

  // Validate the input: must be float-typed and not dynamically allocated.
  const int input_idx = node->inputs->data[0];
  const TfLiteTensor& input = tensors[input_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, input, input_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, input, input_idx, node_index));

  // Validate the output under the same constraints.
  const int output_idx = node->outputs->data[0];
  const TfLiteTensor& output = tensors[output_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, output, output_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, output, output_idx, node_index));

  // A null subgraph means we are only checking delegatability; no XNNPACK
  // node is created in that case.
  if (subgraph != nullptr) {
    if (xnn_define_floor(subgraph,
                         /*input_id=*/xnnpack_tensors[input_idx],
                         /*output_id=*/xnnpack_tensors[output_idx],
                         /*flags=*/0) != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context, "failed to delegate FLOOR node #%d",
                         node_index);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}
static TfLiteStatus VisitHardSwishNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
@ -1876,6 +1993,39 @@ class Subgraph {
return kTfLiteOk;
}
static TfLiteStatus VisitNegNode(
    xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
    TfLiteNode* node, const TfLiteTensor* tensors,
    const std::vector<uint32_t>& xnnpack_tensors) {
  // NEG is a unary elementwise operator: exactly one input, one output.
  TF_LITE_ENSURE_STATUS(
      CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));

  // Validate the input: must be float-typed and not dynamically allocated.
  const int input_idx = node->inputs->data[0];
  const TfLiteTensor& input = tensors[input_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, input, input_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, input, input_idx, node_index));

  // Validate the output under the same constraints.
  const int output_idx = node->outputs->data[0];
  const TfLiteTensor& output = tensors[output_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, output, output_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, output, output_idx, node_index));

  // A null subgraph means we are only checking delegatability; no XNNPACK
  // node is created in that case.
  if (subgraph != nullptr) {
    if (xnn_define_negate(subgraph,
                          /*input_id=*/xnnpack_tensors[input_idx],
                          /*output_id=*/xnnpack_tensors[output_idx],
                          /*flags=*/0) != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context, "failed to delegate NEG node #%d",
                         node_index);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}
static TfLiteStatus VisitPadNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
@ -2039,6 +2189,39 @@ class Subgraph {
return kTfLiteOk;
}
static TfLiteStatus VisitRoundNode(
    xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
    TfLiteNode* node, const TfLiteTensor* tensors,
    const std::vector<uint32_t>& xnnpack_tensors) {
  // ROUND is a unary elementwise operator: exactly one input, one output.
  TF_LITE_ENSURE_STATUS(
      CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));

  // Validate the input: must be float-typed and not dynamically allocated.
  const int input_idx = node->inputs->data[0];
  const TfLiteTensor& input = tensors[input_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, input, input_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, input, input_idx, node_index));

  // Validate the output under the same constraints.
  const int output_idx = node->outputs->data[0];
  const TfLiteTensor& output = tensors[output_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, output, output_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, output, output_idx, node_index));

  // A null subgraph means we are only checking delegatability; no XNNPACK
  // node is created in that case. XNNPACK implements TFLite ROUND as
  // round-to-nearest-even ("bankers' rounding").
  if (subgraph != nullptr) {
    if (xnn_define_bankers_rounding(subgraph,
                                    /*input_id=*/xnnpack_tensors[input_idx],
                                    /*output_id=*/xnnpack_tensors[output_idx],
                                    /*flags=*/0) != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context, "failed to delegate ROUND node #%d",
                         node_index);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}
static TfLiteStatus VisitSoftmaxNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,
@ -2082,6 +2265,39 @@ class Subgraph {
return kTfLiteOk;
}
static TfLiteStatus VisitSquareNode(
    xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
    TfLiteNode* node, const TfLiteTensor* tensors,
    const std::vector<uint32_t>& xnnpack_tensors) {
  // SQUARE is a unary elementwise operator: exactly one input, one output.
  TF_LITE_ENSURE_STATUS(
      CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));

  // Validate the input: must be float-typed and not dynamically allocated.
  const int input_idx = node->inputs->data[0];
  const TfLiteTensor& input = tensors[input_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, input, input_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, input, input_idx, node_index));

  // Validate the output under the same constraints.
  const int output_idx = node->outputs->data[0];
  const TfLiteTensor& output = tensors[output_idx];
  TF_LITE_ENSURE_STATUS(
      CheckTensorFloatType(logging_context, output, output_idx, node_index));
  TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
      logging_context, output, output_idx, node_index));

  // A null subgraph means we are only checking delegatability; no XNNPACK
  // node is created in that case.
  if (subgraph != nullptr) {
    if (xnn_define_square(subgraph,
                          /*input_id=*/xnnpack_tensors[input_idx],
                          /*output_id=*/xnnpack_tensors[output_idx],
                          /*flags=*/0) != xnn_status_success) {
      TF_LITE_KERNEL_LOG(logging_context,
                         "failed to delegate SQUARE node #%d", node_index);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}
static TfLiteStatus VisitSquaredDifferenceNode(
xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
TfLiteNode* node, const TfLiteTensor* tensors,

View File

@ -164,11 +164,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
tf_http_archive(
name = "XNNPACK",
sha256 = "30b468db7d85b5f4afb3fd60947d690bc1c29d4eccca8fffeabe5b5328621c0e",
strip_prefix = "XNNPACK-9d3a459441c272d82be14b579656b961066eba2c",
sha256 = "945c0e3c1c3a4cc58f99a5ccf3cdbcf710db99f2ef7f7821eb6be93e59e53534",
strip_prefix = "XNNPACK-5fab409eeff66b0cca5d0482c69b322f2fff6880",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/9d3a459441c272d82be14b579656b961066eba2c.zip",
"https://github.com/google/XNNPACK/archive/9d3a459441c272d82be14b579656b961066eba2c.zip",
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/5fab409eeff66b0cca5d0482c69b322f2fff6880.zip",
"https://github.com/google/XNNPACK/archive/5fab409eeff66b0cca5d0482c69b322f2fff6880.zip",
],
)

130
third_party/cpuinfo/cpuinfo.patch vendored Normal file
View File

@ -0,0 +1,130 @@
diff --git a/include/cpuinfo.h b/include/cpuinfo.h
index 6c67c34..85ce174 100644
--- a/include/cpuinfo.h
+++ b/include/cpuinfo.h
@@ -417,6 +417,8 @@ enum cpuinfo_uarch {
cpuinfo_uarch_cortex_a76 = 0x00300376,
/** ARM Cortex-A77. */
cpuinfo_uarch_cortex_a77 = 0x00300377,
+ /** ARM Cortex-A78. */
+ cpuinfo_uarch_cortex_a78 = 0x00300378,
/** ARM Neoverse N1. */
cpuinfo_uarch_neoverse_n1 = 0x00300400,
@@ -1434,6 +1436,7 @@ static inline bool cpuinfo_has_x86_sha(void) {
bool armv6k;
bool armv7;
bool armv7mp;
+ bool armv8;
bool idiv;
bool vfpv2;
@@ -1521,6 +1524,16 @@ static inline bool cpuinfo_has_arm_v7mp(void) {
#endif
}
+static inline bool cpuinfo_has_arm_v8(void) {
+ #if CPUINFO_ARCH_ARM64
+ return true;
+ #elif CPUINFO_ARCH_ARM
+ return cpuinfo_isa.armv8;
+ #else
+ return false;
+ #endif
+}
+
static inline bool cpuinfo_has_arm_idiv(void) {
#if CPUINFO_ARCH_ARM64
return true;
@@ -1645,6 +1658,16 @@ static inline bool cpuinfo_has_arm_neon_fma(void) {
#endif
}
+static inline bool cpuinfo_has_arm_neon_v8(void) {
+ #if CPUINFO_ARCH_ARM64
+ return true;
+ #elif CPUINFO_ARCH_ARM
+ return cpuinfo_isa.neon && cpuinfo_isa.armv8;
+ #else
+ return false;
+ #endif
+}
+
static inline bool cpuinfo_has_arm_atomics(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.atomics;
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index 64dd168..41f9972 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -43,6 +43,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
isa->armv6k = true;
isa->armv7 = true;
isa->armv7mp = true;
+ isa->armv8 = true;
isa->thumb = true;
isa->thumb2 = true;
isa->idiv = true;
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index 058cfc2..e912de6 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c
@@ -307,6 +307,7 @@ void cpuinfo_arm_mach_init(void) {
case CPU_TYPE_ARM:
switch (cpu_subtype) {
case CPU_SUBTYPE_ARM_V8:
+ cpuinfo_isa.armv8 = true;
cpuinfo_isa.aes = true;
cpuinfo_isa.sha1 = true;
cpuinfo_isa.sha2 = true;
diff --git a/src/arm/midr.h b/src/arm/midr.h
index 34d7780..2638517 100644
--- a/src/arm/midr.h
+++ b/src/arm/midr.h
@@ -183,6 +183,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
case UINT32_C(0x51002050): /* Kryo Gold */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+ case UINT32_C(0x4100D410): /* Cortex-A78 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 55b61df..0d7a7d7 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -91,6 +91,9 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD0E: /* Cortex-A76AE */
*uarch = cpuinfo_uarch_cortex_a76;
break;
+ case 0xD41: /* Cortex-A78 */
+ *uarch = cpuinfo_uarch_cortex_a78;
+ break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xD4A:
*uarch = cpuinfo_uarch_neoverse_e1;
diff --git a/tools/cpu-info.c b/tools/cpu-info.c
index 2759068..429bbfa 100644
--- a/tools/cpu-info.c
+++ b/tools/cpu-info.c
@@ -183,6 +183,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A76";
case cpuinfo_uarch_cortex_a77:
return "Cortex-A77";
+ case cpuinfo_uarch_cortex_a78:
+ return "Cortex-A78";
case cpuinfo_uarch_scorpion:
return "Scorpion";
case cpuinfo_uarch_krait:
diff --git a/tools/isa-info.c b/tools/isa-info.c
index 98ef919..8365846 100644
--- a/tools/isa-info.c
+++ b/tools/isa-info.c
@@ -121,6 +121,7 @@ int main(int argc, char** argv) {
printf("\tARMv6-K: %s\n", cpuinfo_has_arm_v6k() ? "yes" : "no");
printf("\tARMv7: %s\n", cpuinfo_has_arm_v7() ? "yes" : "no");
printf("\tARMv7 MP: %s\n", cpuinfo_has_arm_v7mp() ? "yes" : "no");
+ printf("\tARMv8: %s\n", cpuinfo_has_arm_v8() ? "yes" : "no");
printf("\tIDIV: %s\n", cpuinfo_has_arm_idiv() ? "yes" : "no");
printf("Floating-Point support:\n");

View File

@ -2,6 +2,11 @@
load("//third_party:repo.bzl", "third_party_http_archive")
# Sanitize a dependency so that it works correctly from code that includes
# TensorFlow as a submodule.
def clean_dep(dep):
return str(Label(dep))
def repo():
third_party_http_archive(
name = "cpuinfo",
@ -12,4 +17,5 @@ def repo():
"https://github.com/pytorch/cpuinfo/archive/6cecd15784fcb6c5c0aa7311c6248879ce2cb8b2.zip",
],
build_file = "//third_party/cpuinfo:BUILD.bazel",
patch_file = clean_dep("//third_party/cpuinfo:cpuinfo.patch"),
)