diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index e2679580e69..9928011d8b7 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -202,6 +202,21 @@ cc_library(
     ],
 )
 
+cc_test(
+    name = "abs_test",
+    srcs = ["abs_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "add_test",
     srcs = ["add_test.cc"],
@@ -232,6 +247,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "ceil_test",
+    srcs = ["ceil_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "conv_2d_test",
     srcs = ["conv_2d_test.cc"],
@@ -293,6 +323,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "floor_test",
+    srcs = ["floor_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "hard_swish_test",
     srcs = ["hard_swish_test.cc"],
@@ -308,6 +353,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "logistic_test",
+    srcs = ["logistic_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "max_pool_2d_test",
     srcs = ["max_pool_2d_test.cc"],
@@ -323,21 +383,6 @@ cc_test(
     ],
 )
 
-cc_test(
-    name = "logistic_test",
-    srcs = ["logistic_test.cc"],
-    linkopts = select({
-        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
-        "//conditions:default": [],
-    }),
-    deps = [
-        ":test_main",
-        ":unary_elementwise_tester",
-        ":xnnpack_delegate_test_mode",
-        "@com_google_googletest//:gtest",
-    ],
-)
-
 cc_test(
     name = "maximum_test",
     srcs = ["maximum_test.cc"],
@@ -383,6 +428,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "neg_test",
+    srcs = ["neg_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "pad_test",
     srcs = ["pad_test.cc"],
@@ -443,6 +503,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "round_test",
+    srcs = ["round_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "softmax_test",
     srcs = ["softmax_test.cc"],
@@ -458,6 +533,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "square_test",
+    srcs = ["square_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":test_main",
+        ":unary_elementwise_tester",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
 cc_test(
     name = "squared_difference_test",
     srcs = ["squared_difference_test.cc"],
diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md
index c50ea60a88e..a77eabc4870 100644
--- a/tensorflow/lite/delegates/xnnpack/README.md
+++ b/tensorflow/lite/delegates/xnnpack/README.md
@@ -86,6 +86,10 @@ benefit from XNNPACK delegate.
 
 Below is the list of current operators and limitations:
 
+### `ABS`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `ADD`
 
 * Inputs and outputs must be in 32-bit floating-point format.
@@ -100,6 +104,10 @@ Below is the list of current operators and limitations:
 * Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
   but fused `TANH` and `SIGN_BIT` activations are not.
 
+### `CEIL`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `CONV_2D`
 
 * Inputs and outputs must be in 32-bit floating-point format.
@@ -130,6 +138,10 @@ Below is the list of current operators and limitations:
 * Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
   but fused `TANH` and `SIGN_BIT` activations are not.
 
+### `FLOOR`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `HARD_SWISH`
 
 * Inputs and outputs must be in 32-bit floating-point format.
@@ -159,6 +171,10 @@ Below is the list of current operators and limitations:
 * Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported,
   but fused `TANH` and `SIGN_BIT` activations are not.
 
+### `NEG`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `PAD`
 
 * The first input and the output must be in 32-bit floating-point format.
@@ -185,11 +201,19 @@ Below is the list of current operators and limitations:
 
 * Inputs and outputs must be in 32-bit floating-point format.
 
+### `ROUND`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `SOFTMAX`
 
 * Inputs and outputs must be in 32-bit floating-point format.
 * Only `beta = 1.0` is supported.
 
+### `SQUARE`
+
+* Inputs and outputs must be in 32-bit floating-point format.
+
 ### `SQUARED_DIFFERENCE`
 
 * Inputs and outputs must be in 32-bit floating-point format.
diff --git a/tensorflow/lite/delegates/xnnpack/abs_test.cc b/tensorflow/lite/delegates/xnnpack/abs_test.cc
new file mode 100644
index 00000000000..b0ccb6f5ddd
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/abs_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Abs, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_ABS, xnnpack_delegate.get());
+}
+
+TEST(Abs, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_ABS, xnnpack_delegate.get());
+}
+
+TEST(Abs, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_ABS, xnnpack_delegate.get());
+}
+
+TEST(Abs, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_ABS,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Abs, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_ABS, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/ceil_test.cc b/tensorflow/lite/delegates/xnnpack/ceil_test.cc
new file mode 100644
index 00000000000..5043b4e7d62
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/ceil_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Ceil, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
+}
+
+TEST(Ceil, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
+}
+
+TEST(Ceil, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
+}
+
+TEST(Ceil, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_CEIL,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Ceil, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_CEIL, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/floor_test.cc b/tensorflow/lite/delegates/xnnpack/floor_test.cc
new file mode 100644
index 00000000000..5b3b2cad3f6
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/floor_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Floor, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
+}
+
+TEST(Floor, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
+}
+
+TEST(Floor, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
+}
+
+TEST(Floor, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_FLOOR,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Floor, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_FLOOR, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/neg_test.cc b/tensorflow/lite/delegates/xnnpack/neg_test.cc
new file mode 100644
index 00000000000..876dceec87b
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/neg_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Neg, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_NEG, xnnpack_delegate.get());
+}
+
+TEST(Neg, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_NEG, xnnpack_delegate.get());
+}
+
+TEST(Neg, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_NEG, xnnpack_delegate.get());
+}
+
+TEST(Neg, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_NEG,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Neg, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_NEG, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/round_test.cc b/tensorflow/lite/delegates/xnnpack/round_test.cc
new file mode 100644
index 00000000000..0481762ca19
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/round_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Round, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
+}
+
+TEST(Round, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
+}
+
+TEST(Round, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
+}
+
+TEST(Round, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_ROUND,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Round, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_ROUND, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/square_test.cc b/tensorflow/lite/delegates/xnnpack/square_test.cc
new file mode 100644
index 00000000000..4501cc4e6c2
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/square_test.cc
@@ -0,0 +1,120 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(Square, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
+}
+
+TEST(Square, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, width, channels})
+      .Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
+}
+
+TEST(Square, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, channels})
+      .Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
+}
+
+TEST(Square, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+
+  UnaryElementwiseTester().Shape({batch}).Test(BuiltinOperator_SQUARE,
+                                               xnnpack_delegate.get());
+}
+
+TEST(Square, MultiThreading) {
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto channels = shape_rng();
+
+  UnaryElementwiseTester()
+      .Shape({batch, height, width, channels})
+      .Test(BuiltinOperator_SQUARE, xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.cc b/tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.cc
index ce787d757bc..84ad00ebf48 100644
--- a/tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.cc
+++ b/tensorflow/lite/delegates/xnnpack/unary_elementwise_tester.cc
@@ -86,12 +86,29 @@ void UnaryElementwiseTester::Test(tflite::BuiltinOperator unary_op,
   float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
       delegate_interpreter->outputs()[0]);
 
-  for (size_t i = 0; i < Size(); i++) {
-    ASSERT_NEAR(
-        default_output_data[i], delegate_output_data[i],
-        std::numeric_limits<float>::epsilon() *
-            std::max(std::abs(default_output_data[i]) * RelativeTolerance(),
-                     1.0f));
+  switch (unary_op) {
+    case BuiltinOperator_ABS:
+    case BuiltinOperator_CEIL:
+    case BuiltinOperator_FLOOR:
+    case BuiltinOperator_NEG:
+    case BuiltinOperator_RELU:
+    case BuiltinOperator_RELU_N1_TO_1:
+    case BuiltinOperator_RELU6:
+    case BuiltinOperator_ROUND:
+    case BuiltinOperator_SQUARE:
+      for (size_t i = 0; i < Size(); i++) {
+        ASSERT_EQ(default_output_data[i], delegate_output_data[i]);
+      }
+      break;
+    default:
+      for (size_t i = 0; i < Size(); i++) {
+        ASSERT_NEAR(
+            default_output_data[i], delegate_output_data[i],
+            std::numeric_limits<float>::epsilon() *
+                std::max(std::abs(default_output_data[i]) * RelativeTolerance(),
+                         1.0f));
+      }
+      break;
   }
 }
 
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index fe39ac24d52..11d9ec3d4fc 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -766,6 +766,9 @@ class Subgraph {
     // supressed.
     TfLiteContext* logging_context = subgraph == nullptr ? nullptr : context;
     switch (registration->builtin_code) {
+      case kTfLiteBuiltinAbs:
+        return VisitAbsNode(subgraph, logging_context, node_index, node,
+                            context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinAdd: {
         const TfLiteAddParams* add_params =
             static_cast<const TfLiteAddParams*>(node->builtin_data);
@@ -781,6 +784,9 @@ class Subgraph {
                                       node, context->tensors, pool_params,
                                       xnnpack_tensors);
       }
+      case kTfLiteBuiltinCeil:
+        return VisitCeilNode(subgraph, logging_context, node_index, node,
+                             context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinConv2d: {
         const TfLiteConvParams* conv_params =
             static_cast<const TfLiteConvParams*>(node->builtin_data);
@@ -812,6 +818,9 @@ class Subgraph {
                                        node, context->tensors, fc_params,
                                        quasi_static_tensors, xnnpack_tensors);
       }
+      case kTfLiteBuiltinFloor:
+        return VisitFloorNode(subgraph, logging_context, node_index, node,
+                              context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinHardSwish:
         return VisitHardSwishNode(subgraph, logging_context, node_index, node,
                                   context->tensors, xnnpack_tensors);
@@ -839,6 +848,9 @@ class Subgraph {
         return VisitMulNode(subgraph, logging_context, node_index, node,
                             context->tensors, mul_params, xnnpack_tensors);
       }
+      case kTfLiteBuiltinNeg:
+        return VisitNegNode(subgraph, logging_context, node_index, node,
+                            context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinPad:
         return VisitPadNode(subgraph, logging_context, node_index, node,
                             context->tensors, xnnpack_tensors);
@@ -856,6 +868,9 @@ class Subgraph {
       case kTfLiteBuiltinRelu6:
         return VisitReluNode(subgraph, logging_context, node_index, node,
                              context->tensors, 0.0f, 6.0f, xnnpack_tensors);
+      case kTfLiteBuiltinRound:
+        return VisitRoundNode(subgraph, logging_context, node_index, node,
+                              context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinSoftmax: {
         const TfLiteSoftmaxParams* softmax_params =
             static_cast<const TfLiteSoftmaxParams*>(node->builtin_data);
@@ -864,6 +879,9 @@ class Subgraph {
                                 context->tensors, softmax_params,
                                 xnnpack_tensors);
       }
+      case kTfLiteBuiltinSquare:
+        return VisitSquareNode(subgraph, logging_context, node_index, node,
+                               context->tensors, xnnpack_tensors);
       case kTfLiteBuiltinSquaredDifference:
         return VisitSquaredDifferenceNode(subgraph, logging_context, node_index,
                                           node, context->tensors,
@@ -910,6 +928,39 @@ class Subgraph {
     }
   }
 
+  static TfLiteStatus VisitAbsNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_abs(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context, "failed to delegate ABS node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitAddNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
@@ -1018,6 +1069,39 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitCeilNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_ceiling(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context, "failed to delegate CEIL node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitConv2DNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
@@ -1396,6 +1480,39 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitFloorNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_floor(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context, "failed to delegate FLOOR node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitHardSwishNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
@@ -1876,6 +1993,39 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitNegNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_negate(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context, "failed to delegate NEG node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitPadNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
@@ -2039,6 +2189,39 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitRoundNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_bankers_rounding(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context, "failed to delegate ROUND node #%d",
+                           node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitSoftmaxNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
@@ -2082,6 +2265,39 @@ class Subgraph {
     return kTfLiteOk;
   }
 
+  static TfLiteStatus VisitSquareNode(
+      xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
+      TfLiteNode* node, const TfLiteTensor* tensors,
+      const std::vector<uint32_t>& xnnpack_tensors) {
+    TF_LITE_ENSURE_STATUS(
+        CheckNumInputsAndOutputs(logging_context, node, 1, 1, node_index));
+
+    const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, input_tensor, node->inputs->data[0], node_index));
+
+    const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]];
+    TF_LITE_ENSURE_STATUS(CheckTensorFloatType(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+    TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation(
+        logging_context, output_tensor, node->outputs->data[0], node_index));
+
+    if (subgraph != nullptr) {
+      const xnn_status status = xnn_define_square(
+          subgraph, /*input_id=*/xnnpack_tensors[node->inputs->data[0]],
+          /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0);
+      if (status != xnn_status_success) {
+        TF_LITE_KERNEL_LOG(logging_context,
+                           "failed to delegate SQUARE node #%d", node_index);
+        return kTfLiteError;
+      }
+    }
+
+    return kTfLiteOk;
+  }
+
   static TfLiteStatus VisitSquaredDifferenceNode(
       xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index,
       TfLiteNode* node, const TfLiteTensor* tensors,
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9df2cf1a974..db684e55240 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -164,11 +164,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "XNNPACK",
-        sha256 = "30b468db7d85b5f4afb3fd60947d690bc1c29d4eccca8fffeabe5b5328621c0e",
-        strip_prefix = "XNNPACK-9d3a459441c272d82be14b579656b961066eba2c",
+        sha256 = "945c0e3c1c3a4cc58f99a5ccf3cdbcf710db99f2ef7f7821eb6be93e59e53534",
+        strip_prefix = "XNNPACK-5fab409eeff66b0cca5d0482c69b322f2fff6880",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/9d3a459441c272d82be14b579656b961066eba2c.zip",
-            "https://github.com/google/XNNPACK/archive/9d3a459441c272d82be14b579656b961066eba2c.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/5fab409eeff66b0cca5d0482c69b322f2fff6880.zip",
+            "https://github.com/google/XNNPACK/archive/5fab409eeff66b0cca5d0482c69b322f2fff6880.zip",
         ],
     )
 
diff --git a/third_party/cpuinfo/cpuinfo.patch b/third_party/cpuinfo/cpuinfo.patch
new file mode 100644
index 00000000000..d9a16c559f3
--- /dev/null
+++ b/third_party/cpuinfo/cpuinfo.patch
@@ -0,0 +1,130 @@
+diff --git a/include/cpuinfo.h b/include/cpuinfo.h
+index 6c67c34..85ce174 100644
+--- a/include/cpuinfo.h
++++ b/include/cpuinfo.h
+@@ -417,6 +417,8 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_cortex_a76   = 0x00300376,
+ 	/** ARM Cortex-A77. */
+ 	cpuinfo_uarch_cortex_a77   = 0x00300377,
++	/** ARM Cortex-A78. */
++	cpuinfo_uarch_cortex_a78   = 0x00300378,
+ 
+ 	/** ARM Neoverse N1. */
+ 	cpuinfo_uarch_neoverse_n1  = 0x00300400,
+@@ -1434,6 +1436,7 @@ static inline bool cpuinfo_has_x86_sha(void) {
+ 			bool armv6k;
+ 			bool armv7;
+ 			bool armv7mp;
++			bool armv8;
+ 			bool idiv;
+ 
+ 			bool vfpv2;
+@@ -1521,6 +1524,16 @@ static inline bool cpuinfo_has_arm_v7mp(void) {
+ 	#endif
+ }
+ 
++static inline bool cpuinfo_has_arm_v8(void) {
++	#if CPUINFO_ARCH_ARM64
++		return true;
++	#elif CPUINFO_ARCH_ARM
++		return cpuinfo_isa.armv8;
++	#else
++		return false;
++	#endif
++}
++
+ static inline bool cpuinfo_has_arm_idiv(void) {
+ 	#if CPUINFO_ARCH_ARM64
+ 		return true;
+@@ -1645,6 +1658,16 @@ static inline bool cpuinfo_has_arm_neon_fma(void) {
+ 	#endif
+ }
+ 
++static inline bool cpuinfo_has_arm_neon_v8(void) {
++	#if CPUINFO_ARCH_ARM64
++		return true;
++	#elif CPUINFO_ARCH_ARM
++		return cpuinfo_isa.neon && cpuinfo_isa.armv8;
++	#else
++		return false;
++	#endif
++}
++
+ static inline bool cpuinfo_has_arm_atomics(void) {
+ 	#if CPUINFO_ARCH_ARM64
+ 		return cpuinfo_isa.atomics;
+diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
+index 64dd168..41f9972 100644
+--- a/src/arm/linux/aarch32-isa.c
++++ b/src/arm/linux/aarch32-isa.c
+@@ -43,6 +43,7 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
+ 		isa->armv6k  = true;
+ 		isa->armv7   = true;
+ 		isa->armv7mp = true;
++		isa->armv8   = true;
+ 		isa->thumb  = true;
+ 		isa->thumb2 = true;
+ 		isa->idiv = true;
+diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
+index 058cfc2..e912de6 100644
+--- a/src/arm/mach/init.c
++++ b/src/arm/mach/init.c
+@@ -307,6 +307,7 @@ void cpuinfo_arm_mach_init(void) {
+ 		case CPU_TYPE_ARM:
+ 			switch (cpu_subtype) {
+ 				case CPU_SUBTYPE_ARM_V8:
++					cpuinfo_isa.armv8 = true;
+ 					cpuinfo_isa.aes = true;
+ 					cpuinfo_isa.sha1 = true;
+ 					cpuinfo_isa.sha2 = true;
+diff --git a/src/arm/midr.h b/src/arm/midr.h
+index 34d7780..2638517 100644
+--- a/src/arm/midr.h
++++ b/src/arm/midr.h
+@@ -183,6 +183,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
+ 		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+ 		case UINT32_C(0x51002050): /* Kryo Gold */
+ 		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
++		case UINT32_C(0x4100D410): /* Cortex-A78 */
+ 		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+ 		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ 		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+diff --git a/src/arm/uarch.c b/src/arm/uarch.c
+index 55b61df..0d7a7d7 100644
+--- a/src/arm/uarch.c
++++ b/src/arm/uarch.c
+@@ -91,6 +91,9 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 				case 0xD0E: /* Cortex-A76AE */
+ 					*uarch = cpuinfo_uarch_cortex_a76;
+ 					break;
++				case 0xD41: /* Cortex-A78 */
++					*uarch = cpuinfo_uarch_cortex_a78;
++					break;
+ #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ 				case 0xD4A:
+ 					*uarch = cpuinfo_uarch_neoverse_e1;
+diff --git a/tools/cpu-info.c b/tools/cpu-info.c
+index 2759068..429bbfa 100644
+--- a/tools/cpu-info.c
++++ b/tools/cpu-info.c
+@@ -183,6 +183,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Cortex-A76";
+ 		case cpuinfo_uarch_cortex_a77:
+ 			return "Cortex-A77";
++		case cpuinfo_uarch_cortex_a78:
++			return "Cortex-A78";
+ 		case cpuinfo_uarch_scorpion:
+ 			return "Scorpion";
+ 		case cpuinfo_uarch_krait:
+diff --git a/tools/isa-info.c b/tools/isa-info.c
+index 98ef919..8365846 100644
+--- a/tools/isa-info.c
++++ b/tools/isa-info.c
+@@ -121,6 +121,7 @@ int main(int argc, char** argv) {
+ 		printf("\tARMv6-K: %s\n", cpuinfo_has_arm_v6k() ? "yes" : "no");
+ 		printf("\tARMv7: %s\n", cpuinfo_has_arm_v7() ? "yes" : "no");
+ 		printf("\tARMv7 MP: %s\n", cpuinfo_has_arm_v7mp() ? "yes" : "no");
++		printf("\tARMv8: %s\n", cpuinfo_has_arm_v8() ? "yes" : "no");
+ 		printf("\tIDIV: %s\n", cpuinfo_has_arm_idiv() ? "yes" : "no");
+ 
+ 	printf("Floating-Point support:\n");
diff --git a/third_party/cpuinfo/workspace.bzl b/third_party/cpuinfo/workspace.bzl
index 7ba603b800d..69b51913799 100644
--- a/third_party/cpuinfo/workspace.bzl
+++ b/third_party/cpuinfo/workspace.bzl
@@ -2,6 +2,11 @@
 
 load("//third_party:repo.bzl", "third_party_http_archive")
 
+# Sanitize a dependency so that it works correctly from code that includes
+# TensorFlow as a submodule.
+def clean_dep(dep):
+    return str(Label(dep))
+
 def repo():
     third_party_http_archive(
         name = "cpuinfo",
@@ -12,4 +17,5 @@ def repo():
             "https://github.com/pytorch/cpuinfo/archive/6cecd15784fcb6c5c0aa7311c6248879ce2cb8b2.zip",
         ],
         build_file = "//third_party/cpuinfo:BUILD.bazel",
+        patch_file = clean_dep("//third_party/cpuinfo:cpuinfo.patch"),
     )