diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD index db5bf2b099f..45276e7b560 100644 --- a/tensorflow/lite/delegates/xnnpack/BUILD +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -28,6 +28,21 @@ cc_library( ], ) +cc_library( + name = "xnnpack_delegate_test_mode", + srcs = ["xnnpack_delegate.cc"], + hdrs = ["xnnpack_delegate.h"], + copts = ["-DXNNPACK_DELEGATE_TEST_MODE=1"], + linkstatic = True, + deps = [ + "//tensorflow/lite:kernel_api", + "//tensorflow/lite:util", + "//tensorflow/lite/c:common", + "//tensorflow/lite/schema:schema_fbs", + "@XNNPACK", + ], +) + ############################## Integration tests ############################### cc_library( @@ -42,6 +57,51 @@ cc_library( ], ) +cc_library( + name = "pool_2d_tester", + testonly = 1, + srcs = ["pool_2d_tester.cc"], + hdrs = ["pool_2d_tester.h"], + deps = [ + "//tensorflow/lite:framework", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + +cc_test( + name = "average_pool_2d_test", + srcs = ["average_pool_2d_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + ":pool_2d_tester", + ":test_main", + ":xnnpack_delegate_test_mode", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "max_pool_2d_test", + srcs = ["max_pool_2d_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + ":pool_2d_tester", + ":test_main", + ":xnnpack_delegate_test_mode", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "conv_2d_test", srcs = ["conv_2d_test.cc"], diff --git a/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc b/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc new file mode 100644 index 00000000000..515fec8083f --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc @@ -0,0 +1,370 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include +#include + +#include +#include "tensorflow/lite/delegates/xnnpack/pool_2d_tester.h" +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +namespace tflite { +namespace xnnpack { + +TEST(AveragePool2D, EqualPoolAndStrideWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t pool_height = pool_rng(); + const int32_t pool_width = pool_rng(); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_height) + .PoolingWidth(pool_width) + .StrideHeight(pool_height) + .StrideWidth(pool_width) + .SamePadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, EqualPoolAndStrideWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t pool_height = pool_rng(); + const int32_t pool_width = pool_rng(); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_height) + .PoolingWidth(pool_width) + .StrideHeight(pool_height) + .StrideWidth(pool_width) + .ValidPadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, LargePoolSmallStrideWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(4, 7), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SamePadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, LargePoolSmallStrideWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = 
std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(4, 7), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ValidPadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, GlobalPooling) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t height = input_rng(); + const int32_t width = input_rng(); + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(height) + .InputWidth(width) + .Channels(channel_rng()) + .PoolingHeight(height) + .PoolingWidth(width) + .ValidPadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, ReluActivation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ReluActivation() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, Relu6Activation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .Relu6Activation() + .Test(BuiltinOperator_AVERAGE_POOL_2D, 
xnnpack_delegate.get()); +} + +TEST(AveragePool2D, ReluMinus1To1Activation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ReluMinus1To1Activation() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, DISABLED_TanhActivation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .TanhActivation() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, DISABLED_SignBitActivation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SignBitActivation() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, MultiThreading) { + TfLiteXNNPackDelegateOptions delegate_options = + TfLiteXNNPackDelegateOptionsDefault(); + delegate_options.num_threads = 2; + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), 
std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc b/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc new file mode 100644 index 00000000000..aaf217800d8 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc @@ -0,0 +1,370 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include +#include "tensorflow/lite/delegates/xnnpack/pool_2d_tester.h" +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +namespace tflite { +namespace xnnpack { + +TEST(MaxPool2D, EqualPoolAndStrideWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t pool_height = pool_rng(); + const int32_t pool_width = pool_rng(); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_height) + .PoolingWidth(pool_width) + .StrideHeight(pool_height) + .StrideWidth(pool_width) + .SamePadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, EqualPoolAndStrideWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(2, 7), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t pool_height = pool_rng(); + const int32_t pool_width = pool_rng(); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + 
.InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_height) + .PoolingWidth(pool_width) + .StrideHeight(pool_height) + .StrideWidth(pool_width) + .ValidPadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, LargePoolSmallStrideWithSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(4, 7), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SamePadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, LargePoolSmallStrideWithValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(4, 7), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ValidPadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, GlobalPooling) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + const int32_t height = input_rng(); + const int32_t width = input_rng(); + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(height) + .InputWidth(width) + .Channels(channel_rng()) + .PoolingHeight(height) + .PoolingWidth(width) + .ValidPadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, ReluActivation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + 
std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ReluActivation() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, Relu6Activation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .Relu6Activation() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, ReluMinus1To1Activation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .ReluMinus1To1Activation() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, DISABLED_TanhActivation) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .TanhActivation() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, DISABLED_SignBitActivation) { + std::unique_ptr + 
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .SignBitActivation() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, MultiThreading) { + TfLiteXNNPackDelegateOptions delegate_options = + TfLiteXNNPackDelegateOptionsDefault(); + delegate_options.num_threads = 2; + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto pool_rng = + std::bind(std::uniform_int_distribution(3, 5), std::ref(rng)); + auto stride_rng = + std::bind(std::uniform_int_distribution(2, 3), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(pool_rng()) + .PoolingWidth(pool_rng()) + .StrideHeight(stride_rng()) + .StrideWidth(stride_rng()) + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/pool_2d_tester.cc b/tensorflow/lite/delegates/xnnpack/pool_2d_tester.cc new file mode 100644 index 00000000000..fab83e76fd2 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/pool_2d_tester.cc @@ -0,0 +1,196 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/xnnpack/pool_2d_tester.h"
+
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+namespace tflite {
+namespace xnnpack {
+
+void Pool2DTester::Test(tflite::BuiltinOperator pool_op,
+                        TfLiteDelegate* delegate) const {
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto range_rng = std::bind(
+      std::uniform_real_distribution<float>(-25.0f, 25.0f), std::ref(rng));
+
+  std::vector<char> buffer = CreateTfLiteModel(pool_op);
+  const tflite::Model* model = tflite::GetModel(buffer.data());
+
+  std::unique_ptr<tflite::Interpreter> delegate_interpreter;
+  ASSERT_EQ(tflite::InterpreterBuilder(
+                model, tflite::ops::builtin::BuiltinOpResolver())(
+                &delegate_interpreter),
+            kTfLiteOk);
+  std::unique_ptr<tflite::Interpreter> default_interpreter;
+  ASSERT_EQ(tflite::InterpreterBuilder(
+                model, tflite::ops::builtin::BuiltinOpResolver())(
+                &default_interpreter),
+            kTfLiteOk);
+
+  ASSERT_TRUE(delegate_interpreter);
+  ASSERT_TRUE(default_interpreter);
+
+  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
+  ASSERT_EQ(default_interpreter->inputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
+  ASSERT_EQ(default_interpreter->outputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
+  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);
+
+  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);
+
+  float* default_input_data = default_interpreter->typed_tensor<float>(
+      default_interpreter->inputs()[0]);
+  for (int32_t i = 0; i < BatchSize(); i++) {
+    for (int32_t c = 0; c < Channels(); c++) {
+      // Use the same range of all-positive or all-negative values to generate
+      // all pixels within the same batch index & channel, but different
+      // ranges for different channels or batches. This ensures that no
+      // catastrophic cancellation occurs, while the test still covers both
+      // positive and negative inputs.
+      const float range = range_rng();
+      auto value_rng =
+          std::bind(std::uniform_real_distribution<float>(
+                        std::min(range, 0.0f), std::max(range, 0.0f)),
+                    std::ref(rng));
+      for (int32_t y = 0; y < InputHeight(); y++) {
+        for (int32_t x = 0; x < InputWidth(); x++) {
+          const int32_t index =
+              ((i * InputHeight() + y) * InputWidth() + x) * Channels() + c;
+          default_input_data[index] = value_rng();
+        }
+      }
+    }
+  }
+
+  float* xnnpack_input_data = delegate_interpreter->typed_tensor<float>(
+      delegate_interpreter->inputs()[0]);
+  std::copy(default_input_data,
+            default_input_data +
+                BatchSize() * InputHeight() * InputWidth() * Channels(),
+            xnnpack_input_data);
+
+  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
+  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);
+
+  float* default_output_data = default_interpreter->typed_tensor<float>(
+      default_interpreter->outputs()[0]);
+  float* xnnpack_output_data = delegate_interpreter->typed_tensor<float>(
+      delegate_interpreter->outputs()[0]);
+
+  for (int32_t i = 0; i < BatchSize(); i++) {
+    for (int32_t y = 0; y < OutputHeight(); y++) {
+      for (int32_t x = 0; x < OutputWidth(); x++) {
+        for (int32_t c = 0; c < Channels(); c++) {
+          const int32_t index =
+              ((i * OutputHeight() + y) * OutputWidth() + x) * Channels() + c;
+          if (pool_op == BuiltinOperator_MAX_POOL_2D) {
+            // MaxPooling results must be exact.
+            ASSERT_EQ(default_output_data[index], xnnpack_output_data[index])
+                << "batch " << i << " / " << BatchSize() << ", y position "
+                << y << " / " << OutputHeight() << ", x position " << x
+                << " / " << OutputWidth() << ", channel " << c << " / "
+                << Channels();
+          } else {
+            ASSERT_NEAR(default_output_data[index], xnnpack_output_data[index],
+                        std::abs(default_output_data[index]) * 3.0e-6f)
+                << "batch " << i << " / " << BatchSize() << ", y position "
+                << y << " / " << OutputHeight() << ", x position " << x
+                << " / " << OutputWidth() << ", channel " << c << " / "
+                << Channels();
+          }
+        }
+      }
+    }
+  }
+}
+
+std::vector<char> Pool2DTester::CreateTfLiteModel(
+    tflite::BuiltinOperator pool_op) const {
+  flatbuffers::FlatBufferBuilder builder;
+  flatbuffers::Offset<tflite::OperatorCode> operator_code =
+      CreateOperatorCode(builder, pool_op, 0);
+
+  flatbuffers::Offset<tflite::Pool2DOptions> pool_2d_options =
+      CreatePool2DOptions(builder, Padding(), StrideWidth(), StrideHeight(),
+                          PoolingWidth(), PoolingHeight(), Activation());
+
+  const flatbuffers::Offset<tflite::Buffer> null_buffer =
+      tflite::CreateBuffer(builder, builder.CreateVector<uint8_t>({}));
+
+  const std::array<int32_t, 4> input_shape{
+      {BatchSize(), InputHeight(), InputWidth(), Channels()}};
+  const std::array<int32_t, 4> output_shape{
+      {BatchSize(), OutputHeight(), OutputWidth(), Channels()}};
+
+  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
+      tflite::CreateTensor(
+          builder,
+          builder.CreateVector(input_shape.data(), input_shape.size()),
+          tflite::TensorType_FLOAT32),
+      tflite::CreateTensor(builder,
+                           builder.CreateVector(output_shape.data(),
+                                                output_shape.size()),
+                           tflite::TensorType_FLOAT32),
+  }};
+
+  const std::array<int32_t, 1> op_inputs{{0}};
+  const std::array<int32_t, 1> op_outputs{{1}};
+
+  flatbuffers::Offset<tflite::Operator> op = CreateOperator(
+      builder, /*opcode_index=*/0,
+      builder.CreateVector(op_inputs.data(), op_inputs.size()),
+      builder.CreateVector(op_outputs.data(), op_outputs.size()),
+      tflite::BuiltinOptions_Pool2DOptions, pool_2d_options.Union());
+
+  const std::array<int32_t, 1> subgraph_inputs{{0}};
+  const std::array<int32_t, 1> subgraph_outputs{{1}};
+  flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
+      builder, builder.CreateVector(tensors.data(), tensors.size()),
+      builder.CreateVector(subgraph_inputs.data(),
+                           subgraph_inputs.size()),
+      builder.CreateVector(subgraph_outputs.data(),
+                           subgraph_outputs.size()),
+      builder.CreateVector(&op, 1));
+
+  flatbuffers::Offset<flatbuffers::String> description =
+      builder.CreateString("Pool2D model");
+
+  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(
+      builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1),
+      builder.CreateVector(&subgraph, 1), description,
+      builder.CreateVector(&null_buffer, 1));
+
+  builder.Finish(model_buffer);
+
+  return std::vector<char>(builder.GetBufferPointer(),
+                           builder.GetBufferPointer() + builder.GetSize());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/pool_2d_tester.h b/tensorflow/lite/delegates/xnnpack/pool_2d_tester.h
new file mode 100644
index 00000000000..3125e9231f6
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/pool_2d_tester.h
@@ -0,0 +1,183 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_POOL_2D_TESTER_H_
+#define TENSORFLOW_LITE_DELEGATES_XNNPACK_POOL_2D_TESTER_H_
+
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+namespace tflite {
+namespace xnnpack {
+
+class Pool2DTester {
+ public:
+  Pool2DTester() = default;
+  Pool2DTester(const Pool2DTester&) = delete;
+  Pool2DTester& operator=(const Pool2DTester&) = delete;
+
+  inline Pool2DTester& BatchSize(int32_t batch_size) {
+    EXPECT_GT(batch_size, 0);
+    batch_size_ = batch_size;
+    return *this;
+  }
+
+  inline int32_t BatchSize() const { return batch_size_; }
+
+  inline Pool2DTester& Channels(int32_t channels) {
+    EXPECT_GT(channels, 0);
+    channels_ = channels;
+    return *this;
+  }
+
+  inline int32_t Channels() const { return channels_; }
+
+  inline Pool2DTester& InputHeight(int32_t input_height) {
+    EXPECT_GT(input_height, 0);
+    input_height_ = input_height;
+    return *this;
+  }
+
+  inline int32_t InputHeight() const { return input_height_; }
+
+  inline Pool2DTester& InputWidth(int32_t input_width) {
+    EXPECT_GT(input_width, 0);
+    input_width_ = input_width;
+    return *this;
+  }
+
+  inline int32_t InputWidth() const { return input_width_; }
+
+  inline int32_t OutputWidth() const {
+    if (Padding() == ::tflite::Padding_SAME) {
+      return (InputWidth() - 1) / StrideWidth() + 1;
+    } else {
+      return (InputWidth() - PoolingWidth()) / StrideWidth() + 1;
+    }
+  }
+
+  inline int32_t OutputHeight() const {
+    if (Padding() == ::tflite::Padding_SAME) {
+      return (InputHeight() - 1) / StrideHeight() + 1;
+    } else {
+      return (InputHeight() - PoolingHeight()) / StrideHeight() + 1;
+    }
+  }
+
+  inline Pool2DTester& PoolingHeight(int32_t pooling_height) {
+    EXPECT_GT(pooling_height, 0);
+    pooling_height_ = pooling_height;
+    return *this;
+  }
+
+  inline int32_t PoolingHeight() const { return pooling_height_; }
+
+  inline Pool2DTester& PoolingWidth(int32_t pooling_width) {
+    EXPECT_GT(pooling_width, 0);
+    pooling_width_ = pooling_width;
+    return *this;
+  }
+
+  inline int32_t PoolingWidth() const { return pooling_width_; }
+
+  inline Pool2DTester& StrideHeight(int32_t stride_height) {
+    EXPECT_GT(stride_height, 0);
+    stride_height_ = stride_height;
+    return *this;
+  }
+
+  inline int32_t StrideHeight() const { return stride_height_; }
+
+  inline Pool2DTester& StrideWidth(int32_t stride_width) {
+    EXPECT_GT(stride_width, 0);
+    stride_width_ = stride_width;
+    return *this;
+  }
+
+  inline int32_t StrideWidth() const { return stride_width_; }
+
+  inline Pool2DTester& SamePadding() {
+    padding_ = ::tflite::Padding_SAME;
+    return *this;
+  }
+
+  inline Pool2DTester& ValidPadding() {
+    padding_ = ::tflite::Padding_VALID;
+    return *this;
+  }
+
+  inline Pool2DTester& ReluActivation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU;
+    return *this;
+  }
+
+  inline Pool2DTester& Relu6Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU6;
+    return *this;
+  }
+
+  inline Pool2DTester& ReluMinus1To1Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU_N1_TO_1;
+    return *this;
+  }
+
+  inline Pool2DTester& TanhActivation() {
+    activation_ = ::tflite::ActivationFunctionType_TANH;
+    return *this;
+  }
+
+  inline Pool2DTester& SignBitActivation() {
+    activation_ = ::tflite::ActivationFunctionType_SIGN_BIT;
+    return *this;
+  }
+
+  void Test(tflite::BuiltinOperator pool_op, TfLiteDelegate* delegate) const;
+
+ private:
+  std::vector<char> CreateTfLiteModel(tflite::BuiltinOperator pool_op) const;
+
+  inline ::tflite::Padding Padding() const { return padding_; }
+
+  inline ::tflite::ActivationFunctionType Activation() const {
+    return activation_;
+  }
+
+  int32_t batch_size_ = 1;
+  int32_t channels_ = 1;
+  int32_t input_height_ = 1;
+  int32_t input_width_ = 1;
+  int32_t pooling_height_ = 1;
+  int32_t pooling_width_ = 1;
+  int32_t stride_height_ = 1;
+  int32_t stride_width_ = 1;
+  ::tflite::Padding padding_ = ::tflite::Padding_VALID;
+  ::tflite::ActivationFunctionType activation_ =
+      ::tflite::ActivationFunctionType_NONE;
+};
+
+}  // namespace xnnpack
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_POOL_2D_TESTER_H_
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index bb5c66899b8..ecebacee420 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -1278,6 +1278,20 @@ TfLiteIntArray* GetOpsToReplace(TfLiteContext* context) {
     nodes_to_replace->data[nodes_to_replace->size++] = node_index;
   }
+
+#ifdef XNNPACK_DELEGATE_TEST_MODE
+  // In the test mode build (used by unit tests), the XNNPACK delegate claims
+  // to support all operators in the execution plan, disabling fallback to the
+  // default TensorFlow Lite kernels. Thus, if any of the ops in the model are
+  // not supported by the delegate, they will cause a failure in
+  // ::tflite::Interpreter::ModifyGraphWithDelegate, to be caught in the unit
+  // tests.
+  nodes_to_replace->size = execution_plan->size;
+  std::copy(&execution_plan->data[0],
+            &execution_plan->data[execution_plan->size],
+            &nodes_to_replace->data[0]);
+#endif
+
   return nodes_to_replace;
 }
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index c5efa540186..667a49c59a6 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -156,11 +156,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "XNNPACK",
-        sha256 = "7019a386752afaa5dd941d17201c5ff863b6ff1e77539e8cfcff0d13647a9f4a",
-        strip_prefix = "XNNPACK-68eef3f15735c07774b3722f7b1b1142cebc9fed",
+        sha256 = "583e408c9ab9a6ec241a54e0775bc170ed2ea3d1073668c9379dbfe282fa8acc",
+        strip_prefix = "XNNPACK-24d9a03a9ee036949f8c56878ecec17ab400fc23",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/68eef3f15735c07774b3722f7b1b1142cebc9fed.zip",
-            "https://github.com/google/XNNPACK/archive/68eef3f15735c07774b3722f7b1b1142cebc9fed.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/24d9a03a9ee036949f8c56878ecec17ab400fc23.zip",
+            "https://github.com/google/XNNPACK/archive/24d9a03a9ee036949f8c56878ecec17ab400fc23.zip",
         ],
     )
@@ -176,11 +176,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "pthreadpool",
-        sha256 = "f894d845cefc091291329712deec85ce7020546f6eaff200b690ae04b6094535",
-        strip_prefix = "pthreadpool-bfa3b9ce6cb71dc8b792e39d24717320a4f92572",
+        sha256 = "27c039e73846d0bdfe393833e91afafe45e61ba792cc60e1c62808090554ce4d",
+        strip_prefix = "pthreadpool-a61ed1ab70389c62f6f699ca1a30a2421d3ea594",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/Maratyszcza/pthreadpool/archive/bfa3b9ce6cb71dc8b792e39d24717320a4f92572.zip",
-            "https://github.com/Maratyszcza/pthreadpool/archive/bfa3b9ce6cb71dc8b792e39d24717320a4f92572.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/Maratyszcza/pthreadpool/archive/a61ed1ab70389c62f6f699ca1a30a2421d3ea594.zip",
+            "https://github.com/Maratyszcza/pthreadpool/archive/a61ed1ab70389c62f6f699ca1a30a2421d3ea594.zip",
         ],
     )
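Note on intended usage: Pool2DTester exposes a fluent, builder-style API, and every test in this change follows the same pattern — create the XNNPACK delegate, configure the tester, then call Test() with the pooling operator under test. The sketch below restates that pattern with fixed, illustrative shapes in place of the randomized ones used in the patch; the test name and the concrete sizes are assumptions made for illustration only, not part of the change.

#include <memory>

#include <gtest/gtest.h>
#include "tensorflow/lite/delegates/xnnpack/pool_2d_tester.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

namespace tflite {
namespace xnnpack {

// Hypothetical example, not part of the patch: fixed shapes instead of the
// randomized ones generated in the tests above.
TEST(MaxPool2D, FixedShapeSketch) {
  // Create the delegate with default options; the custom deleter releases it.
  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
                       TfLiteXNNPackDelegateDelete);

  // Pool2DTester builds a single-op MAX_POOL_2D model, runs it with and
  // without the delegate, and compares the two outputs element-wise.
  Pool2DTester()
      .BatchSize(2)
      .InputHeight(16)
      .InputWidth(16)
      .Channels(8)
      .PoolingHeight(2)
      .PoolingWidth(2)
      .StrideHeight(2)
      .StrideWidth(2)
      .ValidPadding()
      .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get());
}

}  // namespace xnnpack
}  // namespace tflite

Such a test would link against the :pool_2d_tester, :test_main, and :xnnpack_delegate_test_mode targets added in the BUILD changes above, so that any operator the delegate cannot handle fails in ModifyGraphWithDelegate instead of silently falling back to the reference kernels.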