Pooling tasks modified to be Metal compatible.

Added pooling_test_util with unified tests. Added Metal pooling unit tests. PiperOrigin-RevId: 353513964 Change-Id: I5a58b319509b64f268904312611e4649b35a467a
2021-01-24 08:05:52 -08:00 · 2021-01-24 08:05:52 -08:00 · 4bbdace1a4
commit 4bbdace1a4
parent b65778df51
8 changed files with 262 additions and 150 deletions
--- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD
@ -392,7 +392,7 @@ cc_test(
        ":cl_test",
        "//tensorflow/lite/delegates/gpu/common:operations",
        "//tensorflow/lite/delegates/gpu/common:status",
-        "//tensorflow/lite/delegates/gpu/common/tasks:pooling",
+        "//tensorflow/lite/delegates/gpu/common/tasks:pooling_test_util",
        "@com_google_googletest//:gtest_main",
    ],
 )
--- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc
@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#include "tensorflow/lite/delegates/gpu/common/tasks/pooling.h"
-
 #include <vector>

 #include <gmock/gmock.h>
@ -22,9 +20,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h"
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
-
-using ::testing::FloatNear;
-using ::testing::Pointwise;
+#include "tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.h"

 namespace tflite {
 namespace gpu {
@ -32,136 +28,23 @@ namespace cl {
 namespace {

 TEST_F(OpenCLOperationTest, AveragePooling) {
-  TensorFloat32 src_tensor;
-  src_tensor.shape = BHWC(1, 2, 2, 2);
-  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
-
-  Pooling2DAttributes attr;
-  attr.padding.prepended = HW(0, 0);
-  attr.padding.appended = HW(0, 0);
-  attr.strides = HW(2, 2);
-  attr.kernel = HW(2, 2);
-  attr.type = PoolingType::AVERAGE;
-
-  for (auto storage : env_.GetSupportedStorages()) {
-    for (auto precision : env_.GetSupportedPrecisions()) {
-      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
-      OperationDef op_def;
-      op_def.precision = precision;
-      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
-      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
-      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreatePooling(op_def, attr);
-      ASSERT_OK(ExecuteGPUOperation(
-          src_tensor, creation_context_,
-          absl::make_unique<GPUOperation>(std::move(operation)),
-          BHWC(1, 1, 1, 2), &dst_tensor));
-      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {3.0f, 4.0f}));
-    }
-  }
+  auto status = AveragePoolingTest(&exec_env_);
+  ASSERT_TRUE(status.ok()) << status.error_message();
 }

 TEST_F(OpenCLOperationTest, AveragePoolingNonEmptyPadding) {
-  TensorFloat32 src_tensor;
-  src_tensor.shape = BHWC(1, 2, 2, 1);
-  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f};
-
-  Pooling2DAttributes attr;
-  attr.padding.prepended = HW(0, 0);
-  attr.padding.appended = HW(1, 1);
-  attr.strides = HW(1, 1);
-  attr.kernel = HW(2, 2);
-  attr.type = PoolingType::AVERAGE;
-
-  for (auto storage : env_.GetSupportedStorages()) {
-    for (auto precision : env_.GetSupportedPrecisions()) {
-      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
-      OperationDef op_def;
-      op_def.precision = precision;
-      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
-      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
-      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreatePooling(op_def, attr);
-      ASSERT_OK(ExecuteGPUOperation(
-          src_tensor, creation_context_,
-          absl::make_unique<GPUOperation>(std::move(operation)),
-          BHWC(1, 2, 2, 1), &dst_tensor));
-      EXPECT_THAT(dst_tensor.data,
-                  Pointwise(FloatNear(eps), {1.5f, 2.0f, 2.5f, 3.0f}));
-    }
-  }
+  auto status = AveragePoolingNonEmptyPaddingTest(&exec_env_);
+  ASSERT_TRUE(status.ok()) << status.error_message();
 }

 TEST_F(OpenCLOperationTest, MaxPooling) {
-  TensorFloat32 src_tensor;
-  src_tensor.shape = BHWC(1, 2, 2, 2);
-  src_tensor.data = {8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
-
-  Pooling2DAttributes attr;
-  attr.padding.prepended = HW(0, 0);
-  attr.padding.appended = HW(0, 0);
-  attr.strides = HW(2, 2);
-  attr.kernel = HW(2, 2);
-  attr.type = PoolingType::MAX;
-
-  for (auto storage : env_.GetSupportedStorages()) {
-    for (auto precision : env_.GetSupportedPrecisions()) {
-      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
-      OperationDef op_def;
-      op_def.precision = precision;
-      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
-      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
-      TensorFloat32 dst_tensor;
-      GPUOperation operation = CreatePooling(op_def, attr);
-      ASSERT_OK(ExecuteGPUOperation(
-          src_tensor, creation_context_,
-          absl::make_unique<GPUOperation>(std::move(operation)),
-          BHWC(1, 1, 1, 2), &dst_tensor));
-      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {8.0f, 7.0f}));
-    }
-  }
+  auto status = MaxPoolingTest(&exec_env_);
+  ASSERT_TRUE(status.ok()) << status.error_message();
 }

 TEST_F(OpenCLOperationTest, MaxPoolingIndices) {
-  TensorFloat32 src_tensor;
-  src_tensor.shape = BHWC(1, 2, 2, 2);
-  src_tensor.data = {8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
-
-  Pooling2DAttributes attr;
-  attr.padding.prepended = HW(0, 0);
-  attr.padding.appended = HW(0, 0);
-  attr.strides = HW(2, 2);
-  attr.kernel = HW(2, 2);
-  attr.type = PoolingType::MAX;
-  attr.output_indices = true;
-
-  for (auto storage : env_.GetSupportedStorages()) {
-    for (auto precision : env_.GetSupportedPrecisions()) {
-      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
-      OperationDef op_def;
-      op_def.precision = precision;
-      auto data_type = DeduceDataTypeFromPrecision(precision);
-      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
-      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
-      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
-      TensorFloat32 dst_tensor;
-      TensorFloat32 dst_tensor_ind;
-      GPUOperation operation = CreatePooling(op_def, attr);
-      ASSERT_OK(ExecuteGPUOperation(
-          {src_tensor}, creation_context_,
-          absl::make_unique<GPUOperation>(std::move(operation)),
-          {BHWC(1, 1, 1, 2), BHWC(1, 1, 1, 2)},
-          {&dst_tensor, &dst_tensor_ind}));
-      EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {8.0f, 7.0f}));
-      for (auto& v : dst_tensor_ind.data) {
-        v = static_cast<int>(v);
-      }
-      EXPECT_THAT(dst_tensor_ind.data, Pointwise(FloatNear(eps), {0.0f, 3.0f}));
-    }
-  }
+  auto status = MaxPoolingIndicesTest(&exec_env_);
+  ASSERT_TRUE(status.ok()) << status.error_message();
 }

 }  // namespace
--- a/tensorflow/lite/delegates/gpu/common/tasks/BUILD
+++ b/tensorflow/lite/delegates/gpu/common/tasks/BUILD
@ -462,6 +462,19 @@ cc_library(
    ],
 )

+cc_library(
+    name = "pooling_test_util",
+    testonly = 1,
+    srcs = ["pooling_test_util.cc"],
+    hdrs = ["pooling_test_util.h"],
+    deps = [
+        ":pooling",
+        "//tensorflow/lite/delegates/gpu/common:operations",
+        "//tensorflow/lite/delegates/gpu/common:status",
+        "//tensorflow/lite/delegates/gpu/common/task:testing_util",
+    ],
+)
+
 cc_library(
    name = "prelu",
    srcs = ["prelu.cc"],
--- a/tensorflow/lite/delegates/gpu/common/tasks/pooling.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/pooling.cc
@ -73,22 +73,21 @@ std::string GetAveragePoolingKernelCode(const OperationDef& op_def,
      op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;

  std::string c;
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
+  c += "MAIN_FUNCTION($0) {\n";
+  c += "  int X = GLOBAL_ID_0;\n";
  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
-    c += "  int linear_id_1 = get_global_id(1);\n";
+    c += "  int linear_id_1 = GLOBAL_ID_1;\n";
    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
    c += "  int D = linear_id_1 % args.dst_tensor.Depth();\n";
  } else {
-    c += "  int Y = get_global_id(1);\n";
+    c += "  int Y = GLOBAL_ID_1;\n";
  }
-  c += "  int Z = get_global_id(2);\n";
+  c += "  int Z = GLOBAL_ID_2;\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
       "Z >= args.dst_tensor.Slices()) { \n";
  c += "    return; \n";
  c += "  } \n";
-  c += "  float4 r = (float4)(0.0f);\n";
+  c += "  float4 r = INIT_FLOAT4(0.0f);\n";
  c += "  float window_size = 0.0;\n";
  if (stride_correction) {
    c += "  int xs = " +
@ -124,7 +123,7 @@ std::string GetAveragePoolingKernelCode(const OperationDef& op_def,
  if (manual_clamp) {
    c += "     r += !outside ? args.src_tensor.Read<float>(" + src_coord +
         ") : "
-         "(float4)(0.0f);\n";
+         "INIT_FLOAT4(0.0f);\n";
  } else {
    c += "      r += args.src_tensor.Read<float>(" + src_coord + ");\n";
  }
@ -194,24 +193,23 @@ std::string GetMaxPoolingKernelCode(const OperationDef& op_def,
  }

  std::string c;
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
+  c += "MAIN_FUNCTION($0) {\n";
+  c += "  int X = GLOBAL_ID_0;\n";
  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
-    c += "  int linear_id_1 = get_global_id(1);\n";
+    c += "  int linear_id_1 = GLOBAL_ID_1;\n";
    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
    c += "  int D = linear_id_1 % args.dst_tensor.Depth();\n";
  } else {
-    c += "  int Y = get_global_id(1);\n";
+    c += "  int Y = GLOBAL_ID_1;\n";
  }
-  c += "  int Z = get_global_id(2);\n";
+  c += "  int Z = GLOBAL_ID_2;\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
       "Z >= args.dst_tensor.Slices()) { \n";
  c += "    return; \n";
  c += "  } \n";
-  c += "  FLT4 maximum = (FLT4)(-10000.0f);\n";
+  c += "  FLT4 maximum = INIT_FLT4(-10000.0f);\n";
  if (output_indices) {
-    c += "  FLT4 indexes = (FLT4)(0.0f);\n";
+    c += "  FLT4 indexes = INIT_FLT4(0.0f);\n";
  }
  if (stride_correction) {
    c += "  int xs = " +
@ -246,11 +244,12 @@ std::string GetMaxPoolingKernelCode(const OperationDef& op_def,
  c += "      FLT4 src = args.src_tensor.Read(" + src_coord + ");\n";
  if (output_indices) {
    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
-      c += "      FLT index_counter = (FLT)((ky * args.kernel_size_x + kx) * "
-           "args.kernel_size_z + kz) + (FLT)(0.1f);\n";
+      c +=
+          "      FLT index_counter = INIT_FLT((ky * args.kernel_size_x + kx) * "
+          "args.kernel_size_z + kz) + INIT_FLT(0.1f);\n";
    } else {
-      c += "      FLT index_counter = (FLT)(ky * args.kernel_size_x + kx) + "
-           "(FLT)(0.1f);\n";
+      c += "      FLT index_counter = INIT_FLT(ky * args.kernel_size_x + kx) + "
+           "INIT_FLT(0.1f);\n";
    }
    c += "      if (src.x > maximum.x) {\n";
    c += "        indexes.x = index_counter;\n";
--- a/tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.cc
@ -0,0 +1,162 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.h"
+
+#include <vector>
+
+#include "tensorflow/lite/delegates/gpu/common/operations.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/pooling.h"
+
+namespace tflite {
+namespace gpu {
+
+absl::Status AveragePoolingTest(TestExecutionEnvironment* env) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 2, 2, 2);
+  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
+
+  Pooling2DAttributes attr;
+  attr.padding.prepended = HW(0, 0);
+  attr.padding.appended = HW(0, 0);
+  attr.strides = HW(2, 2);
+  attr.kernel = HW(2, 2);
+  attr.type = PoolingType::AVERAGE;
+
+  for (auto storage : env->GetSupportedStorages()) {
+    for (auto precision : env->GetSupportedPrecisions()) {
+      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      GPUOperation operation = CreatePooling(op_def, attr);
+      RETURN_IF_ERROR(env->ExecuteGPUOperation(
+          src_tensor, absl::make_unique<GPUOperation>(std::move(operation)),
+          BHWC(1, 1, 1, 2), &dst_tensor));
+      RETURN_IF_ERROR(PointWiseNear({3.0f, 4.0f}, dst_tensor.data, eps));
+    }
+  }
+  return absl::OkStatus();
+}
+
+absl::Status AveragePoolingNonEmptyPaddingTest(TestExecutionEnvironment* env) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 2, 2, 1);
+  src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f};
+
+  Pooling2DAttributes attr;
+  attr.padding.prepended = HW(0, 0);
+  attr.padding.appended = HW(1, 1);
+  attr.strides = HW(1, 1);
+  attr.kernel = HW(2, 2);
+  attr.type = PoolingType::AVERAGE;
+
+  for (auto storage : env->GetSupportedStorages()) {
+    for (auto precision : env->GetSupportedPrecisions()) {
+      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      GPUOperation operation = CreatePooling(op_def, attr);
+      RETURN_IF_ERROR(env->ExecuteGPUOperation(
+          src_tensor, absl::make_unique<GPUOperation>(std::move(operation)),
+          BHWC(1, 2, 2, 1), &dst_tensor));
+      RETURN_IF_ERROR(
+          PointWiseNear({1.5f, 2.0f, 2.5f, 3.0f}, dst_tensor.data, eps));
+    }
+  }
+  return absl::OkStatus();
+}
+
+absl::Status MaxPoolingTest(TestExecutionEnvironment* env) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 2, 2, 2);
+  src_tensor.data = {8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
+
+  Pooling2DAttributes attr;
+  attr.padding.prepended = HW(0, 0);
+  attr.padding.appended = HW(0, 0);
+  attr.strides = HW(2, 2);
+  attr.kernel = HW(2, 2);
+  attr.type = PoolingType::MAX;
+
+  for (auto storage : env->GetSupportedStorages()) {
+    for (auto precision : env->GetSupportedPrecisions()) {
+      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      GPUOperation operation = CreatePooling(op_def, attr);
+      RETURN_IF_ERROR(env->ExecuteGPUOperation(
+          src_tensor, absl::make_unique<GPUOperation>(std::move(operation)),
+          BHWC(1, 1, 1, 2), &dst_tensor));
+      RETURN_IF_ERROR(PointWiseNear({8.0f, 7.0f}, dst_tensor.data, eps));
+    }
+  }
+  return absl::OkStatus();
+}
+
+absl::Status MaxPoolingIndicesTest(TestExecutionEnvironment* env) {
+  TensorFloat32 src_tensor;
+  src_tensor.shape = BHWC(1, 2, 2, 2);
+  src_tensor.data = {8.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
+
+  Pooling2DAttributes attr;
+  attr.padding.prepended = HW(0, 0);
+  attr.padding.appended = HW(0, 0);
+  attr.strides = HW(2, 2);
+  attr.kernel = HW(2, 2);
+  attr.type = PoolingType::MAX;
+  attr.output_indices = true;
+
+  for (auto storage : env->GetSupportedStorages()) {
+    for (auto precision : env->GetSupportedPrecisions()) {
+      const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
+      OperationDef op_def;
+      op_def.precision = precision;
+      auto data_type = DeduceDataTypeFromPrecision(precision);
+      op_def.src_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      op_def.dst_tensors.push_back({data_type, storage, Layout::HWC});
+      TensorFloat32 dst_tensor;
+      TensorFloat32 dst_tensor_ind;
+      GPUOperation operation = CreatePooling(op_def, attr);
+      RETURN_IF_ERROR(env->ExecuteGPUOperation(
+          {src_tensor}, absl::make_unique<GPUOperation>(std::move(operation)),
+          {BHWC(1, 1, 1, 2), BHWC(1, 1, 1, 2)},
+          {&dst_tensor, &dst_tensor_ind}));
+      RETURN_IF_ERROR(PointWiseNear({8.0f, 7.0f}, dst_tensor.data, eps));
+      for (auto& v : dst_tensor_ind.data) {
+        v = static_cast<int>(v);
+      }
+      RETURN_IF_ERROR(PointWiseNear({0.0f, 3.0f}, dst_tensor_ind.data, eps));
+    }
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace gpu
+}  // namespace tflite
--- a/tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.h
+++ b/tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.h
@ -0,0 +1,33 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_POOLING_TEST_UTIL_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_POOLING_TEST_UTIL_H_
+
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/task/testing_util.h"
+
+namespace tflite {
+namespace gpu {
+
+absl::Status AveragePoolingTest(TestExecutionEnvironment* env);
+absl::Status AveragePoolingNonEmptyPaddingTest(TestExecutionEnvironment* env);
+absl::Status MaxPoolingTest(TestExecutionEnvironment* env);
+absl::Status MaxPoolingIndicesTest(TestExecutionEnvironment* env);
+
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_PRELU_TEST_UTIL_H_
--- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD
@ -336,6 +336,7 @@ objc_library(
    deps = [
        ":pooling",
        ":test_util",
+        "//tensorflow/lite/delegates/gpu/common/tasks:pooling_test_util",
    ],
 )

@ -753,6 +754,7 @@ objc_library(
        "//tensorflow/lite/delegates/gpu/common/tasks:lstm_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:max_unpooling_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:padding_test_util",
+        "//tensorflow/lite/delegates/gpu/common/tasks:pooling_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:prelu_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:quantize_and_dequantize_test_util",
        "//tensorflow/lite/delegates/gpu/common/tasks:reduce_test_util",
--- a/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm
+++ b/tensorflow/lite/delegates/gpu/metal/kernels/pooling_test.mm
@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/common/operations.h"
 #include "tensorflow/lite/delegates/gpu/common/shape.h"
 #include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/common/tasks/pooling_test_util.h"
 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
 #include "tensorflow/lite/delegates/gpu/common/util.h"
 #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h"
@ -39,9 +40,8 @@ using ::tflite::gpu::metal::SingleOpModel;
@interface PoolingTest : XCTestCase
@end

-@implementation PoolingTest
- (void)setUp {
-  [super setUp];
+@implementation PoolingTest {
+  tflite::gpu::metal::MetalExecutionEnvironment exec_env_;
 }

 - (void)testPoolingMaxKernel2x2Stride2x2WithIndices {
@ -130,4 +130,24 @@ using ::tflite::gpu::metal::SingleOpModel;
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
 }

+- (void)testAveragePooling {
+  auto status = AveragePoolingTest(&exec_env_);
+  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
+}
+
+- (void)testAveragePoolingNonEmptyPadding {
+  auto status = AveragePoolingNonEmptyPaddingTest(&exec_env_);
+  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
+}
+
+- (void)testMaxPooling {
+  auto status = MaxPoolingTest(&exec_env_);
+  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
+}
+
+- (void)testMaxPoolingIndices {
+  auto status = MaxPoolingIndicesTest(&exec_env_);
+  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
+}
+
@end