From 3eac805944aef39026b4ecae7cd707ae4d7951a7 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 20 Jan 2021 12:32:31 -0800 Subject: [PATCH] Padding task modified to be Metal compatible. Added padding_test_util with unified tests. Added Metal padding unit tests. PiperOrigin-RevId: 352851103 Change-Id: I4dff7a7c9d8c2be57dcfdf258d3ccbd1edd8e184 --- .../lite/delegates/gpu/cl/kernels/BUILD | 2 +- .../delegates/gpu/cl/kernels/padding_test.cc | 301 ++-------------- .../lite/delegates/gpu/common/tasks/BUILD | 13 + .../delegates/gpu/common/tasks/padding.cc | 13 +- .../gpu/common/tasks/padding_test_util.cc | 326 ++++++++++++++++++ .../gpu/common/tasks/padding_test_util.h | 39 +++ .../lite/delegates/gpu/metal/kernels/BUILD | 2 + .../gpu/metal/kernels/padding_test.mm | 55 ++- 8 files changed, 462 insertions(+), 289 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.cc create mode 100644 tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index d78d0742424..31a694604c4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -375,7 +375,7 @@ cc_test( ":cl_test", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", - "//tensorflow/lite/delegates/gpu/common/tasks:padding", + "//tensorflow/lite/delegates/gpu/common/tasks:padding_test_util", "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc index 888eaa75b47..f40513e014c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding_test.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/lite/delegates/gpu/common/tasks/padding.h" - #include #include @@ -22,9 +20,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" - -using ::testing::FloatNear; -using ::testing::Pointwise; +#include "tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h" namespace tflite { namespace gpu { @@ -32,308 +28,53 @@ namespace cl { namespace { TEST_F(OpenCLOperationTest, PaddingAppendWidth) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 0); - attr.appended = BHWC(0, 0, 1, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 2, 2, 2), &dst_tensor)); - EXPECT_THAT(dst_tensor.data, - Pointwise(FloatNear(eps), - {0.0f, 1.0f, 0.0f, 0.0f, 2.0f, 3.0f, 0.0f, 0.0f})); - } - } + auto status = PaddingAppendWidthTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingPrependWidth) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 1, 0); - attr.appended = BHWC(0, 0, 0, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 2, 2, 2), &dst_tensor)); - EXPECT_THAT(dst_tensor.data, - Pointwise(FloatNear(eps), - {0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 2.0f, 3.0f})); - } - } + auto status = PaddingPrependWidthTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingAppendHeight) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 0); - attr.appended = BHWC(0, 1, 0, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 3, 1, 2), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), {0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f})); - } - } + auto status = PaddingAppendHeightTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingPrependHeight) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 1, 0, 0); - attr.appended = BHWC(0, 0, 0, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 3, 1, 2), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), {0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f})); - } - } + auto status = PaddingPrependHeightTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingAppendChannels) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 0); - attr.appended = BHWC(0, 0, 0, 1); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 2, 1, 3), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), {0.0f, 1.0f, 0.0f, 2.0f, 3.0f, 0.0f})); - } - } + auto status = PaddingAppendChannelsTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingPrependChannels) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 1); - attr.appended = BHWC(0, 0, 0, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 2, 1, 3), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), {0.0f, 0.0f, 1.0f, 0.0f, 2.0f, 3.0f})); - } - } + auto status = PaddingPrependChannelsTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingPrependChannelsX4) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 1, 1, 2); - src_tensor.data = {1.0f, 2.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 4); - attr.appended = BHWC(0, 0, 0, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 1, 1, 6), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), {0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 2.0f})); - } - } + auto status = PaddingPrependChannelsX4Test(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingComplex) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 2, 1, 2); - src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 1, 1); - attr.appended = BHWC(0, 1, 1, 0); - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 3, 3, 3), &dst_tensor)); - EXPECT_THAT( - dst_tensor.data, - Pointwise(FloatNear(eps), - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f})); - } - } + auto status = PaddingComplexTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingReflectWidth) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 1, 3, 1); - src_tensor.data = {1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 2, 0); - attr.appended = BHWC(0, 0, 2, 0); - attr.type = PaddingContentType::REFLECT; - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 1, 7, 1), &dst_tensor)); - EXPECT_THAT(dst_tensor.data, - Pointwise(FloatNear(eps), - {3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f})); - } - } + auto status = PaddingReflectWidthTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } TEST_F(OpenCLOperationTest, PaddingReflectChannels) { - TensorFloat32 src_tensor; - src_tensor.shape = BHWC(1, 1, 1, 3); - src_tensor.data = {1.0f, 2.0f, 3.0f}; - - PadAttributes attr; - attr.prepended = BHWC(0, 0, 0, 2); - attr.appended = BHWC(0, 0, 0, 2); - attr.type = PaddingContentType::REFLECT; - - for (auto storage : env_.GetSupportedStorages()) { - for (auto precision : env_.GetSupportedPrecisions()) { - const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; - OperationDef op_def; - op_def.precision = precision; - auto data_type = DeduceDataTypeFromPrecision(precision); - op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); - op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); - TensorFloat32 dst_tensor; - GPUOperation operation = CreatePadding(op_def, attr); - ASSERT_OK(ExecuteGPUOperation( - src_tensor, creation_context_, - absl::make_unique(std::move(operation)), - BHWC(1, 1, 1, 7), &dst_tensor)); - EXPECT_THAT(dst_tensor.data, - Pointwise(FloatNear(eps), - {3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f})); - } - } + auto status = PaddingReflectChannelsTest(&exec_env_); + ASSERT_TRUE(status.ok()) << status.error_message(); } } // namespace diff --git a/tensorflow/lite/delegates/gpu/common/tasks/BUILD b/tensorflow/lite/delegates/gpu/common/tasks/BUILD index c1513164a61..08c6177fdc2 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/BUILD +++ b/tensorflow/lite/delegates/gpu/common/tasks/BUILD @@ -421,6 +421,19 @@ cc_library( ], ) +cc_library( + name = "padding_test_util", + testonly = 1, + srcs = ["padding_test_util.cc"], + hdrs = ["padding_test_util.h"], + deps = [ + ":padding", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common/task:testing_util", + ], +) + cc_library( name = "pooling", srcs = ["pooling.cc"], diff --git a/tensorflow/lite/delegates/gpu/common/tasks/padding.cc b/tensorflow/lite/delegates/gpu/common/tasks/padding.cc index 9087d947959..9382b872aa8 100644 --- a/tensorflow/lite/delegates/gpu/common/tasks/padding.cc +++ b/tensorflow/lite/delegates/gpu/common/tasks/padding.cc @@ -45,23 +45,22 @@ std::string GetPaddingCode(const OperationDef& op_def, c += "}\n\n"; } - c += "__kernel void main_function(\n"; - c += "$0) {\n"; + c += "MAIN_FUNCTION($0) {\n"; if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { - c += " int linear_id = get_global_id(0);\n"; + c += " int linear_id = GLOBAL_ID_0;\n"; c += " int X = linear_id / args.dst_tensor.Batch();\n"; c += " int B = linear_id % args.dst_tensor.Batch();\n"; c += " args.dst_tensor.SetBatchRef(B);\n"; } else { - c += " int X = get_global_id(0);\n"; + c += " int X = GLOBAL_ID_0;\n"; } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; + c += " int Y = 
GLOBAL_ID_1;\n"; + c += " int Z = GLOBAL_ID_2;\n"; c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " "Z >= args.dst_tensor.Slices()) { \n"; c += " return; \n"; c += " } \n"; - c += " FLT4 result = (FLT4)(0.0);\n"; + c += " FLT4 result = INIT_FLT4(0.0);\n"; c += " int s_x = X - args.prepended_x;\n"; c += " int s_y = Y - args.prepended_y;\n"; if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) { diff --git a/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.cc b/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.cc new file mode 100644 index 00000000000..e36ec517224 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.cc @@ -0,0 +1,326 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h" + +#include + +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/task/testing_util.h" +#include "tensorflow/lite/delegates/gpu/common/tasks/padding.h" + +namespace tflite { +namespace gpu { + +absl::Status PaddingAppendWidthTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 0); + attr.appended = BHWC(0, 0, 1, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 2, 2), &dst_tensor)); + RETURN_IF_ERROR( + PointWiseNear({0.0f, 1.0f, 0.0f, 0.0f, 2.0f, 3.0f, 0.0f, 0.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingPrependWidthTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 1, 0); + attr.appended = BHWC(0, 0, 0, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 2, 2), &dst_tensor)); + RETURN_IF_ERROR( + PointWiseNear({0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 2.0f, 3.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingAppendHeightTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 0); + attr.appended = BHWC(0, 1, 0, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 3, 1, 2), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingPrependHeightTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 1, 0, 0); + attr.appended = BHWC(0, 0, 0, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 3, 1, 2), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({0.0f, 0.0f, 0.0f, 1.0f, 2.0f, 3.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingAppendChannelsTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 0); + attr.appended = BHWC(0, 0, 0, 1); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 3), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({0.0f, 1.0f, 0.0f, 2.0f, 3.0f, 0.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingPrependChannelsTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 1); + attr.appended = BHWC(0, 0, 0, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 2, 1, 3), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({0.0f, 0.0f, 1.0f, 0.0f, 2.0f, 3.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingPrependChannelsX4Test(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 1, 1, 2); + src_tensor.data = {1.0f, 2.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 4); + attr.appended = BHWC(0, 0, 0, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 1, 1, 6), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 2.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingComplexTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {0.0f, 1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 1, 1); + attr.appended = BHWC(0, 1, 1, 0); + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 3, 3, 3), &dst_tensor)); + RETURN_IF_ERROR( + PointWiseNear({0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingReflectWidthTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 1, 3, 1); + src_tensor.data = {1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 2, 0); + attr.appended = BHWC(0, 0, 2, 0); + attr.type = PaddingContentType::REFLECT; + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 1, 7, 1), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +absl::Status PaddingReflectChannelsTest(TestExecutionEnvironment* env) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 1, 1, 3); + src_tensor.data = {1.0f, 2.0f, 3.0f}; + + PadAttributes attr; + attr.prepended = BHWC(0, 0, 0, 2); + attr.appended = BHWC(0, 0, 0, 2); + attr.type = PaddingContentType::REFLECT; + + for (auto storage : env->GetSupportedStorages()) { + for (auto precision : env->GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation = CreatePadding(op_def, attr); + RETURN_IF_ERROR(env->ExecuteGPUOperation( + src_tensor, absl::make_unique(std::move(operation)), + BHWC(1, 1, 1, 7), &dst_tensor)); + RETURN_IF_ERROR(PointWiseNear({3.0f, 2.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f}, + dst_tensor.data, eps)); + } + } + return absl::OkStatus(); +} + +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h b/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h new file mode 100644 index 00000000000..f5b402a24fd --- /dev/null +++ b/tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h @@ -0,0 +1,39 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_PADDING_TEST_UTIL_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_PADDING_TEST_UTIL_H_ + +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/task/testing_util.h" + +namespace tflite { +namespace gpu { + +absl::Status PaddingAppendWidthTest(TestExecutionEnvironment* env); +absl::Status PaddingPrependWidthTest(TestExecutionEnvironment* env); +absl::Status PaddingAppendHeightTest(TestExecutionEnvironment* env); +absl::Status PaddingPrependHeightTest(TestExecutionEnvironment* env); +absl::Status PaddingAppendChannelsTest(TestExecutionEnvironment* env); +absl::Status PaddingPrependChannelsTest(TestExecutionEnvironment* env); +absl::Status PaddingPrependChannelsX4Test(TestExecutionEnvironment* env); +absl::Status PaddingComplexTest(TestExecutionEnvironment* env); +absl::Status PaddingReflectWidthTest(TestExecutionEnvironment* env); +absl::Status PaddingReflectChannelsTest(TestExecutionEnvironment* env); + +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_PADDING_TEST_UTIL_H_ diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 75b113d26cb..acfa58b9db2 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -373,6 +373,7 @@ objc_library( deps = [ ":padding", ":test_util", + "//tensorflow/lite/delegates/gpu/common/tasks:padding_test_util", ], ) @@ -878,6 +879,7 @@ objc_library( "//tensorflow/lite/delegates/gpu/common/tasks:concat_test_util", "//tensorflow/lite/delegates/gpu/common/tasks:elementwise_test_util", "//tensorflow/lite/delegates/gpu/common/tasks:lstm_test_util", + "//tensorflow/lite/delegates/gpu/common/tasks:padding_test_util", "//tensorflow/lite/delegates/gpu/common/tasks:prelu_test_util", "//tensorflow/lite/delegates/gpu/common/tasks:quantize_and_dequantize_test_util", "//tensorflow/lite/delegates/gpu/common/tasks:relu_test_util", diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm index f417fff8b54..811ab94da8d 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/padding_test.mm @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/tasks/padding_test_util.h" #include "tensorflow/lite/delegates/gpu/common/tensor.h" #include "tensorflow/lite/delegates/gpu/common/util.h" #include "tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h" @@ -49,7 +50,10 @@ using ::tflite::gpu::metal::SingleOpModel; expected:(std::vector&&)expected; @end -@implementation PaddingTest +@implementation PaddingTest { + tflite::gpu::metal::MetalExecutionEnvironment exec_env_; +} + - (void)setUp { [super setUp]; } @@ -191,5 +195,54 @@ using ::tflite::gpu::metal::SingleOpModel; XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); } +- (void)testPaddingAppendWidth { + auto status = PaddingAppendWidthTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingPrependWidth { + auto status = PaddingPrependWidthTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingAppendHeight { + auto status = PaddingAppendHeightTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingPrependHeight { + auto status = PaddingPrependHeightTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingAppendChannels { + auto status = PaddingAppendChannelsTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingPrependChannels { + auto status = PaddingPrependChannelsTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingPrependChannelsX4 { + auto status = PaddingPrependChannelsX4Test(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingComplex { + auto status = PaddingComplexTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingReflectWidth { + auto status = PaddingReflectWidthTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + +- (void)testPaddingReflectChannels { + auto status = PaddingReflectChannelsTest(&exec_env_); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} @end