diff --git a/RELEASE.md b/RELEASE.md index 6890352cf8a..6fb025a23bd 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -183,11 +183,13 @@ checkpoint saved in the `variables/` folder in the SavedModel. * When restoring, `save_path` can be a path to a SavedModel. The function will automatically find the checkpoint in the SavedModel. +* `tf.nn`: + * `tf.nn.max_pool2d` now supports explicit padding. * Other: * We have replaced uses of "whitelist" and "blacklist" with "allowlist" and "denylist" where possible. Please see https://developers.google.com/style/word-list#blacklist for more context. - * + ## Thanks to our Contributors diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 47cfaecd3fb..fbab452d596 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -149,6 +149,7 @@ def LegalizeMaxPool2D : Pat< IsIntList1XY1:$ksize, IsIntList1XY1:$strides, $padding, + $explicit_paddings, IsDataFormatNHWC:$format), (TFL_MaxPool2DOp $value, /*padding=*/$padding, diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 9b201ba878e..420f795461b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -6311,7 +6311,8 @@ def TF_MaxPoolOp : TF_Op<"MaxPool", [NoSideEffect, TF_FoldOperandsTransposeInter Confined]>:$ksize, Confined]>:$strides, - TF_AnyStrAttrOf<["SAME", "VALID"]>:$padding, + TF_AnyStrAttrOf<["SAME", "VALID", "EXPLICIT"]>:$padding, + DefaultValuedAttr:$explicit_paddings, DefaultValuedAttr, "NHWC">:$data_format ); @@ -6380,7 +6381,8 @@ def TF_MaxPoolGradOp : TF_Op<"MaxPoolGrad", [NoSideEffect]> { Confined]>:$ksize, Confined]>:$strides, - TF_AnyStrAttrOf<["SAME", "VALID"]>:$padding, + TF_AnyStrAttrOf<["SAME", "VALID", "EXPLICIT"]>:$padding, + DefaultValuedAttr:$explicit_paddings, DefaultValuedAttr:$data_format ); diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index a05cb0b2d8f..4e68b1d223e 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -1269,6 +1269,15 @@ func @maxpool_3d_same_padding(%arg0: tensor<2x8x13x25x7xf32>) -> tensor<2x8x4x7x return %0 : tensor<2x8x4x7x7xf32> } +// CHECK-LABEL: maxpool_explicit_padding +func @maxpool_explicit_padding(%arg0: tensor<2x12x20x7xi32>) -> tensor<2x3x5x7xi32> { + // CHECK: tf.MaxPool + // TODO(b/165938852): need to support explicit padding in max_pool. + + %0 = "tf.MaxPool"(%arg0) {data_format = "NHWC", ksize = [1, 2, 2, 1], padding = "EXPLICIT", strides = [1, 4, 4, 1]} : (tensor<2x12x20x7xi32>) -> tensor<2x3x5x7xi32> + return %0 : tensor<2x3x5x7xi32> +} + //===----------------------------------------------------------------------===// // MaxPoolGrad op legalizations. 
//===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 0332dbee589..5f64971e7d7 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -2474,6 +2474,12 @@ class ConvertMaxPoolOp : public OpRewritePattern { Type element_type = op.input().getType().template cast().getElementType(); if (!element_type.isSignlessIntOrFloat()) return failure(); + tensorflow::Padding padding; + if (!GetPaddingFromString(op.padding().str(), &padding).ok()) + return failure(); + if (padding == tensorflow::Padding::EXPLICIT) { + return failure(); + } Location loc = op.getLoc(); ConstOp init = GetScalarLimitConstOfType(element_type, loc, hlo::kInfinityLowest, &rewriter); diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 8157f4ee01d..c54418ba648 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -1477,7 +1477,8 @@ Status MatrixSetDiagV2Shape(shape_inference::InferenceContext* c) { return Status::OK(); } -Status MaxPoolShape(shape_inference::InferenceContext* c) { +Status MaxPoolShapeImpl(shape_inference::InferenceContext* c, + bool supports_explicit_padding) { string data_format_str; TensorFormat data_format; Status s = c->GetAttr("data_format", &data_format_str); @@ -1530,14 +1531,39 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { Padding padding; TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding)); + std::vector explicit_paddings; + if (supports_explicit_padding) { + Status status = c->GetAttr("explicit_paddings", &explicit_paddings); + // Use the default value, which is an empty list, if the attribute is not + // found. Otherwise return the error to the caller. 
+ if (!status.ok() && !errors::IsNotFound(status)) { + return status; + } + TF_RETURN_IF_ERROR(CheckValidPadding(padding, explicit_paddings, + /*num_dims=*/4, data_format)); + } else { + DCHECK(padding != Padding::EXPLICIT); + } + ShapeHandle output_shape; DimensionHandle output_rows, output_cols, output_depth; - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims( - c, in_rows_dim, kernel_rows, stride_rows, padding, &output_rows)); - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims( - c, in_cols_dim, kernel_cols, stride_cols, padding, &output_cols)); - TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims( - c, in_depth_dim, kernel_depth, stride_depth, padding, &output_depth)); + int64 pad_rows_before = -1, pad_rows_after = -1; + int64 pad_cols_before = -1, pad_cols_after = -1; + if (padding == Padding::EXPLICIT) { + GetExplicitPaddingForDim(explicit_paddings, data_format, 'H', + &pad_rows_before, &pad_rows_after); + GetExplicitPaddingForDim(explicit_paddings, data_format, 'W', + &pad_cols_before, &pad_cols_after); + } + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_rows_dim, kernel_rows, /*dilation_rate=*/1, stride_rows, padding, + pad_rows_before, pad_rows_after, &output_rows)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_cols_dim, kernel_cols, /*dilation_rate=*/1, stride_cols, padding, + pad_cols_before, pad_cols_after, &output_cols)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDimsV2( + c, in_depth_dim, kernel_depth, /*dilation_rate=*/1, stride_depth, padding, + /*pad_before*/ 0, /*pad_after*/ 0, &output_depth)); TF_RETURN_IF_ERROR(MakeShapeFromFormat(data_format, batch_size_dim, {output_rows, output_cols}, @@ -1547,6 +1573,14 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { return Status::OK(); } +Status MaxPoolShape(shape_inference::InferenceContext* c) { + return MaxPoolShapeImpl(c, /*supports_explicit_padding=*/false); +} + +Status MaxPoolShapeWithExplicitPadding(shape_inference::InferenceContext* c) { + return MaxPoolShapeImpl(c, /*supports_explicit_padding=*/true); +} + Status MaxPoolV2Shape(shape_inference::InferenceContext* c, int num_inputs) { string data_format_str; TensorFormat data_format; diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index f3e02638f54..3b14666305e 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -168,7 +168,11 @@ Status MatrixDiagV2Shape(shape_inference::InferenceContext* c); // Shape function for MatrixSetDiagV2 and MatrixSetDiagV3 operations. Status MatrixSetDiagV2Shape(shape_inference::InferenceContext* c); -// Shape function for MaxPool-like operations. +// Shape function for MaxPool-like operations that support explicit padding. +Status MaxPoolShapeWithExplicitPadding(shape_inference::InferenceContext* c); + +// Shape function for MaxPool-like operations that do not support explicit +// padding. Status MaxPoolShape(shape_inference::InferenceContext* c); // Shape function for MaxPoolV2-like operations. 
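The RELEASE.md note and the new `MaxPoolShapeWithExplicitPadding` shape function above describe the user-visible behaviour: each spatial side of the input can be padded independently, and padded cells never win the max. The following is a minimal illustrative sketch, not part of the diff; it assumes the nested-list `padding` argument that `tf.nn.max_pool2d` accepts for explicit padding, which is plumbed through in the Python layer rather than in the hunks shown here.

```python
import numpy as np
import tensorflow as tf  # assumes a build that includes this change

# A 1x3x3x1 NHWC input holding the values 1..9.
x = tf.constant(np.arange(1, 10, dtype=np.float32).reshape(1, 3, 3, 1))

# Pad every spatial side by 1 (each pad amount must stay smaller than the
# pooling window, which CheckPaddingSize enforces later in this change),
# then take 2x2 windows with stride 1. Per spatial dimension the output
# size is out = (in + pad_before + pad_after - window) // stride + 1 = 4.
y = tf.nn.max_pool2d(
    x,
    ksize=2,
    strides=1,
    padding=[[0, 0], [1, 1], [1, 1], [0, 0]],  # one (before, after) pair per NHWC dim
    data_format="NHWC")

print(tf.squeeze(y).numpy())
# Padded cells behave like -inf, so the expected output is
# [[1. 2. 3. 3.]
#  [4. 5. 6. 6.]
#  [7. 8. 9. 9.]
#  [7. 8. 9. 9.]]
```

At the GraphDef level the same information travels as the `padding="EXPLICIT"` string plus the eight-element `explicit_paddings` attribute that the MaxPool op registration gains further down in this diff.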
diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index 20df833a934..58004d1789f 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -81,8 +81,13 @@ class AvgPoolingOp : public UnaryOp { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -146,8 +151,13 @@ class AvgPoolingOp : public UnaryOp { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -169,14 +179,14 @@ class AvgPoolingOp : public UnaryOp { #if CUDNN_VERSION >= 7300 DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, - output_shape, + stride_, padding_, /*explicit_paddings=*/{}, + data_format_, tensor_in, output_shape, /*propagate_nans=*/false); #else if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kAverage, ksize_, - stride_, padding_, data_format_, tensor_in, - output_shape, + stride_, padding_, /*explicit_paddings=*/{}, + data_format_, tensor_in, output_shape, /*propagate_nans=*/false); } else { Tensor* output = nullptr; @@ -446,10 +456,10 @@ class AvgPoolingGradOp : public OpKernel { return; } - DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kAverage, - ksize_, stride_, padding_, data_format_, - nullptr, nullptr, out_backprop, output_shape, - /*propagate_nans=*/false); + DnnPoolingGradOp::Compute( + context, se::dnn::PoolingMode::kAverage, ksize_, stride_, padding_, + /*explicit_paddings=*/{}, data_format_, nullptr, nullptr, out_backprop, + output_shape, /*propagate_nans=*/false); } private: @@ -533,7 +543,8 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { #if CUDNN_VERSION >= 7300 DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kAverage, - ksize_, stride_, padding_, data_format_, + ksize_, stride_, padding_, + /*explicit_paddings=*/{}, data_format_, nullptr, nullptr, out_backprop, output_shape, /*propagate_nans=*/false); #else @@ -589,7 +600,8 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { context->eigen_gpu_device()); // d } else { DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kAverage, - ksize_, stride_, padding_, data_format_, + ksize_, stride_, padding_, + /*explicit_paddings=*/{}, data_format_, nullptr, nullptr, out_backprop, output_shape, /*propagate_nans=*/false); } diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h index 88e9742b136..d65cca6dd36 100644 --- a/tensorflow/core/kernels/conv_2d.h +++ b/tensorflow/core/kernels/conv_2d.h @@ -316,7 +316,7 @@ struct PadInput { const std::array& padding_left, const std::array& padding_right, typename TTypes::Tensor out, - TensorFormat format) { + TensorFormat format, T padding_value = T{}) { Eigen::array, NDIMS> padding; padding[GetTensorDimIndex(format, 'N')] = {0, 0}; for (int i = 0; i < NDIMS - 2; ++i) { @@ -324,7 +324,7 @@ struct PadInput { padding_left[i], 
padding_right[i]}; } padding[GetTensorDimIndex(format, 'C')] = {0, 0}; - out.device(d) = in.pad(padding); + out.device(d) = in.pad(padding, padding_value); } }; diff --git a/tensorflow/core/kernels/conv_2d_gpu.h b/tensorflow/core/kernels/conv_2d_gpu.h index 85ca2b5722a..31aa04247fa 100644 --- a/tensorflow/core/kernels/conv_2d_gpu.h +++ b/tensorflow/core/kernels/conv_2d_gpu.h @@ -417,14 +417,12 @@ __global__ void SwapDimension1And2InTensor3UsingTiles( } // A Gpu custom kernel that convert input to output, given proper padding on -// the left and the top. The padded value is zero. +// the left and the top. template -__global__ void PadInputCustomKernelNHWC(int nthreads, - const T* __restrict__ input, - Dimension input_dims, - T* __restrict__ output, - Dimension output_dims, - Dimension padding_left) { +__global__ void PadInputCustomKernelNHWC( + int nthreads, const T* __restrict__ input, Dimension input_dims, + T* __restrict__ output, Dimension output_dims, + Dimension padding_left, T padding_value) { GPU_1D_KERNEL_LOOP(index, nthreads) { int output_index = index; Index output_tensor_index = @@ -444,18 +442,16 @@ __global__ void PadInputCustomKernelNHWC(int nthreads, const int input_index = TensorIndexToFlat(input_tensor_index, input_dims); output[output_index] = input[input_index]; } else { - output[output_index] = T(0); + output[output_index] = padding_value; } } } template -__global__ void PadInputCustomKernelNCHW(int nthreads, - const T* __restrict__ input, - Dimension input_dims, - T* __restrict__ output, - Dimension output_dims, - Dimension padding_left) { +__global__ void PadInputCustomKernelNCHW( + int nthreads, const T* __restrict__ input, Dimension input_dims, + T* __restrict__ output, Dimension output_dims, + Dimension padding_left, T padding_value) { GPU_1D_KERNEL_LOOP(index, nthreads) { int output_index = index; Index output_tensor_index = @@ -475,7 +471,7 @@ __global__ void PadInputCustomKernelNCHW(int nthreads, const int input_index = TensorIndexToFlat(input_tensor_index, input_dims); output[output_index] = input[input_index]; } else { - output[output_index] = T(0); + output[output_index] = padding_value; } } } @@ -572,7 +568,7 @@ struct PadInput { const std::array& padding_left, const std::array& padding_right, typename TTypes::Tensor out, - TensorFormat format) { + TensorFormat format, T padding_value = T{}) { GpuLaunchConfig config = GetGpuLaunchConfig(out.size(), d); Dimension input_dims; for (int i = 0; i < NDIMS; ++i) { @@ -589,12 +585,14 @@ struct PadInput { TF_CHECK_OK(GpuLaunchKernel( PadInputCustomKernelNHWC, config.block_count, config.thread_per_block, 0, d.stream(), config.virtual_thread_count, - in.data(), input_dims, out.data(), output_dims, padding_left_dim)); + in.data(), input_dims, out.data(), output_dims, padding_left_dim, + padding_value)); } else if (format == FORMAT_NCHW) { TF_CHECK_OK(GpuLaunchKernel( PadInputCustomKernelNCHW, config.block_count, config.thread_per_block, 0, d.stream(), config.virtual_thread_count, - in.data(), input_dims, out.data(), output_dims, padding_left_dim)); + in.data(), input_dims, out.data(), output_dims, padding_left_dim, + padding_value)); } else { LOG(FATAL) << "Invalid data format: " << format; } diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index a923df5c477..840d2c13c77 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -1135,19 +1135,20 @@ void LaunchConv2DBackpropFilterOp::operator()( // 
Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void TransformFilter::operator()( \ - const GPUDevice& d, FilterTensorFormat dst_filter_format, \ - typename TTypes::ConstTensor in, \ - typename TTypes::Tensor out); \ - extern template struct TransformFilter; \ - template <> \ - void PadInput::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ - const std::array& padding_left, \ - const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat data_format); \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void TransformFilter::operator()( \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ + typename TTypes::Tensor out); \ + extern template struct TransformFilter; \ + template <> \ + void PadInput::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor in, \ + const std::array& padding_left, \ + const std::array& padding_right, \ + typename TTypes::Tensor out, TensorFormat data_format, \ + T padding_value); \ extern template struct PadInput; DECLARE_GPU_SPEC(float); diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 158f93fdec1..c1dadcf767c 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -1333,19 +1333,20 @@ void LaunchConv2DBackpropInputOp::operator()( // Forward declarations of the functor specializations for GPU. namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void TransformFilter::operator()( \ - const GPUDevice& d, FilterTensorFormat dst_filter_format, \ - typename TTypes::ConstTensor in, \ - typename TTypes::Tensor out); \ - extern template struct TransformFilter; \ - template <> \ - void PadInput::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ - const std::array& padding_left, \ - const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat data_format); \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void TransformFilter::operator()( \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ + typename TTypes::Tensor out); \ + extern template struct TransformFilter; \ + template <> \ + void PadInput::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor in, \ + const std::array& padding_left, \ + const std::array& padding_right, \ + typename TTypes::Tensor out, TensorFormat data_format, \ + T padding_value); \ extern template struct PadInput; DECLARE_GPU_SPEC(float); diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 8f811138823..7c61dc92604 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -1082,7 +1082,8 @@ namespace functor { const GPUDevice& d, typename TTypes::ConstTensor in, \ const std::array& padding_left, \ const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat format); + typename TTypes::Tensor out, TensorFormat format, \ + T padding_value); DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(float); diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index ca2abce0b15..b8c2671e7d2 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -1183,7 +1183,8 @@ namespace functor { const GPUDevice& d, typename TTypes::ConstTensor in, \ const 
std::array& padding_left, \ const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat data_format); \ + typename TTypes::Tensor out, TensorFormat data_format, \ + T padding_value); \ extern template struct PadInput DECLARE_GPU_SPEC(float); diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 289a083acfb..660f4b6febc 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -539,7 +539,8 @@ namespace functor { const GPUDevice& d, typename TTypes::ConstTensor in, \ const std::array& padding_left, \ const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat format); \ + typename TTypes::Tensor out, TensorFormat format, \ + T padding_value); \ template <> \ void NHWCToNCHW::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor in, \ diff --git a/tensorflow/core/kernels/conv_ops_fused_impl.h b/tensorflow/core/kernels/conv_ops_fused_impl.h index 43aa6c6e7fb..ce60afecc55 100644 --- a/tensorflow/core/kernels/conv_ops_fused_impl.h +++ b/tensorflow/core/kernels/conv_ops_fused_impl.h @@ -804,19 +804,20 @@ class FusedConv2DOp : public OpKernel { #if GOOGLE_CUDA -#define DECLARE_FUNCTOR_GPU_SPEC(T) \ - template <> \ - void TransformFilter::operator()( \ - const GPUDevice& d, FilterTensorFormat dst_filter_format, \ - typename TTypes::ConstTensor in, \ - typename TTypes::Tensor out); \ - extern template struct TransformFilter; \ - template <> \ - void PadInput::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor in, \ - const std::array& padding_left, \ - const std::array& padding_right, \ - typename TTypes::Tensor out, TensorFormat data_format); \ +#define DECLARE_FUNCTOR_GPU_SPEC(T) \ + template <> \ + void TransformFilter::operator()( \ + const GPUDevice& d, FilterTensorFormat dst_filter_format, \ + typename TTypes::ConstTensor in, \ + typename TTypes::Tensor out); \ + extern template struct TransformFilter; \ + template <> \ + void PadInput::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor in, \ + const std::array& padding_left, \ + const std::array& padding_right, \ + typename TTypes::Tensor out, TensorFormat data_format, \ + T padding_value); \ extern template struct PadInput // Registration of the GPU implementations. diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 36ab1d71671..53168105aba 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -105,8 +105,8 @@ static void SpatialMaxPoolWithArgMaxHelper( const int32 depth = params.depth; const int32 in_rows = params.tensor_in_rows; const int32 in_cols = params.tensor_in_cols; - const int32 pad_rows = params.pad_rows; - const int32 pad_cols = params.pad_cols; + const int32 pad_top = params.pad_top; + const int32 pad_left = params.pad_left; const int32 window_rows = params.window_rows; const int32 window_cols = params.window_cols; const int32 row_stride = params.row_stride; @@ -131,8 +131,8 @@ static void SpatialMaxPoolWithArgMaxHelper( for (int w = 0; w < in_cols; ++w) { // (h_start, h_end) * (w_start, w_end) is the range that the input // vector projects to. - const int hpad = h + pad_rows; - const int wpad = w + pad_cols; + const int hpad = h + pad_top; + const int wpad = w + pad_left; const int h_start = (hpad < window_rows) ? 
0 : (hpad - window_rows) / row_stride + 1; const int h_end = std::min(hpad / row_stride + 1, out_height); @@ -243,6 +243,13 @@ class MaxPoolingGradOp : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + + if (padding_ == Padding::EXPLICIT) { + OP_REQUIRES_OK( + context, context->GetAttr("explicit_paddings", &explicit_paddings_)); + OP_REQUIRES_OK(context, CheckValidPadding(padding_, explicit_paddings_, + /*num_dims=*/4, data_format_)); + } } void Compute(OpKernelContext* context) override { @@ -297,8 +304,13 @@ class MaxPoolingGradOp : public OpKernel { errors::Unimplemented( "MaxPoolingGrad is not yet supported on the depth dimension.")); - PoolParameters params{context, ksize, stride, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize, + stride, + padding_, + explicit_paddings_, + FORMAT_NHWC, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -316,6 +328,7 @@ class MaxPoolingGradOp : public OpKernel { std::vector ksize_; std::vector stride_; Padding padding_; + std::vector explicit_paddings_; TensorFormat data_format_; }; @@ -347,7 +360,12 @@ class MaxPoolingGradOp : public OpKernel { "Pooling is not yet supported on the batch dimension.")); } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); - + if (padding_ == Padding::EXPLICIT) { + OP_REQUIRES_OK( + context, context->GetAttr("explicit_paddings", &explicit_paddings_)); + OP_REQUIRES_OK(context, CheckValidPadding(padding_, explicit_paddings_, + /*num_dims=*/4, data_format_)); + } TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_)); } @@ -392,16 +410,26 @@ class MaxPoolingGradOp : public OpKernel { OP_REQUIRES(context, ksize_n == 1 && stride_n == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + int64 pad_top, pad_bottom, pad_left, pad_right; + if (padding_ == Padding::EXPLICIT) { + GetExplicitPaddingForDim(explicit_paddings_, data_format_, 'H', + /*pad_top=*/&pad_top, + /*pad_bottom=*/&pad_bottom); + GetExplicitPaddingForDim(explicit_paddings_, data_format_, 'W', + /*pad_left=*/&pad_left, + /*pad_right=*/&pad_right); + } DnnPoolingGradOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, &tensor_in, - &tensor_out, out_backprop, output_shape, - propagate_nans_); + stride, padding_, explicit_paddings_, + data_format_, &tensor_in, &tensor_out, + out_backprop, output_shape, propagate_nans_); } private: std::vector ksize_; std::vector stride_; Padding padding_; + std::vector explicit_paddings_; TensorFormat data_format_; bool propagate_nans_; }; @@ -492,8 +520,13 @@ class MaxPoolingGradGradOp : public OpKernel { errors::Unimplemented( "MaxPoolingGrad is not yet supported on the depth dimension.")); - PoolParameters params{context, ksize, stride, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize, + stride, + padding_, + /*explicit_paddings=*/{}, + FORMAT_NHWC, + tensor_in.shape()}; Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {2}, 0, tensor_out.shape(), &output)); @@ -551,8 +584,8 @@ class MaxPoolingGradGradOp : public OpKernel { const int32 depth = params.depth; const int32 in_rows = params.tensor_in_rows; const int32 in_cols = params.tensor_in_cols; - const int32 pad_rows = params.pad_rows; - const int32 pad_cols = params.pad_cols; + const int32 pad_top = params.pad_top; + const int32 pad_left = params.pad_left; const int32 window_rows = 
params.window_rows; const int32 window_cols = params.window_cols; const int32 row_stride = params.row_stride; @@ -574,9 +607,9 @@ class MaxPoolingGradGradOp : public OpKernel { for (int pw = 0; pw < out_width; ++pw) { // (h_start, h_end) * (w_start, w_end) is the range that the input // vector projects to. - int h_start = ph * row_stride - pad_rows; + int h_start = ph * row_stride - pad_top; const int h_end = std::min(h_start + window_rows, in_rows); - int w_start = pw * col_stride - pad_cols; + int w_start = pw * col_stride - pad_left; const int w_end = std::min(w_start + window_cols, in_cols); h_start = std::max(h_start, 0); w_start = std::max(w_start, 0); @@ -691,15 +724,20 @@ class MaxPoolingGradGradOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - PoolParameters params{context, ksize, stride, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize, + stride, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; functor::MaxPoolGradBackwardNoMask()( data_format_, tensor_in.flat().data(), tensor_out.flat().data(), params.tensor_in_batch, params.out_height, params.out_width, params.depth, params.tensor_in_rows, params.tensor_in_cols, params.window_rows, params.window_cols, params.row_stride, - params.col_stride, params.pad_rows, params.pad_cols, + params.col_stride, params.pad_top, params.pad_left, out_grad_backprop.flat().data(), output->flat().data(), context->eigen_device()); } @@ -743,13 +781,22 @@ class MaxPoolingNoMaskOp : public OpKernel { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); + OP_REQUIRES( + context, padding_ != EXPLICIT, + errors::Unimplemented( + "Explicit padding is not supported for MaxPoolingNoMaskOp.")); } void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -826,8 +873,13 @@ class MaxPoolingNoMaskV2Op : public OpKernel { OP_REQUIRES(context, ksize[0] == 1 && stride[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - PoolParameters params{context, ksize, stride, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize, + stride, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -889,8 +941,13 @@ class MaxPoolingWithArgmaxOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + FORMAT_NHWC, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -1003,8 +1060,13 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { const Tensor& grad_in = context->input(1); const Tensor& argmax = context->input(2); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + FORMAT_NHWC, + tensor_in.shape()}; if (!context->status().ok()) { return; } 
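The GPU MaxPoolingGradOp above unpacks the new eight-element `explicit_paddings` attribute into `pad_top`/`pad_bottom`/`pad_left`/`pad_right` with `GetExplicitPaddingForDim`. A rough Python model of that indexing follows; it is an illustration only, under the assumption that the attribute stores one (before, after) pair per dimension in `data_format` order, as for Conv2D, and it ignores the `NCHW_VECT_C` layout.

```python
def explicit_padding_for_dim(explicit_paddings, data_format, dim):
  """Returns (pad_before, pad_after) for one dimension, e.g. 'H' or 'W'."""
  index = data_format.index(dim)
  return explicit_paddings[2 * index], explicit_paddings[2 * index + 1]

# NHWC: the batch and channel pairs must be zero; pad H by (1, 0), W by (1, 1).
paddings = [0, 0, 1, 0, 1, 1, 0, 0]
assert explicit_padding_for_dim(paddings, "NHWC", "H") == (1, 0)
assert explicit_padding_for_dim(paddings, "NHWC", "W") == (1, 1)

# The same spatial padding expressed for NCHW lands in different slots.
assert explicit_padding_for_dim([0, 0, 0, 0, 1, 0, 1, 1], "NCHW", "H") == (1, 0)
```

On the cuDNN path, DnnPoolingOp and DnnPoolingGradOp then pass only the symmetric part of each pair (the minimum of before and after) to the pooling descriptor and pre-pad the remainder of the input with -inf, which is what the PadInputWithNegativeInf helper added to pooling_ops_common.cc further down takes care of.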
@@ -1056,8 +1118,13 @@ class MaxPoolingGradGradWithArgmaxOp : public OpKernel { const Tensor& grad_in = context->input(1); const Tensor& argmax = context->input(2); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + FORMAT_NHWC, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -1100,6 +1167,8 @@ class MaxPoolingNoMaskOp : public OpKernel { errors::InvalidArgument("Sliding window stride field must " "specify 4 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES_OK(context, + context->GetAttr("explicit_paddings", &explicit_paddings_)); const int32 ksize_n = GetTensorDim(ksize_, data_format_, 'N'); const int32 stride_n = GetTensorDim(stride_, data_format_, 'N'); OP_REQUIRES(context, ksize_n == 1 && stride_n == 1, @@ -1113,8 +1182,9 @@ class MaxPoolingNoMaskOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{ + context, ksize_, stride_, padding_, explicit_paddings_, + data_format_, tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -1131,15 +1201,20 @@ class MaxPoolingNoMaskOp : public OpKernel { #if CUDNN_VERSION >= 7300 DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, - out_shape, propagate_nans_); + stride_, padding_, explicit_paddings_, + data_format_, tensor_in, out_shape, + propagate_nans_); #else // These is_int8x4 checks avoid linker errors for missing qint8 kernels. if (!is_int8x4 && data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize_, - stride_, padding_, data_format_, tensor_in, - out_shape, propagate_nans_); + stride_, padding_, explicit_paddings_, + data_format_, tensor_in, out_shape, + propagate_nans_); } else { + OP_REQUIRES(context, padding_ != EXPLICIT, + errors::Unimplemented("Explicit padding is not supported ", + "when CUDNN is not enabled.")); Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); if (is_int8x4) { @@ -1165,6 +1240,7 @@ class MaxPoolingNoMaskOp : public OpKernel { std::vector ksize_; std::vector stride_; Padding padding_; + std::vector explicit_paddings_; TensorFormat data_format_; bool propagate_nans_; }; @@ -1228,8 +1304,13 @@ class MaxPoolingNoMaskV2Op : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - PoolParameters params{context, ksize, stride, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{context, + ksize, + stride, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -1239,8 +1320,9 @@ class MaxPoolingNoMaskV2Op : public OpKernel { params.out_width, params.depth); if (data_format_ == FORMAT_NCHW) { DnnPoolingOp::Compute(context, se::dnn::PoolingMode::kMaximum, ksize, - stride, padding_, data_format_, tensor_in, - out_shape, propagate_nans_); + stride, padding_, explicit_paddings_, + data_format_, tensor_in, out_shape, + propagate_nans_); } else { CHECK(data_format_ == FORMAT_NHWC) << "MaxPool only supports NCHW or NHWC format"; @@ -1255,6 +1337,7 @@ class MaxPoolingNoMaskV2Op : public OpKernel { std::vector ksize_; std::vector stride_; Padding padding_; + 
std::vector explicit_paddings_; TensorFormat data_format_; bool propagate_nans_; }; @@ -1267,7 +1350,7 @@ struct LaunchMaxPoolingNoMask { input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, - params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, + params.row_stride, params.col_stride, params.pad_top, params.pad_left, output->flat().data(), nullptr, context->eigen_gpu_device(), propagate_nans, false); if (!status) { @@ -1286,7 +1369,7 @@ struct LaunchMaxPoolingWithArgmax { input.flat().data(), params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, - params.row_stride, params.col_stride, params.pad_rows, params.pad_cols, + params.row_stride, params.col_stride, params.pad_top, params.pad_left, output->flat().data(), reinterpret_cast(argmax->flat().data()), context->eigen_gpu_device(), propagate_nans, include_batch_in_index); diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index 4bd710546fe..ca2f0ecfe21 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/kernel_shape_util.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -49,12 +50,75 @@ struct RawType { using type = int8; }; +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + +template +struct PadInputWithNegativeInf { + Status operator()(const GPUDevice& d, + typename TTypes::ConstTensor in, + int input_pad_top, int input_pad_bottom, int input_pad_left, + int input_pad_right, typename TTypes::Tensor out, + TensorFormat format) { + T padding_value = -std::numeric_limits::infinity(); + functor::PadInput()( + d, in, {{input_pad_top, input_pad_left}}, + {{input_pad_bottom, input_pad_right}}, out, format, padding_value); + return Status::OK(); + } +}; + +template <> +struct PadInputWithNegativeInf { + Status operator()(const GPUDevice& d, + typename TTypes::ConstTensor in, + int input_pad_top, int input_pad_bottom, int input_pad_left, + int input_pad_right, + typename TTypes::Tensor out, + TensorFormat format) { + return errors::InvalidArgument( + "Explicit padding not yet supported with qint8"); + } +}; + +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + } // namespace +Status CheckPaddingSize(int64 window_rows, int64 window_cols, int64 pad_top, + int64 pad_bottom, int64 pad_left, int64 pad_right) { + if (!FastBoundsCheck(pad_top, window_rows)) { + return errors::InvalidArgument("Top padding ", pad_top, + " needs to be smaller than the " + "window size ", + window_rows); + } + if (!FastBoundsCheck(pad_bottom, window_rows)) { + return errors::InvalidArgument("Bottom padding ", pad_bottom, + " needs to be smaller than the " + "window size ", + window_rows); + } + if (!FastBoundsCheck(pad_left, window_cols)) { + return errors::InvalidArgument("Left padding ", pad_left, + " needs to be smaller than the " + "window size ", + window_cols); + } + if (!FastBoundsCheck(pad_right, window_cols)) { + return errors::InvalidArgument("Right padding ", pad_right, + " needs to be smaller than the " + "window size ", + window_cols); + } + return Status::OK(); 
+} + PoolParameters::PoolParameters(OpKernelContext* context, const std::vector& ksize, const std::vector& stride, - Padding padding, TensorFormat data_format, + Padding padding, + std::vector explicit_paddings, + TensorFormat data_format, const TensorShape& tensor_in_shape) { // For maxpooling, tensor_in should have 2 spatial dimensions. // Note: the total number of dimensions could be 4 for NHWC, NCHW, @@ -85,14 +149,24 @@ PoolParameters::PoolParameters(OpKernelContext* context, errors::Unimplemented( "MaxPooling supports exactly one of pooling across depth " "or pooling across width/height.")); + if (padding == Padding::EXPLICIT) { + OP_REQUIRES_OK(context, CheckValidPadding(padding, explicit_paddings, + /*num_dims=*/4, data_format)); + GetExplicitPaddingForDim(explicit_paddings, data_format, 'H', &pad_top, + &pad_bottom); + GetExplicitPaddingForDim(explicit_paddings, data_format, 'W', &pad_left, + &pad_right); + OP_REQUIRES_OK(context, CheckPaddingSize(window_rows, window_cols, pad_top, + pad_bottom, pad_left, pad_right)); + } if (depth_window == 1) { - OP_REQUIRES_OK( - context, GetWindowedOutputSize(tensor_in_rows, window_rows, row_stride, - padding, &out_height, &pad_rows)); - OP_REQUIRES_OK( - context, GetWindowedOutputSize(tensor_in_cols, window_cols, col_stride, - padding, &out_width, &pad_cols)); + OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose( + tensor_in_rows, window_rows, row_stride, + padding, &out_height, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose( + tensor_in_cols, window_cols, col_stride, + padding, &out_width, &pad_left, &pad_right)); pad_depth = 0; out_depth = depth; } else { @@ -140,6 +214,7 @@ void DnnPoolingOp::Compute(OpKernelContext* context, se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, + std::vector explicit_paddings, TensorFormat data_format, const Tensor& tensor_in, const TensorShape& tensor_out_shape, bool propagate_nans) { @@ -150,14 +225,18 @@ void DnnPoolingOp::Compute(OpKernelContext* context, return; } - PoolParameters params{context, size, stride, - padding, data_format, tensor_in.shape()}; + PoolParameters params{ + context, size, stride, padding, + explicit_paddings, data_format, tensor_in.shape()}; if (!context->status().ok()) { return; } int batch_size = params.tensor_in_batch; int depth = params.depth; + int tensor_in_cols = params.tensor_in_cols; + int tensor_in_rows = params.tensor_in_rows; + #if CUDNN_VERSION < 7300 /// Earlier versions do not support NHWC format, so we need to convert it /// to NCHW before calling cudnn. We need to get rid of this once it is done @@ -186,7 +265,7 @@ void DnnPoolingOp::Compute(OpKernelContext* context, } se::dnn::DataLayout data_layout = se::dnn::DataLayout::kBatchDepthYX; #else - auto& transformed_input = tensor_in; + Tensor transformed_input = tensor_in; auto& transformed_output = *tensor_out; se::dnn::DataLayout data_layout; switch (data_format) { @@ -209,21 +288,81 @@ void DnnPoolingOp::Compute(OpKernelContext* context, ToString(data_format))); } #endif - /// Get ready to call cudnn + + int64 vertical_padding = params.pad_top; + int64 horizontal_padding = params.pad_left; + + if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || + params.pad_left != params.pad_right)) { + // cuDNN only supports padding the same amount on the left and right sides, + // and on the top and bottom sides. So we manually create a new padded + // input tensor such that we can pass it to cuDNN. 
+ const int64 common_padding_rows = + std::min(params.pad_top, params.pad_bottom); + const int64 common_padding_cols = + std::min(params.pad_left, params.pad_right); + + Tensor padded_input; + const int64 padding_rows_diff = + std::abs(params.pad_top - params.pad_bottom); + const int64 padding_cols_diff = + std::abs(params.pad_left - params.pad_right); + + const int64 new_in_rows = tensor_in_rows + padding_rows_diff; + const int64 new_in_cols = tensor_in_cols + padding_cols_diff; + + OP_REQUIRES_OK( + context, + context->allocate_temp(DataTypeToEnum::value, + ShapeFromFormat(data_format, batch_size, + new_in_rows, new_in_cols, depth), + &padded_input)); + const int64 input_pad_top = params.pad_top - common_padding_rows; + const int64 input_pad_bottom = params.pad_bottom - common_padding_rows; + const int64 input_pad_left = params.pad_left - common_padding_cols; + const int64 input_pad_right = params.pad_right - common_padding_cols; + + bool in_bounds = + FastBoundsCheck(input_pad_top, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_bottom, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_left, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_right, std::numeric_limits::max()); + if (!in_bounds) { + context->SetStatus(errors::InvalidArgument("Padding is too large.")); + return; + } + + // We need to call the const version of transformed_input.tensor() + const Tensor& const_transformed_input = transformed_input; + OP_REQUIRES_OK( + context, + PadInputWithNegativeInf()( + context->eigen_device(), + To32Bit(const_transformed_input.tensor()), + static_cast(input_pad_top), static_cast(input_pad_bottom), + static_cast(input_pad_left), static_cast(input_pad_right), + To32Bit(padded_input.tensor()), data_format)); + transformed_input = padded_input; + vertical_padding = common_padding_rows; + horizontal_padding = common_padding_cols; + tensor_in_rows = new_in_rows; + tensor_in_cols = new_in_cols; + } + se::dnn::PoolingDescriptor pooling_desc; pooling_desc.set_pooling_mode(pooling_mode) .set_window_height(params.window_rows) .set_window_width(params.window_cols) .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) - .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols) + .set_vertical_padding(vertical_padding) + .set_horizontal_padding(horizontal_padding) .set_propagate_nans(propagate_nans); se::dnn::BatchDescriptor input_desc; input_desc.set_count(batch_size) - .set_height(params.tensor_in_rows) - .set_width(params.tensor_in_cols) + .set_height(tensor_in_rows) + .set_width(tensor_in_cols) .set_feature_map_count(depth) .set_layout(data_layout); @@ -280,13 +419,32 @@ void DnnPoolingOp::Compute(OpKernelContext* context, #endif } +// Forward declarations of the functor specializations for GPU. 
+namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void PadInput::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor in, \ + const std::array& padding_left, \ + const std::array& padding_right, \ + typename TTypes::Tensor out, TensorFormat data_format, \ + T padding_value); \ + extern template struct PadInput; + +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(double); +DECLARE_GPU_SPEC(int32); +} // namespace functor + template void DnnPoolingGradOp::Compute( OpKernelContext* context, se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, - Padding padding, TensorFormat data_format, const Tensor* tensor_in, - const Tensor* tensor_out, const Tensor& out_backprop, - const TensorShape& tensor_in_shape, bool propagate_nans) { + Padding padding, std::vector explicit_paddings, + TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, + const Tensor& out_backprop, const TensorShape& tensor_in_shape, + bool propagate_nans) { CHECK((pooling_mode != se::dnn::PoolingMode::kMaximum) || (tensor_in && tensor_out)) << "For MaxPoolGrad, both tensor_in and tensor_out needs to be " @@ -299,8 +457,8 @@ void DnnPoolingGradOp::Compute( return; } - PoolParameters params{context, size, stride, - padding, data_format, tensor_in_shape}; + PoolParameters params{context, size, stride, padding, + explicit_paddings, data_format, tensor_in_shape}; if (!context->status().ok()) { return; } @@ -406,6 +564,98 @@ void DnnPoolingGradOp::Compute( } #endif // CUDNN_VERSION < 7300 + int64 vertical_padding = params.pad_top; + int64 horizontal_padding = params.pad_left; + + int batch_size = params.tensor_in_batch; + int depth = params.depth; + int tensor_in_cols = params.tensor_in_cols; + int tensor_in_rows = params.tensor_in_rows; + + int64 input_pad_top = 0; + int64 input_pad_bottom = 0; + int64 input_pad_left = 0; + int64 input_pad_right = 0; + + if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || + params.pad_left != params.pad_right)) { + // Pad the input in the same way we did during the forward pass, so that + // cuDNN or MIOpen receives the same input during the backward pass function + // as it did during the forward pass function. 
+ const int64 common_padding_rows = + std::min(params.pad_top, params.pad_bottom); + const int64 common_padding_cols = + std::min(params.pad_left, params.pad_right); + + Tensor padded_input; + Tensor padded_input_backprop; + const int64 padding_rows_diff = + std::abs(params.pad_top - params.pad_bottom); + const int64 padding_cols_diff = + std::abs(params.pad_left - params.pad_right); + + const int64 new_in_rows = tensor_in_rows + padding_rows_diff; + const int64 new_in_cols = tensor_in_cols + padding_cols_diff; + + VLOG(2) << "Create new tensor: " + << " original rows=" << tensor_in_rows + << " original cols=" << tensor_in_cols + << " padding_rows=" << new_in_rows + << " padding_cols=" << new_in_cols << " depth= " << depth + << " batch_size=" << batch_size << " kernel_rows" + << params.window_rows << " kernel_col" << params.window_cols + << " stride_rows" << params.row_stride; + + OP_REQUIRES_OK( + context, + context->allocate_temp(DataTypeToEnum::value, + ShapeFromFormat(data_format, batch_size, + new_in_rows, new_in_cols, depth), + &padded_input)); + + OP_REQUIRES_OK( + context, + context->allocate_temp(DataTypeToEnum::value, + ShapeFromFormat(data_format, batch_size, + new_in_rows, new_in_cols, depth), + &transformed_input_backprop)); + + input_pad_top = params.pad_top - common_padding_rows; + input_pad_bottom = params.pad_bottom - common_padding_rows; + input_pad_left = params.pad_left - common_padding_cols; + input_pad_right = params.pad_right - common_padding_cols; + + bool in_bounds = + FastBoundsCheck(input_pad_top, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_bottom, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_left, std::numeric_limits::max()) && + FastBoundsCheck(input_pad_right, std::numeric_limits::max()); + if (!in_bounds) { + context->SetStatus(errors::InvalidArgument("Padding is too large.")); + return; + } + + // PadInputWithNegativeInf functor requires input to be a const. 
+ const Tensor& const_transformed_input = transformed_input; + OP_REQUIRES_OK( + context, + PadInputWithNegativeInf()( + context->eigen_device(), + To32Bit(const_transformed_input.tensor()), + static_cast(input_pad_top), static_cast(input_pad_bottom), + static_cast(input_pad_left), static_cast(input_pad_right), + To32Bit(padded_input.tensor()), data_format)); + + transformed_input = padded_input; + + vertical_padding = common_padding_rows; + horizontal_padding = common_padding_cols; + VLOG(2) << "vertical padding set to: " << vertical_padding + << " horizontal padding set to: " << horizontal_padding; + tensor_in_rows = new_in_rows; + tensor_in_cols = new_in_cols; + } + /// Get ready to call cudnn se::dnn::PoolingDescriptor pooling_desc; pooling_desc.set_pooling_mode(pooling_mode) @@ -413,8 +663,8 @@ void DnnPoolingGradOp::Compute( .set_window_width(params.window_cols) .set_vertical_stride(params.row_stride) .set_horizontal_stride(params.col_stride) - .set_vertical_padding(params.pad_rows) - .set_horizontal_padding(params.pad_cols) + .set_vertical_padding(vertical_padding) + .set_horizontal_padding(horizontal_padding) .set_propagate_nans(propagate_nans); se::dnn::BatchDescriptor orig_output_desc; @@ -426,8 +676,8 @@ void DnnPoolingGradOp::Compute( se::dnn::BatchDescriptor orig_input_desc; orig_input_desc.set_count(params.tensor_in_batch) - .set_height(params.tensor_in_rows) - .set_width(params.tensor_in_cols) + .set_height(tensor_in_rows) + .set_width(tensor_in_cols) .set_feature_map_count(params.depth) .set_layout(data_layout); @@ -482,6 +732,18 @@ void DnnPoolingGradOp::Compute( input_backprop->tensor()); } #endif // CUDNN_VERSION < 7300 + if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || + params.pad_left != params.pad_right)) { + // Remove the padding that was added to the input shape above. + functor::PadInput()( + context->eigen_device(), + To32Bit(const_cast(transformed_input_backprop) + .tensor()), + {{static_cast(-input_pad_top), static_cast(-input_pad_left)}}, + {{static_cast(-input_pad_bottom), + static_cast(-input_pad_right)}}, + To32Bit(input_backprop->tensor()), data_format); + } } #define DEFINE_DNN_OPS(T) \ diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h index 2aa1d0b6de0..dacbb872cf0 100644 --- a/tensorflow/core/kernels/pooling_ops_common.h +++ b/tensorflow/core/kernels/pooling_ops_common.h @@ -18,7 +18,12 @@ limitations under the License. #include +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#define EIGEN_USE_GPU +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" @@ -40,9 +45,12 @@ typedef Eigen::GpuDevice GPUDevice; // A helper class to manage sizes and shapes for pooling operations. struct PoolParameters { // Updates context->status if there is an invalid input. + // explicit_paddings has eight elements if padding==EXPLICIT, and zero + // elements otherwise. PoolParameters(OpKernelContext* context, const std::vector& ksize, const std::vector& stride, Padding padding, - TensorFormat data_format, const TensorShape& tensor_in_shape); + std::vector explicit_paddings, TensorFormat data_format, + const TensorShape& tensor_in_shape); // Returns the shape of the output for "forward" pooling operations.
TensorShape forward_output_shape(); @@ -65,13 +73,21 @@ struct PoolParameters { int64 out_width; int out_depth; - int64 pad_rows; - int64 pad_cols; + int64 pad_top; + int64 pad_bottom; + int64 pad_left; + int64 pad_right; + int pad_depth; TensorFormat data_format; }; +// Checks if the sizes of the paddings are less than the size of window. +// This is required for MaxPool because it pads with -inf, so the pooling +// window cannot fully cover the padded area. +Status CheckPaddingSize(PoolParameters& params); + // An implementation of MaxPooling (forward). // TODO (yongtang): Remove MaxPoolingOp and use MaxPoolingV2Op, // QuantizedMaxPoolingOp depends on MaxPoolingOp so keep intact for now @@ -106,6 +122,10 @@ class MaxPoolingOp : public OpKernel { errors::InvalidArgument("Sliding window stride field must " "specify 4 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + if (padding_ == Padding::EXPLICIT) { + OP_REQUIRES_OK( + context, context->GetAttr("explicit_paddings", &explicit_paddings_)); + } OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); @@ -113,8 +133,9 @@ class MaxPoolingOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{ + context, ksize_, stride_, padding_, explicit_paddings_, + FORMAT_NHWC, tensor_in.shape()}; if (!context->status().ok()) { return; } @@ -134,9 +155,21 @@ class MaxPoolingOp : public OpKernel { context, params.depth_window == params.depth_stride, errors::Unimplemented("Depthwise max pooling requires " "the depth window to equal the depth stride.")); + OP_REQUIRES( + context, padding_ != EXPLICIT, + errors::Unimplemented("Depthwise max pooling does not support " + "explicit padding.")); DepthwiseMaxPool(context, output, tensor_in, params); } else { + // MaxPoolingOp is only called on the GPU when the eigen_tensor label + // is used. In this case, explicit padding is not supported + if (std::is_same::value && + padding_ == Padding::EXPLICIT) { + context->SetStatus(errors::Unimplemented( + "MaxPoolingOp does not support explicit padding.")); + return; + } SpatialMaxPool(context, output, tensor_in, params, padding_); } } @@ -202,8 +235,8 @@ class MaxPoolingOp : public OpKernel { auto shard = [¶ms, &in_mat, &out_mat](int64 start, int64 limit) { const int32 in_rows = params.tensor_in_rows; const int32 in_cols = params.tensor_in_cols; - const int32 pad_rows = params.pad_rows; - const int32 pad_cols = params.pad_cols; + const int32 pad_top = params.pad_top; + const int32 pad_left = params.pad_left; const int32 window_rows = params.window_rows; const int32 window_cols = params.window_cols; const int32 row_stride = params.row_stride; @@ -225,8 +258,8 @@ class MaxPoolingOp : public OpKernel { for (int32 w = 0; w < in_cols; ++w) { // (h_start, h_end) * (w_start, w_end) is the range that the input // vector projects to. - const int32 hpad = h + pad_rows; - const int32 wpad = w + pad_cols; + const int32 hpad = h + pad_top; + const int32 wpad = w + pad_left; const int32 h_start = (hpad < window_rows) ? 
0 : (hpad - window_rows) / row_stride + 1; @@ -263,6 +296,7 @@ class MaxPoolingOp : public OpKernel { std::vector ksize_; std::vector stride_; Padding padding_; + std::vector explicit_paddings_; TensorFormat data_format_; }; @@ -280,7 +314,7 @@ struct LaunchMaxPoolingNoMask_NCHW_VECT_C { params.tensor_in_batch, params.tensor_in_rows, params.tensor_in_cols, params.depth, params.out_height, params.out_width, params.window_rows, params.window_cols, params.row_stride, params.col_stride, - params.pad_rows, params.pad_cols, + params.pad_top, params.pad_left, reinterpret_cast(output->flat().data()), context->eigen_gpu_device()); if (!status) { @@ -358,8 +392,15 @@ class MaxPoolingV2Op : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - PoolParameters params{context, ksize, stride, - padding_, data_format_, tensor_in.shape()}; + PoolParameters params{ + context, + ksize, + stride, + padding_, + /*explicit_paddings=*/{}, + data_format_, + tensor_in.shape(), + }; if (!context->status().ok()) { return; } @@ -455,8 +496,8 @@ class MaxPoolingV2Op : public OpKernel { auto shard = [¶ms, &in_mat, &out_mat](int64 start, int64 limit) { const int32 in_rows = params.tensor_in_rows; const int32 in_cols = params.tensor_in_cols; - const int32 pad_rows = params.pad_rows; - const int32 pad_cols = params.pad_cols; + const int32 pad_top = params.pad_top; + const int32 pad_left = params.pad_left; const int32 window_rows = params.window_rows; const int32 window_cols = params.window_cols; const int32 row_stride = params.row_stride; @@ -478,8 +519,8 @@ class MaxPoolingV2Op : public OpKernel { for (int32 w = 0; w < in_cols; ++w) { // (h_start, h_end) * (w_start, w_end) is the range that the input // vector projects to. - const int32 hpad = h + pad_rows; - const int32 wpad = w + pad_cols; + const int32 hpad = h + pad_top; + const int32 wpad = w + pad_left; const int32 h_start = (hpad < window_rows) ? 0 : (hpad - window_rows) / row_stride + 1; @@ -567,8 +608,8 @@ void SpatialAvgPool(OpKernelContext* context, Tensor* output, for (int w = 0; w < params.tensor_in_cols; ++w) { // (h_start, h_end) * (w_start, w_end) is the range that the input // vector projects to. - const int hpad = h + params.pad_rows; - const int wpad = w + params.pad_cols; + const int hpad = h + params.pad_top; + const int wpad = w + params.pad_left; const int h_start = (hpad < params.window_rows) ? 
0 diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index 9685bd9fdd0..8e391f35aee 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -43,6 +43,7 @@ class DnnPoolingOp { se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, + std::vector explicit_paddings, TensorFormat data_format, const Tensor& tensor_in, const TensorShape& tensor_out_shape, bool propagate_nans); }; @@ -58,6 +59,7 @@ class DnnPoolingGradOp { se::dnn::PoolingMode pooling_mode, const std::vector& size, const std::vector& stride, Padding padding, + std::vector explicit_paddings, TensorFormat data_format, const Tensor* tensor_in, const Tensor* tensor_out, const Tensor& out_backprop, const TensorShape& tensor_in_shape, bool propagate_nans); diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/core/kernels/quantized_pooling_ops.cc index 33a12c47466..cf8d6838b1c 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops.cc +++ b/tensorflow/core/kernels/quantized_pooling_ops.cc @@ -54,8 +54,13 @@ class QuantizedAvgPoolingOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); - PoolParameters params{context, ksize_, stride_, - padding_, FORMAT_NHWC, tensor_in.shape()}; + PoolParameters params{context, + ksize_, + stride_, + padding_, + /*explicit_paddings=*/{}, + FORMAT_NHWC, + tensor_in.shape()}; if (!context->status().ok()) { return; } diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index b34bb4131d9..952cb3bc5ea 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -846,11 +846,12 @@ REGISTER_OP("MaxPool") "uint16, qint8} = DT_FLOAT") .Attr("ksize: list(int) >= 4") .Attr("strides: list(int) >= 4") - .Attr(GetPaddingAttrString()) + .Attr(GetPaddingAttrStringWithExplicit()) + .Attr(GetExplicitPaddingsAttrString()) .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'") .Input("input: T") .Output("output: T") - .SetShapeFn(shape_inference::MaxPoolShape); + .SetShapeFn(shape_inference::MaxPoolShapeWithExplicitPadding); REGISTER_OP("MaxPoolV2") .Attr( @@ -870,7 +871,8 @@ REGISTER_OP("MaxPoolV2") REGISTER_OP("MaxPoolGrad") .Attr("ksize: list(int) >= 4") .Attr("strides: list(int) >= 4") - .Attr(GetPaddingAttrString()) + .Attr(GetPaddingAttrStringWithExplicit()) + .Attr(GetExplicitPaddingsAttrString()) .Attr(GetConvnetDataFormatAttrString()) .Input("orig_input: T") .Input("orig_output: T") @@ -895,6 +897,7 @@ REGISTER_OP("MaxPoolGradV2") return UnchangedShapeWithRank(c, 4); }); +// TODO(b/150813181): Implement explicit padding. REGISTER_OP("MaxPoolGradGrad") .Attr("ksize: list(int) >= 4") .Attr("strides: list(int) >= 4") diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 47639c99a4a..89a18f6fdbd 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -337,13 +337,13 @@ def NHWCToNCHW(input_tensor): """Converts the input from the NHWC format to NCHW. 
Args: - input_tensor: a 4- or 5-D tensor, or an array representing shape + input_tensor: a 3-, 4-, or 5-D tensor, or an array representing shape Returns: converted tensor or shape array """ # tensor dim -> new axis order - new_axes = {4: [0, 3, 1, 2], 5: [0, 4, 1, 2, 3]} + new_axes = {3: [0, 2, 1], 4: [0, 3, 1, 2], 5: [0, 4, 1, 2, 3]} if isinstance(input_tensor, ops.Tensor): ndims = input_tensor.shape.ndims return array_ops.transpose(input_tensor, new_axes[ndims]) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 7555230fa35..20699f5de49 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -50,15 +50,21 @@ def GetDeviceScope(self, use_gpu=False): return self.session(use_gpu=use_gpu) -def GetTestConfigs(include_nchw_vect_c=False): +def GetTestConfigs(include_nchw_vect_c=False, one_dimensional=False): """Get all the valid tests configs to run. Args: include_nchw_vect_c: Whether to include NCHW_VECT_C in the test configs. + one_dimensional: If it's a 1D test Returns: all the valid test configs as tuples of data_format and use_gpu. """ + if one_dimensional: + test_configs = [("NWC", False), ("NWC", True)] + if test.is_gpu_available(cuda_only=True): + test_configs += [("NCW", True)] + return test_configs test_configs = [("NHWC", False), ("NHWC", True)] if not test.is_gpu_available(cuda_only=True): tf_logging.info("NCHW and NCHW_VECT_C tests skipped because not run with " @@ -106,8 +112,12 @@ def GetShrunkInceptionMaxPoolShapes(shrink=30): class PoolingTest(test.TestCase): + def _isMaxPool(self, func): + return func in (nn_ops.max_pool, nn_ops.max_pool_v2) + def _VerifyOneType(self, pool_func, input_sizes, ksize, strides, padding, - data_format, data_type, expected, use_gpu, v2): + data_format, data_type, expected, use_gpu, v2, + use_negative_input=False): """Verifies the output values of the pooling function. Args: @@ -121,6 +131,8 @@ class PoolingTest(test.TestCase): data_type: The data type to use to run the pooling operation. expected: An array containing the expected operation outputs. use_gpu: Whether we are running on GPU. + v2: Whether to use v2 version. + use_negative_input: If the input values should be negative. """ total_size = 1 for s in input_sizes: @@ -141,10 +153,11 @@ class PoolingTest(test.TestCase): data_type) # Initializes the input tensor with array containing incrementing # numbers from 1, wrapping round to -127 after 127 to support int8. 
- x = [((f + 128) % 255) - 127 for f in range(total_size)] + y = -1 if use_negative_input else 1 + x = [(((f + 128) % 255) - 127)*y for f in range(total_size)] with self.cached_session(use_gpu=use_gpu): t = constant_op.constant(x, shape=input_sizes, dtype=data_type) - if data_format in ("NCHW", "NCHW_VECT_C"): + if data_format in ("NCHW", "NCHW_VECT_C", "NCW"): if data_format == "NCHW_VECT_C": t = test_util.NHWCToNCHW_VECT_C(t) t, _, _ = gen_array_ops.quantize_v2(t, -128.0, 127.0, dtypes.qint8) @@ -152,6 +165,8 @@ class PoolingTest(test.TestCase): t = test_util.NHWCToNCHW(t) ksize = test_util.NHWCToNCHW(ksize) strides = test_util.NHWCToNCHW(strides) + if isinstance(padding, list): + padding = test_util.NHWCToNCHW(padding) ksize_placeholder = array_ops.placeholder(dtypes.int32, shape=[4]) strides_placeholder = array_ops.placeholder(dtypes.int32, shape=[4]) if v2: @@ -184,7 +199,8 @@ class PoolingTest(test.TestCase): self.assertAllCloseAccordingToType(expected, actual.flatten()) def _VerifyOneTest(self, pool_func, input_sizes, ksize, strides, padding, - data_format, expected, use_gpu, v2): + data_format, expected, use_gpu, v2, + use_negative_input=False): """Verifies the output values of the pooling function. Args: @@ -197,6 +213,8 @@ class PoolingTest(test.TestCase): data_format: The data format we use to run the pooling operation. expected: An array containing the expected operation outputs. use_gpu: Whether we are running on GPU. + v2: Whether to use v2 version. + use_negative_input: If the input values should be negative. """ if data_format == "NCHW_VECT_C": avg_pool_func = nn_ops.avg_pool @@ -204,17 +222,24 @@ class PoolingTest(test.TestCase): if pool_func == avg_pool_func: tf_logging.info("NCHW_VECT_C not yet implemented for avg_pool") return + if (self._isMaxPool(pool_func) and isinstance(padding, list)): + tf_logging.info("NCHW_VECT_C not yet implemented for max pool" + + " with explicit padding") + return self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, - data_format, dtypes.float32, expected, use_gpu, v2) + data_format, dtypes.float32, expected, use_gpu, v2, + use_negative_input) if not test.is_built_with_rocm(): # double datatype is not supported for pooling ops on the ROCm platform self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, - data_format, dtypes.float64, expected, use_gpu, v2) + data_format, dtypes.float64, expected, use_gpu, v2, + use_negative_input) if not use_gpu or test_util.GpuSupportsHalfMatMulAndConv(): self._VerifyOneType(pool_func, input_sizes, ksize, strides, padding, - data_format, dtypes.float16, expected, use_gpu, v2) + data_format, dtypes.float16, expected, use_gpu, v2, + use_negative_input) def _VerifyValues(self, pool_func, @@ -224,7 +249,9 @@ class PoolingTest(test.TestCase): padding, expected, use_gpu, - v2=False): + v2=False, + one_dim=False, + use_negative_input=False): """Verifies the output values of the pooling function. Args: @@ -236,11 +263,16 @@ class PoolingTest(test.TestCase): padding: Padding type. expected: An array containing the expected operation outputs. use_gpu: Whether we are running on GPU. + v2: Whether to use v2 version. + one_dim: If one dimensional pools should be done instead of two + dimensional pools. + use_negative_input: If the input values should be negative.
""" - for (data_format, use_gpu_2) in GetTestConfigs(True): + for (data_format, use_gpu_2) in GetTestConfigs(True, one_dim): if use_gpu_2 == use_gpu: self._VerifyOneTest(pool_func, input_sizes, ksize, strides, padding, - data_format, expected, use_gpu, v2) + data_format, expected, use_gpu, v2, + use_negative_input) def _testAvgPoolValidPadding(self, use_gpu): expected_output = [7.0, 8.0, 9.0] @@ -467,6 +499,101 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) + def _testMaxPoolZeroExplicitPadding(self, use_gpu): + expected_output = [9.0] + self._VerifyValues( + nn_ops.max_pool, + input_sizes=[1, 3, 3, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [0, 0], [0, 0], [0, 0]], + expected=expected_output, + use_gpu=use_gpu) + + def _testMaxPoolNegativeInputExpPadding(self, use_gpu): + expected_output = [-1, -1, -1, -1] + self._VerifyValues( + nn_ops.max_pool, + input_sizes=[1, 3, 3, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [2, 1], [2, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu, + use_negative_input=True) + + def _testMaxPoolExplicitPadding(self, use_gpu): + expected_output = [9.0, 9.0] + self._VerifyValues( + nn_ops.max_pool, + input_sizes=[1, 3, 3, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [0, 2], [0, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPaddingAdvanced(self, use_gpu): + expected_output = [7, 9, 11, 12, 19, 21, 23, 24, 31, 33, 35, 36, 31, 33, + 35, 36] + self._VerifyValues( + nn_ops.max_pool, + input_sizes=[1, 6, 6, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [1, 2], [2, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu) + + def _testMaxPoolNegativeInputExpPaddingAdv(self, use_gpu): + expected_output = [-1, -1, -3, -5, -7, -7, -9, -11, -19, -19, -21, -23, -31, + -31, -33, -35] + + self._VerifyValues( + nn_ops.max_pool, + input_sizes=[1, 6, 6, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [1, 2], [2, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu, + use_negative_input=True) + + def _testMaxPoolExplicitPaddingV2(self, use_gpu): + expected_output = [9.0, 9.0] + self._VerifyValues( + nn_ops.max_pool_v2, + input_sizes=[1, 3, 3, 1], + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [0, 2], [0, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPadding1D(self, use_gpu): + expected_output = [2.0, 3.0] + self._VerifyValues( + nn_ops.max_pool1d, + input_sizes=[1, 3, 1], + ksize=[1, 2, 1], + strides=[1, 2, 1], + padding=[[0, 0], [0, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu, + one_dim=True) + + def _testMaxPoolExplicitPadding1dV2(self, use_gpu): + expected_output = [2.0, 3.0] + self._VerifyValues( + nn_ops.max_pool_v2, + input_sizes=[1, 3, 1], + ksize=[1, 2, 1], + strides=[1, 2, 1], + padding=[[0, 0], [0, 1], [0, 0]], + expected=expected_output, + use_gpu=use_gpu, + one_dim=True) + def _testMaxPoolSamePaddingNonSquareWindow(self, use_gpu): # input is: # [1.0, 2.0 @@ -618,6 +745,14 @@ class PoolingTest(test.TestCase): self._testMaxPoolSamePaddingPacket4(use_gpu) self._testMaxPoolSamePaddingPacket8(use_gpu) self._testMaxPoolEmptyInput(use_gpu) + self._testMaxPoolZeroExplicitPadding(use_gpu) + self._testMaxPoolExplicitPadding(use_gpu) + self._testMaxPoolExplicitPaddingV2(use_gpu) + self._testMaxPoolExplicitPadding1D(use_gpu) + self._testMaxPoolExplicitPadding1dV2(use_gpu) + 
self._testMaxPoolExplicitPaddingAdvanced(use_gpu) + self._testMaxPoolNegativeInputExpPadding(use_gpu) + self._testMaxPoolNegativeInputExpPaddingAdv(use_gpu) # Tests for DepthwiseMaxPooling on CPU only. @test_util.run_deprecated_v1 @@ -980,7 +1115,7 @@ class PoolingTest(test.TestCase): data_format, use_gpu, x_init_value=None): - """Verifies the gradients of the avg pooling function. + """Verifies the gradients of the max or avg pooling function. Args: pool_func: Function to be called, co.MaxPool, co.AvgPool, @@ -1017,11 +1152,13 @@ class PoolingTest(test.TestCase): func_name = "max_pool" err_tolerance = 1e-3 if data_format == "NCHW": - ksize = [1, 1, window_rows, window_rows] + ksize = [1, 1, window_rows, window_cols] strides = [1, 1, row_stride, col_stride] + if isinstance(padding, list): + padding = test_util.NHWCToNCHW(padding) t = test_util.NHWCToNCHW(input_tensor) else: - ksize = [1, window_rows, window_rows, 1] + ksize = [1, window_rows, window_cols, 1] strides = [1, row_stride, col_stride, 1] t = input_tensor t = pool_func( @@ -1261,6 +1398,76 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testMaxPoolExplicitPadding1(self, data_format, use_gpu): + for pool_func in [nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 7, 7, 1], + output_sizes=[1, 7, 7, 1], + window_rows=3, + window_cols=3, + row_stride=1, + col_stride=1, + padding=[[0, 0], [1, 1], [1, 1], [0, 0]], + data_format=data_format, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPadding2(self, data_format, use_gpu): + for pool_func in [nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 7, 7, 1], + output_sizes=[1, 6, 8, 1], + window_rows=3, + window_cols=5, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 1], [2, 3], [0, 0]], + data_format=data_format, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPaddingLeftGreater(self, data_format, use_gpu): + for pool_func in [nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 7, 7, 1], + output_sizes=[1, 6, 8, 1], + window_rows=3, + window_cols=5, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 1], [3, 2], [0, 0]], + data_format=data_format, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPaddingBatchChannel(self, data_format, use_gpu): + for pool_func in [nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[4, 7, 7, 3], + output_sizes=[4, 6, 8, 3], + window_rows=3, + window_cols=5, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 1], [3, 2], [0, 0]], + data_format=data_format, + use_gpu=use_gpu) + + def _testMaxPoolExplicitPaddingStrides(self, data_format, use_gpu): + for pool_func in [nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 7, 7, 1], + output_sizes=[1, 4, 3, 1], + window_rows=3, + window_cols=3, + row_stride=2, + col_stride=3, + padding=[[0, 0], [1, 1], [1, 1], [0, 0]], + data_format=data_format, + use_gpu=use_gpu) + @test_util.run_deprecated_v1 def testMaxPoolGrad(self): for (data_format, use_gpu) in GetTestConfigs(): @@ -1274,6 +1481,11 @@ class PoolingTest(test.TestCase): self._testMaxPoolGradSamePadding2_1(data_format, use_gpu) self._testMaxPoolGradSamePadding2_2(data_format, use_gpu) self._testMaxPoolGradSamePadding3_1(data_format, use_gpu) + self._testMaxPoolExplicitPadding1(data_format, use_gpu) + self._testMaxPoolExplicitPadding2(data_format, use_gpu) + self._testMaxPoolExplicitPaddingStrides(data_format, use_gpu) + 
self._testMaxPoolExplicitPaddingLeftGreater(data_format, use_gpu) + self._testMaxPoolExplicitPaddingBatchChannel(data_format, use_gpu) def _MaxPoolGrad(self, orig_input, orig_output, grad, window_rows, window_cols, row_stride, col_stride, padding, v2): @@ -1294,9 +1506,16 @@ class PoolingTest(test.TestCase): A Tensor. """ pool_func = gen_nn_ops.max_pool_grad_v2 if v2 else gen_nn_ops.max_pool_grad - return pool_func(orig_input, orig_output, grad, - [1, window_rows, window_cols, 1], - [1, row_stride, col_stride, 1], padding) + if v2: + return pool_func(orig_input, orig_output, grad, + [1, window_rows, window_cols, 1], + [1, row_stride, col_stride, 1], padding) + else: + padding, explicit_paddings = nn_ops.convert_padding(padding) + return pool_func(orig_input, orig_output, grad, + [1, window_rows, window_cols, 1], + [1, row_stride, col_stride, 1], padding, + explicit_paddings) def _testMaxPoolGradDirect(self, input_data, output_backprop, expected_input_backprop, input_sizes, output_sizes, @@ -1439,6 +1658,116 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) + def _testMaxPoolGradZeroExplicitPadding(self): + input_data = [ + 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0 + ] + output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] + expected_input_backprop = [ + 11.0, 0.0, 25.0, 0.0, 0.0, 31.0, 0.0, 17.0, 19.0, 0.0, 41.0, 0.0, 0.0, + 0.0, 0.0, 0.0 + ] + + for use_gpu in True, False: + for v2 in [False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 0], [0, 0], [0, 0]], + use_gpu=use_gpu, + v2=v2) + + def _testMaxPoolGradExplicitPadding_1(self): + input_data = [ + 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0 + ] + output_backprop = [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, + 20.0, 21.0, 22.0] + expected_input_backprop = [ + 11.0, 0.0, 25.0, 0.0, 0.0, 31.0, 0.0, 49.0, 19.0, 0.0, 41.0, 0.0, 0.0, + 0.0, 0.0, 22.0 + ] + + for use_gpu in True, False: + for v2 in [False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 4, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 0], [0, 1], [0, 0]], + use_gpu=use_gpu, + v2=v2) + + def _testMaxPoolGradExplicitPadding_2(self): + input_data = [ + 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0 + ] + output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] + expected_input_backprop = [ + 54.0, 0.0, 30.0, 0.0, 0.0, 0.0, 0.0, 0.0, 39.0, 0.0, 21.0, 0.0, 0.0, + 0.0, 0.0, 0.0 + ] + + for use_gpu in True, False: + for v2 in [False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 3, 1], + window_rows=3, + window_cols=3, + row_stride=2, + col_stride=2, + padding=[[0, 0], [2, 1], [2, 1], [0, 0]], + use_gpu=use_gpu, + v2=v2) + + def _testMaxPoolGradExplicitPadding_3(self): + input_data = [ + -1.0, -5.0, -1.0, -5.0, -5.0, -1.0, -5.0, -1.0, -1.0, -5.0, -1.0, -5.0, + -5.0, -1.0, -5.0, -1.0 + ] + output_backprop = [11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, + 20.0, 21.0, 22.0] + expected_input_backprop = [ + 11.0, 0.0, 25.0, 0.0, 0.0, 31.0, 0.0, 49.0, 19.0, 0.0, 41.0, 0.0, 0.0, + 0.0, 0.0, 
22.0 + ] + + for use_gpu in True, False: + for v2 in [False]: + self._testMaxPoolGradDirect( + input_data, + output_backprop, + expected_input_backprop, + input_sizes=[1, 4, 4, 1], + output_sizes=[1, 3, 4, 1], + window_rows=2, + window_cols=2, + row_stride=1, + col_stride=1, + padding=[[0, 0], [0, 0], [0, 1], [0, 0]], + use_gpu=use_gpu, + v2=v2) + @test_util.no_xla_auto_jit("b/123923733") # NaNs handled differently def _testMaxPoolGradDirectWithNans2_1(self): input_data = [float("nan")] * 16 @@ -1615,6 +1944,10 @@ class PoolingTest(test.TestCase): self._testMaxPoolGradDirect1_3() self._testMaxPoolGradDirectWithNans2_1() self._testMaxPoolGradDirectWithNans2_2() + self._testMaxPoolGradZeroExplicitPadding() + self._testMaxPoolGradExplicitPadding_1() + self._testMaxPoolGradExplicitPadding_2() + self._testMaxPoolGradExplicitPadding_3() def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu): for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: @@ -1956,6 +2289,94 @@ class PoolingTest(test.TestCase): strides=[1, 1, 1, 1], padding="VALID") + @test_util.run_deprecated_v1 + def _testEdgeCasesRaiseErrors(self): + with self.assertRaisesRegexp( + ValueError, "Data formats NCHW_VECT_C is not yet supported with " + "explicit padding"): + nn_ops.max_pool( + array_ops.placeholder(dtypes.float32, shape=[1, 3, 3, 1]), + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding=[[0, 0], [0, 1], [0, 1], [0, 0]], + data_format="NCHW_VECT_C") + with self.assertRaisesRegexp( + ValueError, "Explicit padding is not yet supported with an input " + "tensor of rank 5"): + nn_ops.max_pool_v2( + array_ops.placeholder(dtypes.float32, shape=[1, 3, 3, 1, 1]), + ksize=[1, 2, 2, 1, 1], + strides=[1, 2, 2, 1, 1], + padding=[[0, 0], [0, 1], [0, 1], [0, 0]], + data_format="NCHW") + with self.assertRaisesRegexp( + ValueError, "Attr 'padding' of 'MaxPoolV2' Op passed " + "string 'EXPLICIT'"): + gen_nn_ops.max_pool_v2( + array_ops.placeholder(dtypes.float32, shape=[1, 3, 3, 1, 1]), + ksize=[1, 2, 2, 1, 1], + strides=[1, 2, 2, 1, 1], + padding="EXPLICIT", + data_format="NHWC") + + @test_util.run_deprecated_v1 + def _testEdgeCasesExcessPadding(self): + with self.session(use_gpu=test.is_gpu_available()) as sess: + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Right padding 2 needs to be smaller than the window size 2"): + input_sizes = [1, 3, 3, 1] + x = [(((f + 128) % 255) - 127) for f in range(9)] + t = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32) + sess.run(gen_nn_ops.max_pool( + t, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding="EXPLICIT", + explicit_paddings=[0, 0, 0, 1, 0, 2, 0, 0], + data_format="NHWC")) + + @test_util.run_deprecated_v1 + def _testNegativePadding(self): + with self.session(use_gpu=test.is_gpu_available()) as sess: + with self.assertRaisesRegexp( + ValueError, "All elements of explicit_paddings must be " + "nonnegative for"): + input_sizes = [1, 3, 3, 1] + x = [(((f + 128) % 255) - 127) for f in range(9)] + t = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32) + sess.run(gen_nn_ops.max_pool( + t, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding="EXPLICIT", + explicit_paddings=[0, 0, -1, -1, -1, -1, 0, 0], + data_format="NHWC")) + + @test_util.run_deprecated_v1 + def _testExplicitPaddingBatch(self): + with self.session(use_gpu=test.is_gpu_available()) as sess: + with self.assertRaisesRegexp( + ValueError, "Nonzero explicit padding in the batch or depth " + "dimensions is not supported"): + input_sizes = [1, 3, 3, 1] + x = 
[(((f + 128) % 255) - 127) for f in range(9)] + t = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32) + sess.run(gen_nn_ops.max_pool( + t, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding="EXPLICIT", + explicit_paddings=[1, 1, 1, 1, 1, 1, 0, 0], + data_format="NHWC")) + + def testExplicitPaddingEdgeCases(self): + # Tests for Explicit padding. + self._testEdgeCasesRaiseErrors() + self._testEdgeCasesExcessPadding() + self._testExplicitPaddingBatch() + self._testNegativePadding() + def GetMaxPoolFwdTest(input_size, filter_size, strides, padding): diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 6dee2fac95d..58dd1852cc5 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -688,6 +688,7 @@ def _MaxPoolGrad(op, grad): op.get_attr("ksize"), op.get_attr("strides"), padding=op.get_attr("padding"), + explicit_paddings=op.get_attr("explicit_paddings"), data_format=op.get_attr("data_format")) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 7874d6e4d59..ef7a7b980ab 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1752,12 +1752,14 @@ def atrous_conv2d(value, filters, rate, padding, name=None): name=name) -def convert_padding(padding): +def convert_padding(padding, expected_length=4): """Converts Python padding to C++ padding for ops which take EXPLICIT padding. Args: padding: the `padding` argument for a Python op which supports EXPLICIT padding. + expected_length: Expected number of entries in the padding list when + explicit padding is used. Returns: (padding, explicit_paddings) pair, which should be passed as attributes to a @@ -1783,9 +1785,9 @@ "be a list/tuple of size 2. Element with index %d of " "padding has size %d" % (i, len(dim_paddings))) explicit_paddings.extend(dim_paddings) - if len(padding) != 4: - raise ValueError("When padding is a list, it must be of size 4. Got " - "padding of size: %d" % len(padding)) + if len(padding) != expected_length: + raise ValueError("When padding is a list, it must be of size %d. Got " + "padding of size: %d" % (expected_length, len(padding))) padding = "EXPLICIT" return padding, explicit_paddings @@ -4481,8 +4483,15 @@ def max_pool_v2(input, ksize, strides, padding, data_format=None, name=None): of the window for each dimension of the input tensor. strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The stride of the sliding window for each dimension of the input tensor. - padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See - the "returns" section of `tf.nn.convolution` for details. + padding: Either the string `"SAME"` or `"VALID"` indicating the type of + padding algorithm to use, or a list indicating the explicit paddings at + the start and end of each dimension. When explicit padding is used and + data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, + pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding is used + and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], + [pad_top, pad_bottom], [pad_left, pad_right]]`. When using explicit + padding, the size of the paddings cannot be greater than the sliding + window size. data_format: A string. Specifies the channel dimension. For N=1 it can be either "NWC" (default) or "NCW", for N=2 it can be either "NHWC" (default) or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW".
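For reference, a minimal usage sketch of the list-valued `padding` documented above (illustrative only, not part of the diff; the values mirror `_testMaxPoolExplicitPadding`, and the expected result is an assumption taken from that test):

import tensorflow as tf

x = tf.reshape(tf.range(1.0, 10.0), [1, 3, 3, 1])  # NHWC input holding 1..9
y = tf.nn.max_pool(
    x,
    ksize=[1, 3, 3, 1],
    strides=[1, 2, 2, 1],
    # [[batch], [top, bottom], [left, right], [depth]]; batch/depth pairs must be zero.
    padding=[[0, 0], [0, 2], [0, 1], [0, 0]])
# Per the test above, both pooling windows reduce to the maximum value 9.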
@@ -4508,12 +4517,20 @@ def max_pool_v2(input, ksize, strides, padding, data_format=None, name=None): else: channel_index = 1 if data_format.startswith("NC") else n + 1 + if isinstance(padding, (list, tuple)) and data_format == "NCHW_VECT_C": + raise ValueError("Data formats NCHW_VECT_C is not yet supported with " + "explicit padding") + ksize = _get_sequence(ksize, n, channel_index, "ksize") strides = _get_sequence(strides, n, channel_index, "strides") + if (isinstance(padding, (list, tuple)) and n == 3): + raise ValueError("Explicit padding is not yet supported with an input " + "tensor of rank 5") + max_pooling_ops = { 1: max_pool1d, - 2: gen_nn_ops.max_pool, + 2: max_pool2d, 3: gen_nn_ops.max_pool3d } @@ -4545,8 +4562,15 @@ def max_pool(value, The size of the window for each dimension of the input tensor. strides: An int or list of `ints` that has length `1`, `2` or `4`. The stride of the sliding window for each dimension of the input tensor. - padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. - See the "returns" section of `tf.nn.convolution` for details. + padding: Either the string `"SAME"` or `"VALID"` indicating the type of + padding algorithm to use, or a list indicating the explicit paddings at + the start and end of each dimension. When explicit padding is used and + data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, + pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding is used + and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], + [pad_top, pad_bottom], [pad_left, pad_right]]`. When using explicit + padding, the size of the paddings cannot be greater than the sliding + window size. data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported. name: Optional name for the operation. input: Alias for value. @@ -4563,6 +4587,10 @@ def max_pool(value, ksize = _get_sequence(ksize, 2, channel_index, "ksize") strides = _get_sequence(strides, 2, channel_index, "strides") + if isinstance(padding, (list, tuple)) and data_format == "NCHW_VECT_C": + raise ValueError("Data formats NCHW_VECT_C is not yet supported with " + "explicit padding") + padding, explicit_paddings = convert_padding(padding) if ((np.isscalar(ksize) and ksize == 0) or (isinstance(ksize, (list, tuple, np.ndarray)) and any(v == 0 for v in ksize))): @@ -4573,6 +4601,7 @@ def max_pool(value, ksize=ksize, strides=strides, padding=padding, + explicit_paddings=explicit_paddings, data_format=data_format, name=name) @@ -4591,8 +4620,14 @@ def max_pool1d(input, ksize, strides, padding, data_format="NWC", name=None): window for each dimension of the input tensor. strides: An int or list of `ints` that has length `1` or `3`. The stride of the sliding window for each dimension of the input tensor. - padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See - the "returns" section of `tf.nn.convolution` for details. + padding: Either the string `"SAME"` or `"VALID"` indicating the type of + padding algorithm to use, or a list indicating the explicit paddings at + the start and end of each dimension. When explicit padding is used and + data_format is `"NWC"`, this should be in the form `[[0, 0], [pad_left, + pad_right], [0, 0]]`. When explicit padding is used and data_format is + `"NCW"`, this should be in the form `[[0, 0], [0, 0], [pad_left, + pad_right]]`. When using explicit padding, the size of the paddings cannot + be greater than the sliding window size. data_format: An optional string from: "NWC", "NCW". Defaults to "NWC".
name: A name for the operation (optional). @@ -4601,11 +4636,17 @@ def max_pool1d(input, ksize, strides, padding, data_format="NWC", name=None): The max pooled output tensor. """ with ops.name_scope(name, "MaxPool1d", [input]) as name: + if isinstance(padding, (list, tuple)) and data_format == "NCHW_VECT_C": + raise ValueError("Data formats NCHW_VECT_C is not yet supported with " + "explicit padding") if data_format is None: data_format = "NWC" channel_index = 1 if data_format.startswith("NC") else 2 ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize") strides = [1] + _get_sequence(strides, 1, channel_index, "strides") + padding, explicit_paddings = convert_padding(padding, 3) + if padding == "EXPLICIT": + explicit_paddings = [0, 0] + explicit_paddings expanding_dim = 1 if data_format == "NWC" else 2 data_format = "NHWC" if data_format == "NWC" else "NCHW" @@ -4616,6 +4657,7 @@ def max_pool1d(input, ksize, strides, padding, data_format="NWC", name=None): ksize=ksize, strides=strides, padding=padding, + explicit_paddings=explicit_paddings, data_format=data_format, name=name) return array_ops.squeeze(result, expanding_dim) @@ -4634,8 +4676,15 @@ def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): the window for each dimension of the input tensor. strides: An int or list of `ints` that has length `1`, `2` or `4`. The stride of the sliding window for each dimension of the input tensor. - padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See - the "returns" section of `tf.nn.convolution` for details. + padding: Either the string `"SAME"` or `"VALID"` indicating the type of + padding algorithm to use, or a list indicating the explicit paddings at + the start and end of each dimension. When explicit padding is used and + data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, + pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding is used + and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], + [pad_top, pad_bottom], [pad_left, pad_right]]`. When using explicit + padding, the size of the paddings cannot be greater than the sliding + window size. data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported. name: Optional name for the operation.
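All of the wrappers above route a list-valued `padding` through `convert_padding`, which rewrites it as the `EXPLICIT` padding string plus a flattened `explicit_paddings` attribute. A standalone sketch of that mapping, assuming the behavior shown in the nn_ops.py hunk earlier (the real helper also validates each inner pair):

def convert_padding_sketch(padding, expected_length=4):
  # Illustrative re-implementation; mirrors nn_ops.convert_padding in this diff.
  explicit_paddings = []
  if isinstance(padding, (list, tuple)):
    if len(padding) != expected_length:
      raise ValueError("When padding is a list, it must be of size %d. Got "
                       "padding of size: %d" % (expected_length, len(padding)))
    for dim_paddings in padding:
      explicit_paddings.extend(dim_paddings)  # flatten [[a, b], ...] -> [a, b, ...]
    padding = "EXPLICIT"
  return padding, explicit_paddings

# NHWC example: prints ('EXPLICIT', [0, 0, 1, 2, 2, 1, 0, 0])
print(convert_padding_sketch([[0, 0], [1, 2], [2, 1], [0, 0]]))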
@@ -4650,12 +4699,17 @@ def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): ksize = _get_sequence(ksize, 2, channel_index, "ksize") strides = _get_sequence(strides, 2, channel_index, "strides") + if isinstance(padding, (list, tuple)) and data_format == "NCHW_VECT_C": + raise ValueError("Data formats NCHW_VECT_C is not yet supported with " + "explicit padding") + padding, explicit_paddings = convert_padding(padding) return gen_nn_ops.max_pool( input, ksize=ksize, strides=strides, padding=padding, + explicit_paddings=explicit_paddings, data_format=data_format, name=name) # pylint: enable=redefined-builtin diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index daec1ff9882..5c5e9b3cfd2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -2426,7 +2426,7 @@ tf_module { } member_method { name: "MaxPool" - argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'explicit_paddings\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'NHWC\', \'None\'], " } member_method { name: "MaxPool3D" @@ -2442,7 +2442,7 @@ tf_module { } member_method { name: "MaxPoolGrad" - argspec: "args=[\'orig_input\', \'orig_output\', \'grad\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'orig_input\', \'orig_output\', \'grad\', \'ksize\', \'strides\', \'padding\', \'explicit_paddings\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'NHWC\', \'None\'], " } member_method { name: "MaxPoolGradGrad" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 422b3147a5b..2b1ee19fa79 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -2426,7 +2426,7 @@ tf_module { } member_method { name: "MaxPool" - argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'input\', \'ksize\', \'strides\', \'padding\', \'explicit_paddings\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'NHWC\', \'None\'], " } member_method { name: "MaxPool3D" @@ -2442,7 +2442,7 @@ tf_module { } member_method { name: "MaxPoolGrad" - argspec: "args=[\'orig_input\', \'orig_output\', \'grad\', \'ksize\', \'strides\', \'padding\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], " + argspec: "args=[\'orig_input\', \'orig_output\', \'grad\', \'ksize\', \'strides\', \'padding\', \'explicit_paddings\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'NHWC\', \'None\'], " } member_method { name: "MaxPoolGradGrad"
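Finally, a hypothetical 1-D sketch mirroring `_testMaxPoolExplicitPadding1D`: `max_pool1d` expands the input to NHWC internally and prepends `[0, 0]` to the explicit paddings for the synthetic height dimension, so the caller only specifies the width pair (the printed values are an assumption taken from the test):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0], shape=[1, 3, 1])  # NWC input
y = tf.nn.max_pool1d(
    x,
    ksize=2,
    strides=2,
    padding=[[0, 0], [0, 1], [0, 0]],  # one column of right padding on the width axis
    data_format="NWC")
print(y)  # expected per the test: [[[2.], [3.]]]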