diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 53168105aba..cd9680c8212 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -1212,9 +1212,11 @@ class MaxPoolingNoMaskOp : public OpKernel { data_format_, tensor_in, out_shape, propagate_nans_); } else { +#if !defined(TENSORFLOW_USE_ROCM) OP_REQUIRES(context, padding_ != EXPLICIT, errors::Unimplemented("Explicit padding is not supported ", "when CUDNN is not enabled.")); +#endif Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); if (is_int8x4) { diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index bbe03dd39cb..a0c07f31b3d 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -463,6 +463,8 @@ void DnnPoolingGradOp::Compute( return; } + TensorFormat transformed_input_data_format = data_format; + #if CUDNN_VERSION < 7300 /// For now, cudnn does not support NHWC format, so we need to convert it /// to NCHW before calling cudnn. We need to get rid of this once it is done @@ -516,6 +518,7 @@ void DnnPoolingGradOp::Compute( functor::NHWCToNCHW()(context->eigen_device(), tensor_in->tensor(), transformed_input.tensor()); + transformed_input_data_format = FORMAT_NCHW; } if (tensor_out) { // For AvgPoolGrad, the original output tensor is not necessary. However, @@ -577,6 +580,8 @@ void DnnPoolingGradOp::Compute( int64 input_pad_left = 0; int64 input_pad_right = 0; + Tensor transformed_and_padded_input_backprop; + if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || params.pad_left != params.pad_right)) { // Pad the input in the same way we did during the forward pass, so that @@ -588,7 +593,6 @@ void DnnPoolingGradOp::Compute( std::min(params.pad_left, params.pad_right); Tensor padded_input; - Tensor padded_input_backprop; const int64 padding_rows_diff = std::abs(params.pad_top - params.pad_bottom); const int64 padding_cols_diff = @@ -607,18 +611,18 @@ void DnnPoolingGradOp::Compute( << " stride_rows" << params.row_stride; OP_REQUIRES_OK( - context, - context->allocate_temp(DataTypeToEnum::value, - ShapeFromFormat(data_format, batch_size, - new_in_rows, new_in_cols, depth), - &padded_input)); + context, context->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat(transformed_input_data_format, batch_size, + new_in_rows, new_in_cols, depth), + &padded_input)); OP_REQUIRES_OK( - context, - context->allocate_temp(DataTypeToEnum::value, - ShapeFromFormat(data_format, batch_size, - new_in_rows, new_in_cols, depth), - &transformed_input_backprop)); + context, context->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat(transformed_input_data_format, batch_size, + new_in_rows, new_in_cols, depth), + &transformed_and_padded_input_backprop)); input_pad_top = params.pad_top - common_padding_rows; input_pad_bottom = params.pad_bottom - common_padding_rows; @@ -644,7 +648,8 @@ void DnnPoolingGradOp::Compute( To32Bit(const_transformed_input.tensor()), static_cast(input_pad_top), static_cast(input_pad_bottom), static_cast(input_pad_left), static_cast(input_pad_right), - To32Bit(padded_input.tensor()), data_format)); + To32Bit(padded_input.tensor()), + transformed_input_data_format)); transformed_input = padded_input; @@ -654,6 +659,8 @@ void DnnPoolingGradOp::Compute( << " horizontal padding set to: " << horizontal_padding; tensor_in_rows = new_in_rows; tensor_in_cols = new_in_cols; + } else { + transformed_and_padded_input_backprop = transformed_input_backprop; } /// Get ready to call cudnn @@ -690,9 +697,9 @@ void DnnPoolingGradOp::Compute( auto output_backprop_data = AsDeviceMemory(transformed_output_backprop.template flat().data(), transformed_output_backprop.template flat().size()); - auto input_backprop_data = - AsDeviceMemory(transformed_input_backprop.template flat().data(), - transformed_input_backprop.template flat().size()); + auto input_backprop_data = AsDeviceMemory( + transformed_and_padded_input_backprop.template flat().data(), + transformed_and_padded_input_backprop.template flat().size()); auto* stream = context->op_device_context()->stream(); OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); @@ -722,6 +729,20 @@ void DnnPoolingGradOp::Compute( OP_REQUIRES(context, status, errors::Internal("dnn PoolBackward launch failed")); + if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || + params.pad_left != params.pad_right)) { + // Remove the padding that was added to the input shape above. + functor::PadInput()( + context->eigen_device(), + To32Bit(const_cast(transformed_and_padded_input_backprop) + .tensor()), + {{static_cast(-input_pad_top), static_cast(-input_pad_left)}}, + {{static_cast(-input_pad_bottom), + static_cast(-input_pad_right)}}, + To32Bit(transformed_input_backprop.template tensor()), + transformed_input_data_format, T{}); + } + #if CUDNN_VERSION < 7300 if (data_format == FORMAT_NHWC) { /// Transform the output data from NCHW back to NHWC. @@ -732,18 +753,6 @@ void DnnPoolingGradOp::Compute( input_backprop->tensor()); } #endif // CUDNN_VERSION < 7300 - if (padding == EXPLICIT && (params.pad_top != params.pad_bottom || - params.pad_left != params.pad_right)) { - // Remove the padding that was added to the input shape above. - functor::PadInput()( - context->eigen_device(), - To32Bit(const_cast(transformed_input_backprop) - .tensor()), - {{static_cast(-input_pad_top), static_cast(-input_pad_left)}}, - {{static_cast(-input_pad_bottom), - static_cast(-input_pad_right)}}, - To32Bit(input_backprop->tensor()), data_format, T{}); - } } #define DEFINE_DNN_OPS(T) \