Merge pull request #42944 from ROCmSoftwarePlatform:google_upstream_rocm_fix_200902_2
PiperOrigin-RevId: 334635468
Change-Id: I06271b0c24b68e87913262fe9378f1ba2650a69b
commit 221282c169
@@ -1212,9 +1212,11 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                                  data_format_, tensor_in, out_shape,
                                  propagate_nans_);
     } else {
+#if !defined(TENSORFLOW_USE_ROCM)
       OP_REQUIRES(context, padding_ != EXPLICIT,
                   errors::Unimplemented("Explicit padding is not supported ",
                                         "when CUDNN is not enabled."));
+#endif
       Tensor* output = nullptr;
       OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
       if (is_int8x4) {
@@ -463,6 +463,8 @@ void DnnPoolingGradOp<T>::Compute(
     return;
   }
 
+  TensorFormat transformed_input_data_format = data_format;
+
 #if CUDNN_VERSION < 7300
   /// For now, cudnn does not support NHWC format, so we need to convert it
   /// to NCHW before calling cudnn. We need to get rid of this once it is done
@@ -516,6 +518,7 @@ void DnnPoolingGradOp<T>::Compute(
       functor::NHWCToNCHW<GPUDevice, T, 4>()(context->eigen_device<Device>(),
                                              tensor_in->tensor<T, 4>(),
                                              transformed_input.tensor<T, 4>());
+      transformed_input_data_format = FORMAT_NCHW;
     }
     if (tensor_out) {
       // For AvgPoolGrad, the original output tensor is not necessary. However,
@@ -577,6 +580,8 @@ void DnnPoolingGradOp<T>::Compute(
   int64 input_pad_left = 0;
   int64 input_pad_right = 0;
 
+  Tensor transformed_and_padded_input_backprop;
+
   if (padding == EXPLICIT && (params.pad_top != params.pad_bottom ||
                               params.pad_left != params.pad_right)) {
     // Pad the input in the same way we did during the forward pass, so that
@@ -588,7 +593,6 @@ void DnnPoolingGradOp<T>::Compute(
         std::min(params.pad_left, params.pad_right);
 
     Tensor padded_input;
-    Tensor padded_input_backprop;
     const int64 padding_rows_diff =
         std::abs(params.pad_top - params.pad_bottom);
     const int64 padding_cols_diff =
@@ -607,18 +611,18 @@ void DnnPoolingGradOp<T>::Compute(
             << " stride_rows" << params.row_stride;
 
     OP_REQUIRES_OK(
-        context,
-        context->allocate_temp(DataTypeToEnum<T>::value,
-                               ShapeFromFormat(data_format, batch_size,
+        context, context->allocate_temp(
+                     DataTypeToEnum<T>::value,
+                     ShapeFromFormat(transformed_input_data_format, batch_size,
                                      new_in_rows, new_in_cols, depth),
                      &padded_input));
 
     OP_REQUIRES_OK(
-        context,
-        context->allocate_temp(DataTypeToEnum<T>::value,
-                               ShapeFromFormat(data_format, batch_size,
+        context, context->allocate_temp(
+                     DataTypeToEnum<T>::value,
+                     ShapeFromFormat(transformed_input_data_format, batch_size,
                                      new_in_rows, new_in_cols, depth),
-                     &transformed_input_backprop));
+                     &transformed_and_padded_input_backprop));
 
     input_pad_top = params.pad_top - common_padding_rows;
     input_pad_bottom = params.pad_bottom - common_padding_rows;
@@ -644,7 +648,8 @@ void DnnPoolingGradOp<T>::Compute(
         To32Bit(const_transformed_input.tensor<T, 4>()),
         static_cast<int>(input_pad_top), static_cast<int>(input_pad_bottom),
         static_cast<int>(input_pad_left), static_cast<int>(input_pad_right),
-        To32Bit(padded_input.tensor<T, 4>()), data_format));
+        To32Bit(padded_input.tensor<T, 4>()),
+        transformed_input_data_format));
 
     transformed_input = padded_input;
 
@@ -654,6 +659,8 @@ void DnnPoolingGradOp<T>::Compute(
             << " horizontal padding set to: " << horizontal_padding;
     tensor_in_rows = new_in_rows;
     tensor_in_cols = new_in_cols;
+  } else {
+    transformed_and_padded_input_backprop = transformed_input_backprop;
   }
 
   /// Get ready to call cudnn
@@ -690,9 +697,9 @@ void DnnPoolingGradOp<T>::Compute(
   auto output_backprop_data =
       AsDeviceMemory(transformed_output_backprop.template flat<T>().data(),
                      transformed_output_backprop.template flat<T>().size());
-  auto input_backprop_data =
-      AsDeviceMemory(transformed_input_backprop.template flat<T>().data(),
-                     transformed_input_backprop.template flat<T>().size());
+  auto input_backprop_data = AsDeviceMemory(
+      transformed_and_padded_input_backprop.template flat<T>().data(),
+      transformed_and_padded_input_backprop.template flat<T>().size());
 
   auto* stream = context->op_device_context()->stream();
   OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
@@ -722,6 +729,20 @@ void DnnPoolingGradOp<T>::Compute(
   OP_REQUIRES(context, status,
               errors::Internal("dnn PoolBackward launch failed"));
 
+  if (padding == EXPLICIT && (params.pad_top != params.pad_bottom ||
+                              params.pad_left != params.pad_right)) {
+    // Remove the padding that was added to the input shape above.
+    functor::PadInput<GPUDevice, T, int, 4>()(
+        context->eigen_device<GPUDevice>(),
+        To32Bit(const_cast<const Tensor&>(transformed_and_padded_input_backprop)
+                    .tensor<T, 4>()),
+        {{static_cast<int>(-input_pad_top), static_cast<int>(-input_pad_left)}},
+        {{static_cast<int>(-input_pad_bottom),
+          static_cast<int>(-input_pad_right)}},
+        To32Bit(transformed_input_backprop.template tensor<T, 4>()),
+        transformed_input_data_format, T{});
+  }
+
 #if CUDNN_VERSION < 7300
   if (data_format == FORMAT_NHWC) {
     /// Transform the output data from NCHW back to NHWC.
@@ -732,18 +753,6 @@ void DnnPoolingGradOp<T>::Compute(
         input_backprop->tensor<T, 4>());
   }
 #endif  // CUDNN_VERSION < 7300
-  if (padding == EXPLICIT && (params.pad_top != params.pad_bottom ||
-                              params.pad_left != params.pad_right)) {
-    // Remove the padding that was added to the input shape above.
-    functor::PadInput<GPUDevice, T, int, 4>()(
-        context->eigen_device<GPUDevice>(),
-        To32Bit(const_cast<const Tensor&>(transformed_input_backprop)
-                    .tensor<T, 4>()),
-        {{static_cast<int>(-input_pad_top), static_cast<int>(-input_pad_left)}},
-        {{static_cast<int>(-input_pad_bottom),
-          static_cast<int>(-input_pad_right)}},
-        To32Bit(input_backprop->tensor<T, 4>()), data_format, T{});
-  }
 }
 
 #define DEFINE_DNN_OPS(T) \