Merge pull request from kaixih:test_nhwc_depwise_pr

PiperOrigin-RevId: 315828768
Change-Id: Id8fa2d9eb6f651c2b9dd85f594fbeffa216472ca
This commit is contained in:
TensorFlower Gardener 2020-06-10 21:26:08 -07:00
commit 9172bf8f74
2 changed files with 26 additions and 2 deletions

View File

@ -582,10 +582,22 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel {
use_cudnn_ = CanUseCudnn() && std::is_same<Device, GPUDevice>::value;
cudnn_use_autotune_ = CudnnUseAutotune();
dtype_ = DataTypeToEnum<T>::value;
#if CUDNN_VERSION >= 8000
// From the cuDNN release note 8.0: We've extended the fprop and dgrad
// NHWC depthwise kernels to support more combinations (filter
// sizes/strides) such as 5x5/1x1, 5x5/2x2, 7x7/1x1, 7x7/2x2 (in addition
// to what we already have, 1x1/1x1, 3x3/1x1, 3x3/2x2), which provides
// good performance. (https://docs.nvidia.com/deeplearning/sdk/cudnn-
// release-notes/rel_8.html#rel_8)
use_cudnn_grouped_conv_ =
dtype_ == DT_HALF &&
((data_format_ == FORMAT_NCHW && stride_ == 1 && stride_w == 1) ||
(data_format_ == FORMAT_NHWC && stride_ == stride_w &&
(stride_ == 1 || stride_ == 2)));
#elif CUDNN_VERSION >= 7603
// Use CuDNN grouped conv (input gradient) when stride = 1, input/output is
// NCHW and float16(half). See cudnn release note 7.6.3 (https://docs.nvidi
// a.com/deeplearning/sdk/cudnn-release-notes/rel_763.html#rel_763).
#if CUDNN_VERSION >= 7603
use_cudnn_grouped_conv_ = dtype_ == DT_HALF &&
data_format_ == FORMAT_NCHW && stride_ == 1 &&
stride_w == 1;

View File

@ -302,10 +302,22 @@ class DepthwiseConv2dNativeOp : public BinaryOp<T> {
use_cudnn_ = CanUseCudnn() && std::is_same<Device, GPUDevice>::value;
cudnn_use_autotune_ = CudnnUseAutotune();
dtype_ = DataTypeToEnum<T>::value;
#if CUDNN_VERSION >= 8000
// From the cuDNN release note 8.0: We've extended the fprop and dgrad
// NHWC depthwise kernels to support more combinations (filter
// sizes/strides) such as 5x5/1x1, 5x5/2x2, 7x7/1x1, 7x7/2x2 (in addition
// to what we already have, 1x1/1x1, 3x3/1x1, 3x3/2x2), which provides
// good performance. (https://docs.nvidia.com/deeplearning/sdk/cudnn-
// release-notes/rel_8.html#rel_8)
use_cudnn_grouped_conv_ =
dtype_ == DT_HALF &&
(data_format_ == FORMAT_NCHW ||
(data_format_ == FORMAT_NHWC && stride_ == stride_w &&
(stride_ == 1 || stride_ == 2)));
#elif CUDNN_VERSION >= 7603
// Use CuDNN grouped conv only when input/output is NCHW and float16(half).
// See cudnn release note 7.6.3. (https://docs.nvidia.com/deeplearning/sdk/c
// udnn-release-notes/rel_763.html#rel_763)
#if CUDNN_VERSION >= 7603
use_cudnn_grouped_conv_ = dtype_ == DT_HALF && data_format_ == FORMAT_NCHW;
#else
use_cudnn_grouped_conv_ = false;