From 912db4a625e6e84ec1fd4123b0d2da23537dce7f Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Thu, 29 Aug 2019 08:04:53 -0700
Subject: [PATCH] Blacklist CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
 for NHWC

This algorithm is only specified for INT8 in the cuDNN convolution
documentation, and it causes spurious CUDA errors during autotuning when
run on floats with CUDA 10.

This might be a bit too big of a hammer, but it shouldn't regress
performance anywhere and it fixes the crashes we're seeing now.

PiperOrigin-RevId: 266142522
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d15fdd06556..228e7ee515e 100755
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2946,6 +2946,20 @@ port::Status CudnnSupport::DoConvolve(
           "This configuration potentially produces incorrect results.");
     }
   }
 
+  // According to the cuDNN documentation algorithm 1 only supports NHWC
+  // convolutions when using INT8. It doesn't seem to check that before
+  // accessing memory though, leading to unaligned accesses.
+  // TODO(b/138726848): File nvidia bug and restrict this to broken versions.
+  if (algorithm_desc.algo_id() ==
+          CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
+      filter_descriptor.layout() == dnn::FilterLayout::kOutputYXInput &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_INT8 &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_INT8x4 &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_UINT8x4) {
+    return port::Status(
+        port::error::FAILED_PRECONDITION,
+        "Data type not supported by algorithm configuration.");
+  }
   return port::Status::OK();
 };
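
For readers without the surrounding StreamExecutor context, here is a minimal
standalone sketch of the predicate the patch adds. The enums and the helper
name IsAlgoBlacklistedForLayout below are local stand-ins invented for
illustration; the real check lives inline in CudnnSupport::DoConvolve and
uses the actual cuDNN/StreamExecutor declarations.

    // Stand-in types; the real code uses dnn::FilterLayout and cudnnDataType_t.
    #include <iostream>

    enum class FilterLayout { kOutputInputYX, kOutputYXInput /* NHWC-style */ };
    enum class CudnnDataType { kFloat, kHalf, kInt8, kInt8x4, kUint8x4 };

    // In cuDNN, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is algorithm 1.
    constexpr int kImplicitPrecompGemm = 1;

    // Returns true when the (algorithm, layout, type) combination must be
    // rejected: algorithm 1 with an NHWC filter layout is only documented
    // for the INT8 family of data types.
    bool IsAlgoBlacklistedForLayout(int algo_id, FilterLayout layout,
                                    CudnnDataType type) {
      return algo_id == kImplicitPrecompGemm &&
             layout == FilterLayout::kOutputYXInput &&
             type != CudnnDataType::kInt8 &&
             type != CudnnDataType::kInt8x4 &&
             type != CudnnDataType::kUint8x4;
    }

    int main() {
      // float + NHWC + algorithm 1: the combination that crashed autotuning.
      std::cout << IsAlgoBlacklistedForLayout(kImplicitPrecompGemm,
                                              FilterLayout::kOutputYXInput,
                                              CudnnDataType::kFloat)
                << "\n";  // prints 1 (rejected)
      // int8 + NHWC + algorithm 1: the documented case, not rejected.
      std::cout << IsAlgoBlacklistedForLayout(kImplicitPrecompGemm,
                                              FilterLayout::kOutputYXInput,
                                              CudnnDataType::kInt8)
                << "\n";  // prints 0 (allowed)
      return 0;
    }

Expressing the blacklist as a pure predicate over (algorithm, layout, data
type), as sketched here, makes the rejection rule easy to unit-test without a
GPU; the patch itself inlines the same condition at the point where the
convolution is dispatched.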