Blacklist CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM for NHWC
This algorithm is only specified for INT8 data in the cuDNN convolution documentation, and it causes spurious CUDA errors during autotuning when run on float data with CUDA 10. This might be a bit too big of a hammer, but it shouldn't regress performance anywhere and fixes the crashes we're seeing now. PiperOrigin-RevId: 266142522
This commit is contained in:
parent
5cbd7384d8
commit
912db4a625
@ -2946,6 +2946,20 @@ port::Status CudnnSupport::DoConvolve(
|
||||
"This configuration potentially produces incorrect results.");
|
||||
}
|
||||
}
|
||||
// According to the cuDNN documentation algorithm 1 only supports NHWC
|
||||
// convolutions when using INT8. It doesn't seem to check that before
|
||||
// accessing memory though, leading to unaligned accesses.
|
||||
// TODO(b/138726848): File nvidia bug and restrict this to broken versions.
|
||||
if (algorithm_desc.algo_id() ==
|
||||
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
|
||||
filter_descriptor.layout() == dnn::FilterLayout::kOutputYXInput &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_INT8 &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_INT8x4 &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_UINT8x4) {
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"Data type not supported by algorithm configuration.");
|
||||
}
|
||||
return port::Status::OK();
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user