From 912db4a625e6e84ec1fd4123b0d2da23537dce7f Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Thu, 29 Aug 2019 08:04:53 -0700
Subject: [PATCH] Blacklist CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
 for NHWC

This algorithm is only specified for INT8 in the cuDNN convolution
documentation, and it causes spurious CUDA errors during autotuning when
run on floats with CUDA 10.

This might be a bit too big of a hammer, but it shouldn't regress
performance anywhere and it fixes the crashes we're seeing now.

PiperOrigin-RevId: 266142522
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index d15fdd06556..228e7ee515e 100755
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2946,6 +2946,20 @@ port::Status CudnnSupport::DoConvolve(
           "This configuration potentially produces incorrect results.");
     }
   }
 
+  // According to the cuDNN documentation algorithm 1 only supports NHWC
+  // convolutions when using INT8. It doesn't seem to check that before
+  // accessing memory though, leading to unaligned accesses.
+  // TODO(b/138726848): File nvidia bug and restrict this to broken versions.
+  if (algorithm_desc.algo_id() ==
+          CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
+      filter_descriptor.layout() == dnn::FilterLayout::kOutputYXInput &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_INT8 &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_INT8x4 &&
+      ToCudnnDataType(element_type) != CUDNN_DATA_UINT8x4) {
+    return port::Status(
+        port::error::FAILED_PRECONDITION,
+        "Data type not supported by algorithm configuration.");
+  }
   return port::Status::OK();
 };
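
For readers without the surrounding StreamExecutor context, here is a minimal
standalone sketch of the predicate the patch adds. The enums and the helper
name IsAlgoBlacklistedForLayout below are local stand-ins invented for
illustration; the real check lives inline in CudnnSupport::DoConvolve and
uses the actual cuDNN/StreamExecutor declarations.

    // Stand-in types; the real code uses dnn::FilterLayout and cudnnDataType_t.
    #include <iostream>

    enum class FilterLayout { kOutputInputYX, kOutputYXInput /* NHWC-style */ };
    enum class CudnnDataType { kFloat, kHalf, kInt8, kInt8x4, kUint8x4 };

    // In cuDNN, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is algorithm 1.
    constexpr int kImplicitPrecompGemm = 1;

    // Returns true when the (algorithm, layout, type) combination must be
    // rejected: algorithm 1 with an NHWC filter layout is only documented
    // for the INT8 family of data types.
    bool IsAlgoBlacklistedForLayout(int algo_id, FilterLayout layout,
                                    CudnnDataType type) {
      return algo_id == kImplicitPrecompGemm &&
             layout == FilterLayout::kOutputYXInput &&
             type != CudnnDataType::kInt8 &&
             type != CudnnDataType::kInt8x4 &&
             type != CudnnDataType::kUint8x4;
    }

    int main() {
      // float + NHWC + algorithm 1: the combination that crashed autotuning.
      std::cout << IsAlgoBlacklistedForLayout(kImplicitPrecompGemm,
                                              FilterLayout::kOutputYXInput,
                                              CudnnDataType::kFloat)
                << "\n";  // prints 1 (rejected)
      // int8 + NHWC + algorithm 1: the documented case, not rejected.
      std::cout << IsAlgoBlacklistedForLayout(kImplicitPrecompGemm,
                                              FilterLayout::kOutputYXInput,
                                              CudnnDataType::kInt8)
                << "\n";  // prints 0 (allowed)
      return 0;
    }

Expressing the blacklist as a pure predicate over (algorithm, layout, data
type), as sketched here, makes the rejection rule easy to unit-test without a
GPU; the patch itself inlines the same condition at the point where the
convolution is dispatched.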