Blacklist CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM for NHWC
This algorithm is only specified for INT8 data in the cuDNN convolution documentation, and it causes spurious CUDA errors during autotuning when run on float data with CUDA 10. This might be a bit too big of a hammer, but it shouldn't regress performance anywhere and fixes the crashes we're seeing now. PiperOrigin-RevId: 266142522
This commit is contained in:
parent
5cbd7384d8
commit
912db4a625
@ -2946,6 +2946,20 @@ port::Status CudnnSupport::DoConvolve(
|
||||
"This configuration potentially produces incorrect results.");
|
||||
}
|
||||
}
|
||||
// According to the cuDNN documentation algorithm 1 only supports NHWC
|
||||
// convolutions when using INT8. It doesn't seem to check that before
|
||||
// accessing memory though, leading to unaligned accesses.
|
||||
// TODO(b/138726848): File nvidia bug and restrict this to broken versions.
|
||||
if (algorithm_desc.algo_id() ==
|
||||
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
|
||||
filter_descriptor.layout() == dnn::FilterLayout::kOutputYXInput &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_INT8 &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_INT8x4 &&
|
||||
ToCudnnDataType(element_type) != CUDNN_DATA_UINT8x4) {
|
||||
return port::Status(
|
||||
port::error::FAILED_PRECONDITION,
|
||||
"Data type not supported by algorithm configuration.");
|
||||
}
|
||||
return port::Status::OK();
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user