Rollback PR #39577: CUDNN v8 support
PiperOrigin-RevId: 314219496 Change-Id: I3e36453044e5b73274e05d84f8d7f2cdb5b144ae
This commit is contained in:
parent
61a0c3bccd
commit
5cf536f78d
|
@ -1278,11 +1278,7 @@ port::Status CheckAndFetchProjectionWeights(
|
||||||
cudnnRNNMode_t mode;
|
cudnnRNNMode_t mode;
|
||||||
cudnnRNNAlgo_t algo;
|
cudnnRNNAlgo_t algo;
|
||||||
cudnnDataType_t data_type;
|
cudnnDataType_t data_type;
|
||||||
#if CUDNN_VERSION >= 8000
|
|
||||||
RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor_v6(
|
|
||||||
#else
|
|
||||||
RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor(
|
RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor(
|
||||||
#endif
|
|
||||||
/*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
|
/*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
|
||||||
/*hiddenSize=*/&hidden_size_v,
|
/*hiddenSize=*/&hidden_size_v,
|
||||||
/*numLayers=*/&num_layers_v,
|
/*numLayers=*/&num_layers_v,
|
||||||
|
@ -2428,28 +2424,6 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
|
||||||
const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv,
|
const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv,
|
||||||
const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit,
|
const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit,
|
||||||
size_t memory_limit_bytes) {
|
size_t memory_limit_bytes) {
|
||||||
#if CUDNN_VERSION >= 8000
|
|
||||||
const int num_requested_algos = 5;
|
|
||||||
int num_returned_algos = 0;
|
|
||||||
cudnnConvolutionFwdAlgoPerf_t perf_results[num_requested_algos];
|
|
||||||
|
|
||||||
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionForwardAlgorithm_v7(
|
|
||||||
cudnn.handle(), input_nd.handle(), filter.handle(), conv.handle(),
|
|
||||||
output_nd.handle(), num_requested_algos, &num_returned_algos,
|
|
||||||
perf_results));
|
|
||||||
|
|
||||||
size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
|
|
||||||
for (int r = 0; r < num_returned_algos; r++) {
|
|
||||||
if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
|
|
||||||
perf_results[r].algo != CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
|
|
||||||
perf_results[r].memory <= mem_limit) {
|
|
||||||
return perf_results[r].algo;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return port::Status(port::error::INTERNAL,
|
|
||||||
"cudnnGetConvolutionForwardAlgorithm_v7 returned "
|
|
||||||
"no suitable algorithms. This could be a cudnn bug.");
|
|
||||||
#else
|
|
||||||
cudnnConvolutionFwdPreference_t preference =
|
cudnnConvolutionFwdPreference_t preference =
|
||||||
specify_workspace_limit ? CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
|
specify_workspace_limit ? CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
|
||||||
: CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
|
: CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
|
||||||
|
@ -2458,7 +2432,6 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
|
||||||
cudnn.handle(), input_nd.handle(), filter.handle(), conv.handle(),
|
cudnn.handle(), input_nd.handle(), filter.handle(), conv.handle(),
|
||||||
output_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
|
output_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
|
||||||
return algo_to_use;
|
return algo_to_use;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
port::StatusOr<cudnnConvolutionBwdDataAlgo_t>
|
port::StatusOr<cudnnConvolutionBwdDataAlgo_t>
|
||||||
|
@ -2469,29 +2442,6 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
|
||||||
const CudnnTensorDescriptor& output_nd,
|
const CudnnTensorDescriptor& output_nd,
|
||||||
bool specify_workspace_limit,
|
bool specify_workspace_limit,
|
||||||
size_t memory_limit_bytes) {
|
size_t memory_limit_bytes) {
|
||||||
#if CUDNN_VERSION >= 8000
|
|
||||||
const int num_requested_algos = 5;
|
|
||||||
int num_returned_algos = 0;
|
|
||||||
cudnnConvolutionBwdDataAlgoPerf_t perf_results[num_requested_algos];
|
|
||||||
|
|
||||||
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionBackwardDataAlgorithm_v7(
|
|
||||||
cudnn.handle(), filter.handle(), output_nd.handle(), conv.handle(),
|
|
||||||
input_nd.handle(), num_requested_algos, &num_returned_algos,
|
|
||||||
perf_results));
|
|
||||||
|
|
||||||
size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
|
|
||||||
for (int r = 0; r < num_returned_algos; r++) {
|
|
||||||
if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
|
|
||||||
perf_results[r].algo !=
|
|
||||||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED &&
|
|
||||||
perf_results[r].memory <= mem_limit) {
|
|
||||||
return perf_results[r].algo;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return port::Status(port::error::INTERNAL,
|
|
||||||
"cudnnGetConvolutionBackwardDataAlgorithm_v7 returned "
|
|
||||||
"no suitable algorithms. This could be a cudnn bug.");
|
|
||||||
#else
|
|
||||||
cudnnConvolutionBwdDataPreference_t preference =
|
cudnnConvolutionBwdDataPreference_t preference =
|
||||||
specify_workspace_limit
|
specify_workspace_limit
|
||||||
? CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
|
? CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
|
||||||
|
@ -2501,7 +2451,6 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
|
||||||
cudnn.handle(), filter.handle(), output_nd.handle(), conv.handle(),
|
cudnn.handle(), filter.handle(), output_nd.handle(), conv.handle(),
|
||||||
input_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
|
input_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
|
||||||
return algo_to_use;
|
return algo_to_use;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
port::StatusOr<cudnnConvolutionBwdFilterAlgo_t>
|
port::StatusOr<cudnnConvolutionBwdFilterAlgo_t>
|
||||||
|
@ -2512,28 +2461,6 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
|
||||||
const CudnnTensorDescriptor& output_nd,
|
const CudnnTensorDescriptor& output_nd,
|
||||||
bool specify_workspace_limit,
|
bool specify_workspace_limit,
|
||||||
size_t memory_limit_bytes) {
|
size_t memory_limit_bytes) {
|
||||||
#if CUDNN_VERSION >= 8000
|
|
||||||
const int num_requested_algos = 5;
|
|
||||||
int num_returned_algos = 0;
|
|
||||||
cudnnConvolutionBwdFilterAlgoPerf_t perf_results[num_requested_algos];
|
|
||||||
|
|
||||||
RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionBackwardFilterAlgorithm_v7(
|
|
||||||
cudnn.handle(), input_nd.handle(), output_nd.handle(), conv.handle(),
|
|
||||||
filter.handle(), num_requested_algos, &num_returned_algos, perf_results));
|
|
||||||
|
|
||||||
size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
|
|
||||||
for (int r = 0; r < num_returned_algos; r++) {
|
|
||||||
if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
|
|
||||||
perf_results[r].algo !=
|
|
||||||
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED &&
|
|
||||||
perf_results[r].memory <= mem_limit) {
|
|
||||||
return perf_results[r].algo;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return port::Status(port::error::INTERNAL,
|
|
||||||
"cudnnGetConvolutionBackwardFilterAlgorithm_v7 returned "
|
|
||||||
"no suitable algorithms. This could be a cudnn bug.");
|
|
||||||
#else
|
|
||||||
cudnnConvolutionBwdFilterPreference_t preference =
|
cudnnConvolutionBwdFilterPreference_t preference =
|
||||||
specify_workspace_limit
|
specify_workspace_limit
|
||||||
? CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
|
? CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
|
||||||
|
@ -2543,7 +2470,6 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
|
||||||
cudnn.handle(), input_nd.handle(), output_nd.handle(), conv.handle(),
|
cudnn.handle(), input_nd.handle(), output_nd.handle(), conv.handle(),
|
||||||
filter.handle(), preference, memory_limit_bytes, &algo_to_use));
|
filter.handle(), preference, memory_limit_bytes, &algo_to_use));
|
||||||
return algo_to_use;
|
return algo_to_use;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
|
port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -51,17 +51,15 @@ cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; }
|
||||||
#error cuDNN version earlier than 6 is not supported.
|
#error cuDNN version earlier than 6 is not supported.
|
||||||
#elif CUDNN_MAJOR < 7
|
#elif CUDNN_MAJOR < 7
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_6_0.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_6_0.inc"
|
||||||
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 1
|
#elif CUDNN_MINOR < 1
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_7_0.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_7_0.inc"
|
||||||
// 2 instead of 3: see https://github.com/tensorflow/tensorflow/issues/32350
|
// 2 instead of 3: see https://github.com/tensorflow/tensorflow/issues/32350
|
||||||
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 2
|
#elif CUDNN_MINOR < 2
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_7_1.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_7_1.inc"
|
||||||
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 4
|
#elif CUDNN_MINOR < 4
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_7_3.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_7_3.inc"
|
||||||
#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 6
|
#elif CUDNN_MINOR < 6
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_7_4.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_7_4.inc"
|
||||||
#elif CUDNN_MAJOR == 7
|
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_7_6.inc"
|
|
||||||
#else
|
#else
|
||||||
#include "tensorflow/stream_executor/cuda/cudnn_8_0.inc"
|
#include "tensorflow/stream_executor/cuda/cudnn_7_6.inc"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1069,31 +1069,11 @@ def _create_local_cuda_repository(repository_ctx):
|
||||||
],
|
],
|
||||||
))
|
))
|
||||||
|
|
||||||
if [int(x) for x in cuda_config.cudnn_version.split(".")] < [8, 0]:
|
|
||||||
cudnn_headers = ["cudnn.h"]
|
|
||||||
else:
|
|
||||||
cudnn_headers = [
|
|
||||||
"cudnn_adv_infer.h",
|
|
||||||
"cudnn_adv_train.h",
|
|
||||||
"cudnn_cnn_infer.h",
|
|
||||||
"cudnn_cnn_train.h",
|
|
||||||
"cudnn_ops_infer.h",
|
|
||||||
"cudnn_ops_train.h",
|
|
||||||
"cudnn.h",
|
|
||||||
"cudnn_version.h",
|
|
||||||
]
|
|
||||||
|
|
||||||
cudnn_srcs = []
|
|
||||||
cudnn_outs = []
|
|
||||||
for header in cudnn_headers:
|
|
||||||
cudnn_srcs.append(cudnn_header_dir + "/" + header)
|
|
||||||
cudnn_outs.append("cudnn/include/" + header)
|
|
||||||
|
|
||||||
copy_rules.append(make_copy_files_rule(
|
copy_rules.append(make_copy_files_rule(
|
||||||
repository_ctx,
|
repository_ctx,
|
||||||
name = "cudnn-include",
|
name = "cudnn-include",
|
||||||
srcs = cudnn_srcs,
|
srcs = [cudnn_header_dir + "/cudnn.h"],
|
||||||
outs = cudnn_outs,
|
outs = ["cudnn/include/cudnn.h"],
|
||||||
))
|
))
|
||||||
|
|
||||||
# Set up BUILD file for cuda/
|
# Set up BUILD file for cuda/
|
||||||
|
|
|
@ -219,20 +219,17 @@ def _find_library(base_paths, library_name, required_version):
|
||||||
return _find_file(base_paths, _library_paths(), filepattern)
|
return _find_file(base_paths, _library_paths(), filepattern)
|
||||||
|
|
||||||
|
|
||||||
def _find_versioned_file(base_paths, relative_paths, filepatterns,
|
def _find_versioned_file(base_paths, relative_paths, filepattern,
|
||||||
required_version, get_version):
|
required_version, get_version):
|
||||||
"""Returns first valid path to a file that matches the requested version."""
|
"""Returns first valid path to a file that matches the requested version."""
|
||||||
if type(filepatterns) not in [list, tuple]:
|
|
||||||
filepatterns = [filepatterns]
|
|
||||||
for path in _cartesian_product(base_paths, relative_paths):
|
for path in _cartesian_product(base_paths, relative_paths):
|
||||||
for filepattern in filepatterns:
|
|
||||||
for file in glob.glob(os.path.join(path, filepattern)):
|
for file in glob.glob(os.path.join(path, filepattern)):
|
||||||
actual_version = get_version(file)
|
actual_version = get_version(file)
|
||||||
if _matches_version(actual_version, required_version):
|
if _matches_version(actual_version, required_version):
|
||||||
return file, actual_version
|
return file, actual_version
|
||||||
raise _not_found_error(
|
raise _not_found_error(
|
||||||
base_paths, relative_paths,
|
base_paths, relative_paths,
|
||||||
", ".join(filepatterns) + " matching version '%s'" % required_version)
|
filepattern + " matching version '%s'" % required_version)
|
||||||
|
|
||||||
|
|
||||||
def _find_header(base_paths, header_name, required_version, get_version):
|
def _find_header(base_paths, header_name, required_version, get_version):
|
||||||
|
@ -429,13 +426,12 @@ def _find_cufft_config(base_paths, required_version, cuda_version):
|
||||||
def _find_cudnn_config(base_paths, required_version):
|
def _find_cudnn_config(base_paths, required_version):
|
||||||
|
|
||||||
def get_header_version(path):
|
def get_header_version(path):
|
||||||
version = [
|
version = (
|
||||||
_get_header_version(path, name)
|
_get_header_version(path, name)
|
||||||
for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")]
|
for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL"))
|
||||||
return ".".join(version) if version[0] else None
|
return ".".join(version)
|
||||||
|
|
||||||
header_path, header_version = _find_header(base_paths,
|
header_path, header_version = _find_header(base_paths, "cudnn.h",
|
||||||
("cudnn.h", "cudnn_version.h"),
|
|
||||||
required_version,
|
required_version,
|
||||||
get_header_version)
|
get_header_version)
|
||||||
cudnn_version = header_version.split(".")[0]
|
cudnn_version = header_version.split(".")[0]
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue