diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
old mode 100755
new mode 100644
index 6122877f91f..2dbd2c58ebd
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1278,6 +1278,18 @@ port::Status CheckAndFetchProjectionWeights(
   cudnnRNNMode_t mode;
   cudnnRNNAlgo_t algo;
   cudnnDataType_t data_type;
+#if CUDNN_VERSION >= 8000
+  RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor_v6(
+      /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
+      /*hiddenSize=*/&hidden_size_v,
+      /*numLayers=*/&num_layers_v,
+      /*dropoutDesc=*/&dropout_desc,
+      /*inputMode=*/&input_mode,
+      /*direction=*/&direction,
+      /*mode=*/&mode,
+      /*algo=*/&algo,
+      /*mathPrec=*/&data_type));
+#else
   RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor(
       /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc,
       /*hiddenSize=*/&hidden_size_v,
@@ -1287,7 +1299,8 @@ port::Status CheckAndFetchProjectionWeights(
       /*direction=*/&direction,
       /*mode=*/&mode,
       /*algo=*/&algo,
-      /*dataType=*/&data_type));
+      /*mathPrec=*/&data_type));
+#endif
   int rec_proj_size_v;
   int out_proj_size_v;
   RETURN_IF_CUDNN_ERROR(cudnnGetRNNProjectionLayers(
@@ -2424,6 +2437,28 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
     const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv,
     const CudnnTensorDescriptor& output_nd, bool specify_workspace_limit,
     size_t memory_limit_bytes) {
+#if CUDNN_VERSION >= 8000
+  const int num_requested_algos = 5;
+  int num_returned_algos = 0;
+  cudnnConvolutionFwdAlgoPerf_t perf_results[num_requested_algos];
+
+  RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionForwardAlgorithm_v7(
+      cudnn.handle(), input_nd.handle(), filter.handle(), conv.handle(),
+      output_nd.handle(), num_requested_algos, &num_returned_algos,
+      perf_results));
+
+  size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
+  for (int r = 0; r < num_returned_algos; r++) {
+    if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
+        perf_results[r].algo != CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+        perf_results[r].memory <= mem_limit) {
+      return perf_results[r].algo;
+    }
+  }
+  return port::Status(port::error::INTERNAL,
+                      "cudnnGetConvolutionForwardAlgorithm_v7 returned "
+                      "no suitable algorithms. This could be a cudnn bug.");
+#else
   cudnnConvolutionFwdPreference_t preference =
       specify_workspace_limit ? CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT
                               : CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
@@ -2432,6 +2467,7 @@ port::StatusOr<cudnnConvolutionFwdAlgo_t> GetCudnnConvolutionForwardAlgo(
       cudnn.handle(), input_nd.handle(), filter.handle(), conv.handle(),
       output_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
   return algo_to_use;
+#endif
 }
 
 port::StatusOr<cudnnConvolutionBwdDataAlgo_t>
@@ -2442,6 +2478,29 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
                                     const CudnnTensorDescriptor& output_nd,
                                     bool specify_workspace_limit,
                                     size_t memory_limit_bytes) {
+#if CUDNN_VERSION >= 8000
+  const int num_requested_algos = 5;
+  int num_returned_algos = 0;
+  cudnnConvolutionBwdDataAlgoPerf_t perf_results[num_requested_algos];
+
+  RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionBackwardDataAlgorithm_v7(
+      cudnn.handle(), filter.handle(), output_nd.handle(), conv.handle(),
+      input_nd.handle(), num_requested_algos, &num_returned_algos,
+      perf_results));
+
+  size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
+  for (int r = 0; r < num_returned_algos; r++) {
+    if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
+        perf_results[r].algo !=
+            CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED &&
+        perf_results[r].memory <= mem_limit) {
+      return perf_results[r].algo;
+    }
+  }
+  return port::Status(port::error::INTERNAL,
+                      "cudnnGetConvolutionBackwardDataAlgorithm_v7 returned "
+                      "no suitable algorithms. This could be a cudnn bug.");
+#else
   cudnnConvolutionBwdDataPreference_t preference =
       specify_workspace_limit
           ? CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT
@@ -2451,6 +2510,7 @@ GetCudnnConvolutionBackwardDataAlgo(const CudnnHandle& cudnn,
       cudnn.handle(), filter.handle(), output_nd.handle(), conv.handle(),
       input_nd.handle(), preference, memory_limit_bytes, &algo_to_use));
   return algo_to_use;
+#endif
 }
 
 port::StatusOr<cudnnConvolutionBwdFilterAlgo_t>
@@ -2461,6 +2521,28 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
                                       const CudnnTensorDescriptor& output_nd,
                                       bool specify_workspace_limit,
                                       size_t memory_limit_bytes) {
+#if CUDNN_VERSION >= 8000
+  const int num_requested_algos = 5;
+  int num_returned_algos = 0;
+  cudnnConvolutionBwdFilterAlgoPerf_t perf_results[num_requested_algos];
+
+  RETURN_IF_CUDNN_ERROR(cudnnGetConvolutionBackwardFilterAlgorithm_v7(
+      cudnn.handle(), input_nd.handle(), output_nd.handle(), conv.handle(),
+      filter.handle(), num_requested_algos, &num_returned_algos, perf_results));
+
+  size_t mem_limit = specify_workspace_limit ? memory_limit_bytes : 0ULL;
+  for (int r = 0; r < num_returned_algos; r++) {
+    if (perf_results[r].status == CUDNN_STATUS_SUCCESS &&
+        perf_results[r].algo !=
+            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED &&
+        perf_results[r].memory <= mem_limit) {
+      return perf_results[r].algo;
+    }
+  }
+  return port::Status(port::error::INTERNAL,
+                      "cudnnGetConvolutionBackwardFilterAlgorithm_v7 returned "
+                      "no suitable algorithms. This could be a cudnn bug.");
+#else
   cudnnConvolutionBwdFilterPreference_t preference =
       specify_workspace_limit
           ? CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT
@@ -2470,6 +2552,7 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn,
       cudnn.handle(), input_nd.handle(), output_nd.handle(), conv.handle(),
       filter.handle(), preference, memory_limit_bytes, &algo_to_use));
   return algo_to_use;
+#endif
 }
 
 port::StatusOr<DeviceMemory<uint8>> AllocateCudnnConvolutionForwardWorkspace(
diff --git a/tensorflow/stream_executor/cuda/cudnn_8_0.inc b/tensorflow/stream_executor/cuda/cudnn_8_0.inc
new file mode 100644
index 00000000000..9eca12e94f3
--- /dev/null
+++ b/tensorflow/stream_executor/cuda/cudnn_8_0.inc
@@ -0,0 +1,3316 @@
+// Auto-generated, do not edit.
+ +extern "C" { +size_t CUDNNWINAPI cudnnGetVersion(void) { + using FuncPtr = size_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnGetVersion"); + if (!func_ptr) return 0; + return func_ptr(); +} + +size_t CUDNNWINAPI cudnnGetCudartVersion(void) { + using FuncPtr = size_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); + if (!func_ptr) return 0; + return func_ptr(); +} + +const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { + using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); + static auto func_ptr = LoadSymbol("cudnnGetErrorString"); + if (!func_ptr) return "cudnnGetErrorString symbol not found."; + return func_ptr(status); +} + +cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, + cudnnStatus_t *rstatus, + cudnnErrQueryMode_t mode, + cudnnRuntimeTag_t *tag) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); + static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rstatus, mode, tag); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, + int *value) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); + static auto func_ptr = LoadSymbol("cudnnGetProperty"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(type, value); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); + static auto func_ptr = LoadSymbol("cudnnCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); + static auto func_ptr = LoadSymbol("cudnnDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, + cudaStream_t streamId) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudnnSetStream"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, + cudaStream_t *streamId) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); + static auto func_ptr = LoadSymbol("cudnnGetStream"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( + cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, + cudnnDataType_t dataType, /* image data type */ + int n, /* number of inputs (batch size) */ + int c, /* number of input feature maps */ + int h, /* height of input section */ + int w) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, + cudnnDataType_t, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, format, dataType, n, 
c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( + cudnnTensorDescriptor_t tensorDesc, + cudnnDataType_t dataType, /* image data type */ + int n, /* number of inputs (batch size) */ + int c, /* number of input feature maps */ + int h, /* height of input section */ + int w, /* width of input section */ + int nStride, int cStride, int hStride, int wStride) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, + int, int, int, int, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, + wStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( + const cudnnTensorDescriptor_t tensorDesc, + cudnnDataType_t *dataType, /* image data type */ + int *n, /* number of inputs (batch size) */ + int *c, /* number of input feature maps */ + int *h, /* height of input section */ + int *w, /* width of input section */ + int *nStride, int *cStride, int *hStride, int *wStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, + int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, + wStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( + cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, + const int dimA[], const int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); + static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( + cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, + cudnnDataType_t dataType, int nbDims, const int dimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, + cudnnDataType_t, int, const int[]); + static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, format, dataType, nbDims, dimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( + const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, + cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, + cudnnDataType_t *, int *, int[], int[]); + static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( + const cudnnTensorDescriptor_t tensorDesc, size_t *size) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, size); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); + static auto func_ptr = 
LoadSymbol("cudnnDestroyTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest( + const cudnnTensorTransformDescriptor_t transformDesc, + const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc, + size_t *destSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t, + cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnInitTransformDest"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, srcDesc, destDesc, destSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t *transformDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, + const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], + const int32_t padAfterA[], const uint32_t foldA[], + const cudnnFoldingDirection_t direction) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorTransformDescriptor_t, const uint32_t, + const cudnnTensorFormat_t, const int32_t[], const int32_t[], + const uint32_t[], const cudnnFoldingDirection_t); + static auto func_ptr = + LoadSymbol("cudnnSetTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA, + foldA, direction); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, + cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[], + uint32_t foldA[], cudnnFoldingDirection_t *direction) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *, + int32_t[], int32_t[], uint32_t[], cudnnFoldingDirection_t *); + static auto func_ptr = + LoadSymbol("cudnnGetTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA, + padAfterA, foldA, direction); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx( + cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t 
transDesc, + const void *alpha, const cudnnTensorDescriptor_t srcDesc, + const void *srcData, const void *beta, + const cudnnTensorDescriptor_t destDesc, void *destData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformTensorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, + destData); +} + +cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t aDesc, + const void *A, const void *beta, + const cudnnTensorDescriptor_t cDesc, + void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnAddTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( + cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, + cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, + cudnnDataType_t, cudnnNanPropagation_t); + static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( + const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, + cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, + cudnnNanPropagation_t *); + static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpTensor( + cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, + const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, + const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, + const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static 
auto func_ptr = LoadSymbol("cudnnOpTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, + beta, cDesc, C); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t *reduceTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t reduceTensorDesc, + cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, + cudnnNanPropagation_t reduceTensorNanOpt, + cudnnReduceTensorIndices_t reduceTensorIndices, + cudnnIndicesType_t reduceTensorIndicesType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, + cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); + static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, + reduceTensorNanOpt, reduceTensorIndices, + reduceTensorIndicesType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( + const cudnnReduceTensorDescriptor_t reduceTensorDesc, + cudnnReduceTensorOp_t *reduceTensorOp, + cudnnDataType_t *reduceTensorCompType, + cudnnNanPropagation_t *reduceTensorNanOpt, + cudnnReduceTensorIndices_t *reduceTensorIndices, + cudnnIndicesType_t *reduceTensorIndicesType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, + cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, + cudnnIndicesType_t *); + static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, + reduceTensorNanOpt, reduceTensorIndices, + reduceTensorIndicesType); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t reduceTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, + const cudnnTensorDescriptor_t, const 
cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + void *indices, size_t indicesSizeInBytes, void *workspace, + size_t workspaceSizeInBytes, const void *alpha, + const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, + const cudnnTensorDescriptor_t cDesc, void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, + void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnReduceTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, + workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, + C); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, + const cudnnTensorDescriptor_t yDesc, + void *y, const void *valuePtr) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); + static auto func_ptr = LoadSymbol("cudnnSetTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, yDesc, y, valuePtr); +} + +cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, + const cudnnTensorDescriptor_t yDesc, + void *y, const void *alpha) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); + static auto func_ptr = LoadSymbol("cudnnScaleTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, yDesc, y, alpha); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( + cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t dataType, /* image data type */ + cudnnTensorFormat_t format, int k, /* number of output feature maps */ + int c, /* number of input feature maps */ + int h, /* height of each input filter */ + int w) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, + cudnnTensorFormat_t, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, k, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( + const cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t *dataType, /* image data type */ + cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ + int *c, /* number of input feature maps */ + int *h, /* height of each input filter */ + int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, + int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, k, c, h, w); +} + 
+cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( + cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t dataType, /* image data type */ + cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, + cudnnTensorFormat_t, int, const int[]); + static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( + const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, + cudnnDataType_t *dataType, /* image data type */ + cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFilterDescriptor_t, int, cudnnDataType_t *, + cudnnTensorFormat_t *, int *, int[]); + static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, + filterDimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes( + const cudnnFilterDescriptor_t filterDesc, size_t *size) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetFilterSizeInBytes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, size); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformFilter( + cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, + const void *alpha, const cudnnFilterDescriptor_t srcDesc, + const void *srcData, const void *beta, + const cudnnFilterDescriptor_t destDesc, void *destData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, const void *, + const cudnnFilterDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformFilter"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, + destData); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( + cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc); +} + +cudnnStatus_t 
CUDNNWINAPI cudnnSetPooling2dDescriptor( + cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, + cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, + int verticalPadding, int horizontalPadding, int verticalStride, + int horizontalStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, + int, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, + windowWidth, verticalPadding, horizontalPadding, + verticalStride, horizontalStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( + const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, + cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, + int *windowWidth, int *verticalPadding, int *horizontalPadding, + int *verticalStride, int *horizontalStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, + cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, + windowWidth, verticalPadding, horizontalPadding, + verticalStride, horizontalStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( + cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, + const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, + const int windowDimA[], const int paddingA[], const int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, + const cudnnNanPropagation_t, int, const int[], const int[], const int[]); + static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, + paddingA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( + const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, + cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, + int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, + cudnnNanPropagation_t *, int *, int[], int[], int[]); + static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, + windowDimA, paddingA, strideA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int nbDims, int outputTensorDimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, + const cudnnTensorDescriptor_t, int, int[]); + static auto func_ptr = + LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int *n, int *c, int *h, int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const 
cudnnPoolingDescriptor_t, + const cudnnTensorDescriptor_t, + int *, int *, int *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetPooling2dForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( + cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnPoolingForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( + cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, + cudnnNanPropagation_t reluNanOpt, double coef) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, + cudnnActivationMode_t, + cudnnNanPropagation_t, double); + static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc, mode, reluNanOpt, coef); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, + cudnnActivationMode_t *mode, + cudnnNanPropagation_t *reluNanOpt, double *coef) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnActivationDescriptor_t, cudnnActivationMode_t *, + cudnnNanPropagation_t *, double *); + static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc, mode, reluNanOpt, coef); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnActivationForward( + cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnActivationDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnActivationForward"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, + unsigned lrnN, double lrnAlpha, + double lrnBeta, double lrnK) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnLRNDescriptor_t, unsigned int, double, double, double); + static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, + unsigned *lrnN, + double *lrnAlpha, + double *lrnBeta, double *lrnK) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); + static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(lrnDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, + cudnnDivNormMode_t mode, const void *alpha, + const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ + const void *x, + const void *means, /* if NULL, means are assumed to be zero */ + void *temp, void *temp2, const void *beta, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = + LoadSymbol("cudnnDivisiveNormalizationForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, + beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( + cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, + cudnnBatchNormMode_t mode) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, + cudnnBatchNormMode_t); + static auto func_ptr = 
LoadSymbol("cudnnDeriveBNTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(derivedBnDesc, xDesc, mode); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ + const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, + const void *bnBias, const void *estimatedMean, + const void *estimatedVariance, double epsilon) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, const void *, const void *, double); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardInference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, + bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, + estimatedVariance, epsilon); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( + cudnnSpatialTransformerDescriptor_t *stDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( + cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, + cudnnDataType_t dataType, const int nbDims, const int dimA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, + const int, const int[]); + static auto func_ptr = + LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( + cudnnSpatialTransformerDescriptor_t stDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( + cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, + const void *theta, void *grid) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnSpatialTfGridGeneratorForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, theta, grid); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( + cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, + void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + 
cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( + cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(xdesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, + void *states, size_t stateSizeInBytes, unsigned long long seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float, void *, size_t, unsigned long long); + static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, + void *states, size_t stateSizeInBytes, unsigned long long seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float, void *, size_t, unsigned long long); + static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, + void **states, unsigned long long *seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float *, void **, unsigned long long *); + static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( + cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, + const cudnnTensorDescriptor_t xdesc, const void *x, + const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnDropoutDescriptor_t, + const 
cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnDropoutForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor( + cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, + cudnnAlgorithm_t); + static auto func_ptr = LoadSymbol("cudnnSetAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoDesc, algorithm); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor( + const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, + cudnnAlgorithm_t *); + static auto func_ptr = LoadSymbol("cudnnGetAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoDesc, algorithm); +} + +cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor( + const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, + cudnnAlgorithmDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnCopyAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(src, dest); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( + cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); + static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, numberToCreate); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance( + cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, + cudnnStatus_t status, float time, size_t memory) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, + cudnnAlgorithmDescriptor_t, + cudnnStatus_t, float, size_t); + static auto func_ptr = LoadSymbol("cudnnSetAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, algoDesc, status, time, memory); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance( + const cudnnAlgorithmPerformance_t algoPerf, + cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time, + size_t *memory) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, + cudnnStatus_t *, float *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, 
algoDesc, status, time, memory); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( + cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); + static auto func_ptr = + LoadSymbol("cudnnDestroyAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, numberToDestroy); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize( + cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, + size_t *algoSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetAlgorithmSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algoDesc, algoSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, + void *algoSpace, size_t algoSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnSaveAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm( + cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes, + cudnnAlgorithmDescriptor_t algoDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, + cudnnAlgorithmDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnRestoreAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, + cudnnCallback_t fptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); + static auto func_ptr = LoadSymbol("cudnnSetCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mask, udata, fptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, + cudnnCallback_t *fptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); + static auto func_ptr = LoadSymbol("cudnnGetCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mask, udata, fptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpsInferVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnOpsInferVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateConvolutionDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyConvolutionDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( + cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)(cudnnConvolutionDescriptor_t, + cudnnMathType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, mathType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( + cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnMathType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, mathType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( + cudnnConvolutionDescriptor_t convDesc, int groupCount) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, groupCount); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( + cudnnConvolutionDescriptor_t convDesc, int *groupCount) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, groupCount); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionReorderType( + cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnReorderType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionReorderType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, reorderType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionReorderType( + cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnReorderType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionReorderType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, reorderType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( + cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ + int pad_w, /* zero-padding width */ + int u, /* vertical filter stride */ + int v, /* horizontal filter stride */ + int dilation_h, /* filter dilation in the vertical dimension */ + int dilation_w, /* filter dilation in the horizontal dimension */ + cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnConvolutionDescriptor_t, int, int, int, int, int, int, + cudnnConvolutionMode_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( + const cudnnConvolutionDescriptor_t convDesc, + int *pad_h, /* zero-padding height */ + int *pad_w, /* zero-padding width */ + int *u, /* vertical filter stride */ + int *v, /* horizontal filter stride */ + int *dilation_h, /* filter dilation in the vertical dimension */ + int *dilation_w, /* filter dilation in the horizontal dimension */ + cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, 
int *, + int *, cudnnConvolutionMode_t *, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor( + cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ + const int padA[], const int filterStrideA[], const int dilationA[], + cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], + cudnnConvolutionMode_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( + const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, + int *arrayLength, int padA[], int strideA[], int dilationA[], + cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], + cudnnConvolutionMode_t *, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, + dilationA, mode, computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, int *, int *, int *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, int nbDims, + int tensorOuputDimA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, int, int[]); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, + tensorOuputDimA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( + cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, + int *returnedAlgoCount, 
cudnnConvolutionFwdAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionFwdAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, + requestedAlgoCount, returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionFwdAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, + returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, + void *workSpace, size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, + const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, + requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const void *x, const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, + const void *, const cudnnFilterDescriptor_t, + const cudnnConvolutionDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnIm2Col"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); +} + +cudnnStatus_t CUDNNWINAPI cudnnReorderFilterAndBias( + cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, + cudnnReorderType_t reorderType, const void *filterData, + void *reorderedFilterData, int reorderBias, const void *biasData, + void *reorderedBiasData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t, + const void *, void *, int, const void *, void *); + static auto func_ptr = LoadSymbol("cudnnReorderFilterAndBias"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(handle, filterDesc, reorderType, filterData, + reorderedFilterData, reorderBias, biasData, + reorderedBiasData); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, + void *workSpace, size_t workSpaceSizeInBytes, const void *beta, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, + size_t, const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, + workSpaceSizeInBytes, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward( + cudnnHandle_t handle, const void *alpha1, + const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, + void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2, + const cudnnTensorDescriptor_t zDesc, const void *z, + const cudnnTensorDescriptor_t biasDesc, const void *bias, + const cudnnActivationDescriptor_t activationDesc, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, + size_t, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = + LoadSymbol("cudnnConvolutionBiasActivationForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, + workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, + activationDesc, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( + cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, 
count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( + cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionBwdDataAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, + returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx( + cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t dxDesc, void *dx, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, + const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, + requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7( + cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, + const cudnnTensorDescriptor_t diffDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionBwdDataAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithm_v7"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, + requestedAlgoCount, returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize( + cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardDataWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData( + cudnnHandle_t handle, const void *alpha, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnConvolutionDescriptor_t convDesc, + cudnnConvolutionBwdDataAlgo_t algo, void *workSpace, + size_t workSpaceSizeInBytes, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, + size_t, const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, + workSpace, workSpaceSizeInBytes, beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFoldedConvBackwardDataDescriptors( + const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, + const cudnnTensorDescriptor_t diffDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t gradDesc, + const cudnnTensorFormat_t transformFormat, + cudnnFilterDescriptor_t foldedFilterDesc, + cudnnTensorDescriptor_t paddedDiffDesc, + cudnnConvolutionDescriptor_t foldedConvDesc, + cudnnTensorDescriptor_t foldedGradDesc, + cudnnTensorTransformDescriptor_t filterFoldTransDesc, + cudnnTensorTransformDescriptor_t diffPadTransDesc, + cudnnTensorTransformDescriptor_t gradFoldTransDesc, + cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnHandle_t, const cudnnFilterDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorFormat_t, + cudnnFilterDescriptor_t, cudnnTensorDescriptor_t, + cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t, + cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t, + cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnGetFoldedConvBackwardDataDescriptors"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, + transformFormat, foldedFilterDesc, paddedDiffDesc, + foldedConvDesc, foldedGradDesc, filterFoldTransDesc, + diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsConstParamPack( + cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *, + cudnnFusedOps_t); + static auto func_ptr = + LoadSymbol("cudnnCreateFusedOpsConstParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack, ops); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyFusedOpsConstParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsConstParamPackAttribute( + cudnnFusedOpsConstParamPack_t constPack, + cudnnFusedOpsConstParamLabel_t paramLabel, const void *param) { + using FuncPtr 
= cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t, + cudnnFusedOpsConstParamLabel_t, + const void *); + static auto func_ptr = + LoadSymbol("cudnnSetFusedOpsConstParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack, paramLabel, param); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsConstParamPackAttribute( + const cudnnFusedOpsConstParamPack_t constPack, + cudnnFusedOpsConstParamLabel_t paramLabel, void *param, int *isNULL) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, + void *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetFusedOpsConstParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack, paramLabel, param, isNULL); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsVariantParamPack( + cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t); + static auto func_ptr = + LoadSymbol("cudnnCreateFusedOpsVariantParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack, ops); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyFusedOpsVariantParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsVariantParamPackAttribute( + cudnnFusedOpsVariantParamPack_t varPack, + cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t, + cudnnFusedOpsVariantParamLabel_t, void *); + static auto func_ptr = + LoadSymbol("cudnnSetFusedOpsVariantParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack, paramLabel, ptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsVariantParamPackAttribute( + const cudnnFusedOpsVariantParamPack_t varPack, + cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnFusedOpsVariantParamPack_t, + cudnnFusedOpsVariantParamLabel_t, void *); + static auto func_ptr = + LoadSymbol("cudnnGetFusedOpsVariantParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack, paramLabel, ptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, + cudnnFusedOps_t ops) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t); + static auto func_ptr = LoadSymbol("cudnnCreateFusedOpsPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan, ops); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t); + static auto func_ptr = LoadSymbol("cudnnDestroyFusedOpsPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan); +} + +cudnnStatus_t CUDNNWINAPI +cudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan, + const cudnnFusedOpsConstParamPack_t constPack, + size_t *workspaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t, + size_t *); + static auto func_ptr = 
LoadSymbol("cudnnMakeFusedOpsPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, plan, constPack, workspaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, + cudnnFusedOpsVariantParamPack_t varPack) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t, + cudnnFusedOpsVariantParamPack_t); + static auto func_ptr = LoadSymbol("cudnnFusedOpsExecute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, plan, varPack); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateRNNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyRNNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v8( + cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t algo, cudnnRNNMode_t cellMode, + cudnnRNNBiasMode_t biasMode, cudnnDirectionMode_t dirMode, + cudnnRNNInputMode_t inputMode, cudnnDataType_t dataType, + cudnnDataType_t mathPrec, cudnnMathType_t mathType, int32_t inputSize, + int32_t hiddenSize, int32_t projSize, int32_t numLayers, + cudnnDropoutDescriptor_t dropoutDesc, uint32_t auxFlags) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDescriptor_t, cudnnRNNAlgo_t, cudnnRNNMode_t, cudnnRNNBiasMode_t, + cudnnDirectionMode_t, cudnnRNNInputMode_t, cudnnDataType_t, + cudnnDataType_t, cudnnMathType_t, int32_t, int32_t, int32_t, int32_t, + cudnnDropoutDescriptor_t, uint32_t); + static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode, + dataType, mathPrec, mathType, inputSize, hiddenSize, projSize, + numLayers, dropoutDesc, auxFlags); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor_v8( + cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t *algo, + cudnnRNNMode_t *cellMode, cudnnRNNBiasMode_t *biasMode, + cudnnDirectionMode_t *dirMode, cudnnRNNInputMode_t *inputMode, + cudnnDataType_t *dataType, cudnnDataType_t *mathPrec, + cudnnMathType_t *mathType, int32_t *inputSize, int32_t *hiddenSize, + int32_t *projSize, int32_t *numLayers, + cudnnDropoutDescriptor_t *dropoutDesc, uint32_t *auxFlags) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDescriptor_t, cudnnRNNAlgo_t *, cudnnRNNMode_t *, + cudnnRNNBiasMode_t *, cudnnDirectionMode_t *, cudnnRNNInputMode_t *, + cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *, int32_t *, + int32_t *, int32_t *, int32_t *, cudnnDropoutDescriptor_t *, uint32_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode, + dataType, mathPrec, mathType, inputSize, hiddenSize, projSize, + numLayers, dropoutDesc, auxFlags); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, + const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, + cudnnRNNInputMode_t inputMode, 
cudnnDirectionMode_t direction, + cudnnRNNMode_t cellMode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, + cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, + cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetRNNDescriptor_v6"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, + inputMode, direction, cellMode, algo, mathPrec); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor_v6( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize, + int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc, + cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction, + cudnnRNNMode_t *cellMode, cudnnRNNAlgo_t *algo, cudnnDataType_t *mathPrec) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, + cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, + cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNDescriptor_v6"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, + inputMode, direction, cellMode, algo, mathPrec); +} + +cudnnStatus_t CUDNNWINAPI +cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t); + static auto func_ptr = LoadSymbol("cudnnSetRNNMatrixMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, mType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType( + cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNMatrixMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, mType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, + cudnnRNNBiasMode_t biasMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t); + static auto func_ptr = LoadSymbol("cudnnSetRNNBiasMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, biasMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, + cudnnRNNBiasMode_t *biasMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNBiasMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, biasMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, + cudnnRNNDescriptor_t rnnDesc, + cudnnRNNClipMode_t clipMode, + cudnnNanPropagation_t clipNanOpt, + double lclip, double rclip) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, + cudnnNanPropagation_t, double, double); + static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, + cudnnRNNDescriptor_t rnnDesc, + cudnnRNNClipMode_t *clipMode, + cudnnNanPropagation_t *clipNanOpt, + double *lclip, double *rclip) { + using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, + cudnnNanPropagation_t *, double *, double *); + static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); +} + +cudnnStatus_t CUDNNWINAPI +cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + const int recProjSize, const int outProjSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); + static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, recProjSize, outProjSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, + int *outProjSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, recProjSize, outProjSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( + cudnnRNNDescriptor_t rnnDesc, const int minibatch, + const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, + const cudnnDataType_t, + cudnnPersistentRNNPlan_t *); + static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, minibatch, dataType, plan); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); + static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( + cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, + cudnnPersistentRNNPlan_t); + static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, plan); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, + cudnnDataType_t dataType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, + size_t *, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( + 
cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, + cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, + const void *, const int, cudnnFilterDescriptor_t, void **); + static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, + linLayerMatDesc, linLayerMat); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, + cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, + const void *, const int, cudnnFilterDescriptor_t, void **); + static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, + linLayerBiasDesc, linLayerBias); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, + unsigned paddingMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int); + static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, paddingMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, + unsigned *paddingMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int *); + static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, paddingMode); +} + +cudnnStatus_t CUDNNWINAPI 
+cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( + cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, + cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, + int vectorSize, + const int seqLengthArray[], /* length of each sequence in the batch */ + void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, + int, const int[], void *); + static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, + vectorSize, seqLengthArray, paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( + cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t *dataType, + cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, + int *vectorSize, int arrayLengthRequested, int seqLengthArray[], + void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, + int *, int *, int *, int, int[], void *); + static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, + vectorSize, arrayLengthRequested, seqLengthArray, + paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnRNNDataDescriptor_t yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, + const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ + const void *keys, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ + void *cAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ + void *iAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ + void *queries, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const 
cudnnRNNDataDescriptor_t, + void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, + yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, + iDesc, iAttn, qDesc, queries, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + cudnnAlgorithmDescriptor_t algoDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnSetRNNAlgorithmDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, algoDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetRNNForwardInferenceAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnAlgorithmPerformance_t *perfResults, void *workspace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, const float, const int, + int *, cudnnAlgorithmPerformance_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindRNNForwardInferenceAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, + requestedAlgoCount, returnedAlgoCount, perfResults, workspace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroySeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc); +} + +cudnnStatus_t CUDNNWINAPI 
cudnnSetSeqDataDescriptor( + cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, + const int dimA[], const cudnnSeqDataAxis_t axes[], + size_t seqLengthArraySize, const int seqLengthArray[], void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int[], + const cudnnSeqDataAxis_t[], size_t, const int[], void *); + static auto func_ptr = LoadSymbol("cudnnSetSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize, + seqLengthArray, paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetSeqDataDescriptor( + const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t *dataType, + int *nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], + size_t *seqLengthArraySize, size_t seqLengthSizeRequested, + int seqLengthArray[], void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int[], + cudnnSeqDataAxis_t[], size_t *, size_t, int[], void *); + static auto func_ptr = LoadSymbol("cudnnGetSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes, + seqLengthArraySize, seqLengthSizeRequested, seqLengthArray, + paddingFill); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetAttnDescriptor( + cudnnAttnDescriptor_t attnDesc, unsigned attnMode, int nHeads, + double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, + cudnnMathType_t mathType, cudnnDropoutDescriptor_t attnDropoutDesc, + cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, + int qProjSize, int kProjSize, int vProjSize, int oProjSize, + int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnAttnDescriptor_t, unsigned int, int, double, cudnnDataType_t, + cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t, + cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int, + int, int); + static auto func_ptr = LoadSymbol("cudnnSetAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, + mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, + vSize, qProjSize, kProjSize, vProjSize, oProjSize, + qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetAttnDescriptor( + cudnnAttnDescriptor_t attnDesc, unsigned *attnMode, int *nHeads, + double *smScaler, cudnnDataType_t *dataType, cudnnDataType_t *computePrec, + cudnnMathType_t *mathType, cudnnDropoutDescriptor_t *attnDropoutDesc, + cudnnDropoutDescriptor_t *postDropoutDesc, int *qSize, int *kSize, + int *vSize, int *qProjSize, int *kProjSize, int *vProjSize, int 
*oProjSize, + int *qoMaxSeqLength, int *kvMaxSeqLength, int *maxBatchSize, + int *maxBeamSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnAttnDescriptor_t, unsigned int *, int *, double *, cudnnDataType_t *, + cudnnDataType_t *, cudnnMathType_t *, cudnnDropoutDescriptor_t *, + cudnnDropoutDescriptor_t *, int *, int *, int *, int *, int *, int *, + int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, + mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, + vSize, qProjSize, kProjSize, vProjSize, oProjSize, + qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnBuffers( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + size_t *weightSizeInBytes, size_t *workSpaceSizeInBytes, + size_t *reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnBuffers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnWeights( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, + const void *weights, cudnnTensorDescriptor_t wDesc, void **wAddr) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, + cudnnMultiHeadAttnWeightKind_t, size_t, const void *, + cudnnTensorDescriptor_t, void **); + static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, wKind, weightSizeInBytes, weights, wDesc, + wAddr); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnForward( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, + const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsQO[], + const int devSeqLengthsKV[], const cudnnSeqDataDescriptor_t qDesc, + const void *queries, const void *residuals, + const cudnnSeqDataDescriptor_t kDesc, const void *keys, + const cudnnSeqDataDescriptor_t vDesc, const void *values, + const cudnnSeqDataDescriptor_t oDesc, void *out, size_t weightSizeInBytes, + const void *weights, size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, int, const int[], const int[], + const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, + const void *, const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t, + void *, size_t, void *); + static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx, + devSeqLengthsQO, devSeqLengthsKV, qDesc, queries, residuals, + kDesc, keys, vDesc, values, oDesc, out, weightSizeInBytes, + weights, workSpaceSizeInBytes, workSpace, + reserveSpaceSizeInBytes, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnAdvInferVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + 
static auto func_ptr = LoadSymbol("cudnnAdvInferVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( + cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, + dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( + cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, + beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( + cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnActivationDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnActivationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, + beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, + x, beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, + cudnnDivNormMode_t mode, const void *alpha, + const cudnnTensorDescriptor_t + xDesc, /* same desc for x, means, dy, temp, temp2 */ + const void *x, + const void *means, /* if NULL, means are assumed to be zero */ + const void *dy, void *temp, void *temp2, const void *beta, + const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ + void *dx, /* output x differential */ + void *dMeans) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + void *, void *, const void *, const cudnnTensorDescriptor_t, void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnDivisiveNormalizationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, + temp2, beta, dXdMeansDesc, dx, dMeans); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, + const cudnnTensorDescriptor_t yDesc, + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, + const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnActivationDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol( + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, + bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, + const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, + const cudnnTensorDescriptor_t dxDesc, + const cudnnTensorDescriptor_t dBnScaleBiasDesc, + const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnActivationDescriptor_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetBatchNormalizationBackwardExWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, + dBnScaleBiasDesc, activationDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnActivationDescriptor_t activationDesc, + const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) { + using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, + size_t *); + static auto func_ptr = LoadSymbol( + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, + + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + + const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ + const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ + + /* Shared desc for the next 6 tensors in the argument list. + Data type to be set as follows: + type = (typeOf(x) == double) ? double : float + Dimensions for this descriptor depend on normalization mode + - Spatial Normalization : tensors are expected to have dims 1xCx1x1 + (normalization is performed across NxHxW) + - Per-Activation Normalization : tensors are expected to have dims of + 1xCxHxW (normalization is performed across N) */ + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, + + /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation + */ + const void *bnScale, const void *bnBias, + + /* MUST use factor=1 in the very first call of a complete training cycle. + Use a factor=1/(1+n) at N-th call to the function to get + Cumulative Moving Average (CMA) behavior + CMA[n] = (x[1]+...+x[n])/n + Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = + ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = + CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ + double exponentialAverageFactor, + + /* Used in Training phase only. + runningMean = newMean*factor + runningMean*(1-factor) */ + void *resultRunningMean, + /* Output in training mode, input in inference. Is the moving average + of variance[x] (factor is applied in the same way as for runningMean) */ + void *resultRunningVariance, + + /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and + backward functions. */ + double epsilon, + + /* Optionally save intermediate results from the forward pass here + - can be reused to speed up backward pass. 
NULL if unused */ + void *resultSaveMean, void *resultSaveInvVariance) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, double, void *, void *, double, void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardTraining"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr( + handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, + bnScale, bnBias, exponentialAverageFactor, resultRunningMean, + resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + + const cudnnTensorDescriptor_t xDesc, const void *xData, + const cudnnTensorDescriptor_t zDesc, const void *zData, + const cudnnTensorDescriptor_t yDesc, void *yData, + + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, + const void *bnBias, + + double exponentialAverageFactor, void *resultRunningMean, + void *resultRunningVariance, + + /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and + backward functions. */ + double epsilon, + + /* Optionally save intermediate results from the forward pass here + - can be reused to speed up backward pass. NULL if unused */ + void *resultSaveMean, void *resultSaveInvVariance, + + cudnnActivationDescriptor_t activationDesc, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, double, void *, void *, double, void *, + void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardTrainingEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, + yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, + exponentialAverageFactor, resultRunningMean, + resultRunningVariance, epsilon, resultSaveMean, + resultSaveInvVariance, activationDesc, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, + const void *betaDataDiff, const void *alphaParamDiff, + const void *betaParamDiff, + const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ + const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t dxDesc, void *dx, + /* Shared tensor desc for the 4 tensors below */ + const cudnnTensorDescriptor_t dBnScaleBiasDesc, + const void *bnScale, /* bnBias doesn't affect backpropagation */ + /* scale and bias diff are not backpropagated below this layer */ + void *dBnScaleResult, void *dBnBiasResult, + /* Same epsilon as forward pass */ + double epsilon, + + /* Optionally cached 
intermediate results from + forward pass */ + const void *savedMean, const void *savedInvVariance) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const void *, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, void *, void *, double, const void *, const void *); + static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, + betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, + dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, + epsilon, savedMean, savedInvVariance); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + + const void *alphaDataDiff, const void *betaDataDiff, + const void *alphaParamDiff, const void *betaParamDiff, + const cudnnTensorDescriptor_t xDesc, const void *xData, + const cudnnTensorDescriptor_t yDesc, const void *yData, + const cudnnTensorDescriptor_t dyDesc, const void *dyData, + const cudnnTensorDescriptor_t dzDesc, void *dzData, + const cudnnTensorDescriptor_t dxDesc, void *dxData, + + /* Shared tensor desc for the 4 tensors below */ + const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData, + const void *bnBiasData, /* needed if there is activation */ + void *dBnScaleData, void *dBnBiasData, + double epsilon, /* Same epsilon as forward pass */ + + /* Optionally cached intermediate results from + forward pass */ + const void *savedMean, const void *savedInvVariance, + cudnnActivationDescriptor_t activationDesc, void *workSpace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, + const void *, const void *, const void *, const cudnnTensorDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, + void *, double, const void *, const void *, cudnnActivationDescriptor_t, + void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationBackwardEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr( + handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, + betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, + dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, + dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, + workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( + cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, + const void *dgrid, void *dtheta) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, dgrid, dtheta); +} + +cudnnStatus_t CUDNNWINAPI 
cudnnSpatialTfSamplerBackward( + cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, + const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, + const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + void *); + static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, + dyDesc, dy, grid, betaDgrid, dgrid); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( + cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, + const cudnnTensorDescriptor_t dydesc, const void *dy, + const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnDropoutDescriptor_t, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpsTrainVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnOpsTrainVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( + cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, const int, int *, + cudnnConvolutionBwdFilterAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, + returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t dyDesc, const void *y, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t dwDesc, void *dw, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnConvolutionBwdFilterAlgoPerf_t 
*perfResults, void *workSpace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, + const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, + requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( + cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, + const cudnnTensorDescriptor_t diffDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, const int, int *, + cudnnConvolutionBwdFilterAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, + requestedAlgoCount, returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t gradDesc, + cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnConvolutionDescriptor_t convDesc, + cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, + size_t workSpaceSizeInBytes, const void *beta, + const cudnnFilterDescriptor_t dwDesc, void *dw) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, + void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, + workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, + const 
cudnnTensorDescriptor_t dbDesc, void *db) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *yDesc, + const void *y, const cudnnTensorDescriptor_t *dyDesc, + const void *dy, const cudnnTensorDescriptor_t dhyDesc, + const void *dhy, const cudnnTensorDescriptor_t dcyDesc, + const void *dcy, const cudnnFilterDescriptor_t wDesc, + const void *w, const cudnnTensorDescriptor_t hxDesc, + const void *hx, const cudnnTensorDescriptor_t cxDesc, + const void *cx, const cudnnTensorDescriptor_t *dxDesc, + void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, + const cudnnTensorDescriptor_t dcxDesc, void *dcx, + void *workspace, size_t workSpaceSizeInBytes, + void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void 
*, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, + dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, + dx, dhxDesc, dhx, dcxDesc, dcx, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, + size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, + const void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, const void *, const void *, size_t, + const cudnnFilterDescriptor_t, void *, const void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, + workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnRNNDataDescriptor_t yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, + const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ + const void *keys, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ + void *cAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ + void *iAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ + void *queries, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, + void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, + yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, 
cAttn, + iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, + reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t yDesc, const void *y, + const cudnnRNNDataDescriptor_t dyDesc, const void *dy, + const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ + const void *dcAttn, /* reserved, should pass NULL */ + const cudnnTensorDescriptor_t dhyDesc, const void *dhy, + const cudnnTensorDescriptor_t dcyDesc, const void *dcy, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnRNNDataDescriptor_t dxDesc, void *dx, + const cudnnTensorDescriptor_t dhxDesc, void *dhx, + const cudnnTensorDescriptor_t dcxDesc, void *dcx, + const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ + void *dkeys, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, + dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, + dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, + workSpace, workSpaceSizeInBytes, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, + size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, + void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, const void *, void *, size_t, + const cudnnFilterDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, + workSpaceSizeInBytes, dwDesc, dw, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetRNNForwardTrainingAlgorithmMaxCount"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnAlgorithmPerformance_t *perfResults, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, const float, const int, + int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindRNNForwardTrainingAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, + requestedAlgoCount, returnedAlgoCount, perfResults, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetRNNBackwardDataAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y, + const cudnnTensorDescriptor_t *dyDesc, const void *dy, + const cudnnTensorDescriptor_t dhyDesc, const void *dhy, + const cudnnTensorDescriptor_t dcyDesc, const void *dcy, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnTensorDescriptor_t *dxDesc, void *dx, + const cudnnTensorDescriptor_t dhxDesc, void *dhx, + const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnAlgorithmPerformance_t *perfResults, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + 
const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, const float, const int, + int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindRNNBackwardDataAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, + dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, + dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, + requestedAlgoCount, returnedAlgoCount, perfResults, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetRNNBackwardWeightsAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t *yDesc, const void *y, + const float findIntensity, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults, + const void *workspace, size_t workSpaceSizeInBytes, + const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, const void *, const float, const int, + int *, cudnnAlgorithmPerformance_t *, const void *, size_t, + const cudnnFilterDescriptor_t, void *, const void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindRNNBackwardWeightsAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, + findIntensity, requestedAlgoCount, returnedAlgoCount, + perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, + reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardData( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsDQDO[], + const int devSeqLengthsDKDV[], const cudnnSeqDataDescriptor_t doDesc, + const void *dout, const cudnnSeqDataDescriptor_t dqDesc, void *dqueries, + const void *queries, const cudnnSeqDataDescriptor_t dkDesc, void *dkeys, + const void *keys, const cudnnSeqDataDescriptor_t dvDesc, void *dvalues, + const void *values, size_t weightSizeInBytes, const void *weights, + size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, const int[], const int[], + const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, size_t, + const void *, size_t, void *, size_t, 
void *); + static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnBackwardData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, devSeqLengthsDQDO, + devSeqLengthsDKDV, doDesc, dout, dqDesc, dqueries, queries, + dkDesc, dkeys, keys, dvDesc, dvalues, values, + weightSizeInBytes, weights, workSpaceSizeInBytes, workSpace, + reserveSpaceSizeInBytes, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardWeights( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, + const void *queries, const cudnnSeqDataDescriptor_t kDesc, const void *keys, + const cudnnSeqDataDescriptor_t vDesc, const void *values, + const cudnnSeqDataDescriptor_t doDesc, const void *dout, + size_t weightSizeInBytes, const void *weights, void *dweights, + size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, size_t, const void *, + void *, size_t, void *, size_t, void *); + static auto func_ptr = + LoadSymbol("cudnnMultiHeadAttnBackwardWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc, + values, doDesc, dout, weightSizeInBytes, weights, dweights, + workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, + reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptorEx( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, + cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, + cudnnNanPropagation_t); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor_v8( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, + cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode, + int maxLabelLength) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, + cudnnNanPropagation_t, int); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); +} + +cudnnStatus_t CUDNNWINAPI 
cudnnGetCTCLossDescriptor( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptorEx( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, + cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t *, + cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor_v8( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, + cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode, + int *maxLabelLength) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t *, + cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *, int *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( + cudnnHandle_t handle, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the timing steps, N is the + mini batch size, A is the alphabet size) */ + const void *probs, /* probabilities after softmax, in GPU memory */ + const int hostLabels[], /* labels, in CPU memory */ + const int hostLabelLengths[], /* the length of each label, in CPU memory */ + const int hostInputLengths[], /* the lengths of timing steps in each batch, + in CPU memory */ + void *costs, /* the returned costs of CTC, in GPU memory */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the dimensions are + T,N,A */ + void *gradients, /* the returned CTC gradients, in GPU memory, to compute + costs only, set it to NULL */ + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + void *workspace, /* pointer to the workspace, in GPU memory */ + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int[], + const int[], const int[], void *, const cudnnTensorDescriptor_t, void *, + cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnCTCLoss"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, probsDesc, probs, hostLabels, hostLabelLengths, + hostInputLengths, costs, gradientsDesc, gradients, algo, + ctcLossDesc, workspace, workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnCTCLoss_v8( + cudnnHandle_t handle, + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 
1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the timing steps, N is the + mini batch size, A is the alphabet size) */ + const void *probs, /* probabilities after softmax, in GPU memory */ + const int labels[], /* labels, in GPU memory */ + const int labelLengths[], /* the length of each label, in GPU memory */ + const int inputLengths[], /* the lengths of timing steps in each batch, in + GPU memory */ + void *costs, /* the returned costs of CTC, in GPU memory */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the dimensions are + T,N,A */ + void *gradients, /* the returned CTC gradients, in GPU memory, to compute + costs only, set it to NULL */ + size_t workSpaceSizeInBytes, /* size of the workspace */ + void *workspace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, + const cudnnTensorDescriptor_t, const void *, const int[], const int[], + const int[], void *, const cudnnTensorDescriptor_t, void *, size_t, + void *); + static auto func_ptr = LoadSymbol("cudnnCTCLoss_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, ctcLossDesc, probsDesc, probs, labels, + labelLengths, inputLengths, costs, gradientsDesc, gradients, + workSpaceSizeInBytes, workspace); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( + cudnnHandle_t handle, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the + timing steps, N is the mini batch size, A is the alphabet + size) */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the + dimensions are T,N,A. To compute costs + only, set it to NULL */ + const int *labels, /* labels, in CPU memory */ + const int *labelLengths, /* the length of each label, in CPU memory */ + const int *inputLengths, /* the lengths of timing steps in each batch, in + CPU memory */ + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const int *, const int *, const int *, + cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, + inputLengths, algo, ctcLossDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize_v8( + cudnnHandle_t handle, + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the + timing steps, N is the mini batch size, A is the alphabet + size) */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the + dimensions are T,N,A. 
To compute costs + only, set it to NULL */ + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, ctcLossDesc, probsDesc, gradientsDesc, + sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnAdvTrainVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnAdvTrainVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cudnn_stub.cc b/tensorflow/stream_executor/cuda/cudnn_stub.cc index f683cecdb52..e30f749897e 100644 --- a/tensorflow/stream_executor/cuda/cudnn_stub.cc +++ b/tensorflow/stream_executor/cuda/cudnn_stub.cc @@ -51,15 +51,17 @@ cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; } #error cuDNN version earlier than 6 is not supported. #elif CUDNN_MAJOR < 7 #include "tensorflow/stream_executor/cuda/cudnn_6_0.inc" -#elif CUDNN_MINOR < 1 +#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 1 #include "tensorflow/stream_executor/cuda/cudnn_7_0.inc" // 2 instead of 3: see https://github.com/tensorflow/tensorflow/issues/32350 -#elif CUDNN_MINOR < 2 +#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 2 #include "tensorflow/stream_executor/cuda/cudnn_7_1.inc" -#elif CUDNN_MINOR < 4 +#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 4 #include "tensorflow/stream_executor/cuda/cudnn_7_3.inc" -#elif CUDNN_MINOR < 6 +#elif CUDNN_MAJOR == 7 && CUDNN_MINOR < 6 #include "tensorflow/stream_executor/cuda/cudnn_7_4.inc" -#else +#elif CUDNN_MAJOR == 7 #include "tensorflow/stream_executor/cuda/cudnn_7_6.inc" +#else +#include "tensorflow/stream_executor/cuda/cudnn_8_0.inc" #endif diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 203434ab3f4..a0aefa77e9d 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -1069,11 +1069,32 @@ def _create_local_cuda_repository(repository_ctx): ], )) + # Select the headers based on the cuDNN version (strip '64_' for Windows). 
+ if cuda_config.cudnn_version.rsplit("_", 1)[0] < "8": + cudnn_headers = ["cudnn.h"] + else: + cudnn_headers = [ + "cudnn_adv_infer.h", + "cudnn_adv_train.h", + "cudnn_cnn_infer.h", + "cudnn_cnn_train.h", + "cudnn_ops_infer.h", + "cudnn_ops_train.h", + "cudnn.h", + "cudnn_version.h", + ] + + cudnn_srcs = [] + cudnn_outs = [] + for header in cudnn_headers: + cudnn_srcs.append(cudnn_header_dir + "/" + header) + cudnn_outs.append("cudnn/include/" + header) + copy_rules.append(make_copy_files_rule( repository_ctx, name = "cudnn-include", - srcs = [cudnn_header_dir + "/cudnn.h"], - outs = ["cudnn/include/cudnn.h"], + srcs = cudnn_srcs, + outs = cudnn_outs, )) # Set up BUILD file for cuda/ diff --git a/third_party/gpus/find_cuda_config.py b/third_party/gpus/find_cuda_config.py index d768d4e3570..091cd32d5fe 100644 --- a/third_party/gpus/find_cuda_config.py +++ b/third_party/gpus/find_cuda_config.py @@ -219,17 +219,20 @@ def _find_library(base_paths, library_name, required_version): return _find_file(base_paths, _library_paths(), filepattern) -def _find_versioned_file(base_paths, relative_paths, filepattern, +def _find_versioned_file(base_paths, relative_paths, filepatterns, required_version, get_version): """Returns first valid path to a file that matches the requested version.""" + if type(filepatterns) not in [list, tuple]: + filepatterns = [filepatterns] for path in _cartesian_product(base_paths, relative_paths): - for file in glob.glob(os.path.join(path, filepattern)): - actual_version = get_version(file) - if _matches_version(actual_version, required_version): - return file, actual_version + for filepattern in filepatterns: + for file in glob.glob(os.path.join(path, filepattern)): + actual_version = get_version(file) + if _matches_version(actual_version, required_version): + return file, actual_version raise _not_found_error( base_paths, relative_paths, - filepattern + " matching version '%s'" % required_version) + ", ".join(filepatterns) + " matching version '%s'" % required_version) def _find_header(base_paths, header_name, required_version, get_version): @@ -426,12 +429,13 @@ def _find_cufft_config(base_paths, required_version, cuda_version): def _find_cudnn_config(base_paths, required_version): def get_header_version(path): - version = ( + version = [ _get_header_version(path, name) - for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")) - return ".".join(version) + for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")] + return ".".join(version) if version[0] else None - header_path, header_version = _find_header(base_paths, "cudnn.h", + header_path, header_version = _find_header(base_paths, + ("cudnn.h", "cudnn_version.h"), required_version, get_header_version) cudnn_version = header_version.split(".")[0] diff --git a/third_party/gpus/find_cuda_config.py.gz.base64 b/third_party/gpus/find_cuda_config.py.gz.base64 index ae3ee37c077..981219bb10a 100644 --- a/third_party/gpus/find_cuda_config.py.gz.base64 +++ b/third_party/gpus/find_cuda_config.py.gz.base64 @@ -1 +1 @@ 
-eJzdPGtz4zaS3/UrcJxzmZqRaTu7ldrTrXPl2DMX7/rsKVszuS3bq4VJSGaGInUkJVu7lf9+3Q2ABEBS8iOTuOKqTEQQ3ehu9BMPvmFH2XyVx9O7kn2zt/8fbHQn2EikRZZ/SLJ7drgo77K8CNhhkrAL7FawC1GIfCmioPem94adxiF0FxFbpJHIWQnwh3Mewv/UmwH7LPIizlL2TbDHfOzgqVde/z8BwypbsBlfsTQr2aIQgCIu2CROBBMPoZiXLE5ZmM3mSczTULD7uLyjYRQSIIP9TaHIbksOvTn0n8PTxOzHeEkE499dWc6Hu7v39/cBJ2KDLJ/uJrJjsXt6cvT+7PL9DhBMIJ/SRBQFy8X/LeIcWL1dMT4HekJ+C1Qm/J5lOePTXMC7MkN67/O4jNPpgBXZpLznuQAsUVyUeXy7KC1haeqAZ7MDiIunzDu8ZCeXHvv+8PLkcgA4fjwZ/XD+acR+PLy4ODwbnby/ZOcX7Oj87PhkdHJ+Bk8f2OHZ39hfT86OB0yAqGAY8TDPkX4gMkYx0tSxSyEsAiaZJKiYizCexCHwlU4XfCrYNFuKPAV22Fzks7jAySyAvAiwJPEsLnlJLQ2mcJiDX/Sv53nexzxOQQ2PPh0fwvC3Oc9XSAy7ExzHj2CKwjLLY0E0sqXUPlCpDAhEwRKXq6IUs6DXQ4UvwjwGPSsEz0EXChJFF3pUzMLGMoAZR6mVRQ8aZ6gCkShRVCmJOM41EYRoLulH+DBLJ/F0kZMAEa4oo2xRBkTVnJd3hdQnwk7ACFXpYcUaKJieN1TBuzxbTO+YSJdxnqUzkZa9Jc9j1FYw5ZMJmBpb8iSOHAJiJaSBZE5KRZNLxIk8p4nPRbnISQkYNIG4wiwSSpoJqDHanhQeTgPATmIgHvDXVHIke7pA6oCoy8V8nuWo+TUYmg1Ngx+nYbKIoClcfH96eNkfwI/js7MBOzs6Oh2QYKTTuhjZU1ryL4iooumWg6abGlLTA0aNXI8+jHHM8cfD0Q+XPUOETIsQKQd/NOM7hZhzEB0AT5PslgYJmDF6kmVfpDZJ5Sl6SKlWKqlJ5K7ueB7toAgjUEIitFjcmmRO8myG5AH1xALpRtCDqbToRQmjG624Atmw88vapiMx4YukxH7gaqNhr8fAWNPFwxC84u6iyHeTLOTJbriI+IAapHSlLhIV20kktYbtzLcDgP8Rpja7L4asooO1iW0A/qfH6r+j4fX1xzyb5nzGPqAorq/PPp8cnxyy//74CcLSbL5AF8pGWZZ8icvra0R/ff221/sAAr3l4RfwqxFNBVAX38ZJXK7Q4c6EqUNJkVFU4QlYZAodl1KEvYZ4M+ksW0iHWaJQsrINLQDRkVr11JTuVHJuxaKkLWefVWER+ta6xQ6jKEZj5ElDWQF654V/gIIsiv606nx+f3EJwUPPCzaNzs9P/3oyotkEGGl2FQw+WFDtMGChxjhnZ/ZAsunk7HJ0eHqqgdCeK+LwwSGOmkwY6QLGPxxfaBTaERCKEcTx84uLkYGmarKG7n3WvrRdAULwdLdCpxRg0mAID9sYUrcfgtU2ujh0ywL0gfcqNVAOGiLxbF6usPciNfx0xnhIGQ5PV7qv8l8QBUD/cTyuApd2G2C6BhXDXjkZ/1m9+26skAwZEBX8034nfdAYFGrIgiCwX6of9VsItL0epAvglFmc6V9ZoX+hx9O/5wkvkRj9DH5b/QI3Ns+zELKPqmVV9CAtmK8SCCtDzHlQwAfTHfl+B9zXDi93ymzeK/PVECaUjLO4A2+QMIXk/i4O73oqPTyhtvcYhqrumEohQBEUc36fajiMQmPxIMJFqT25RFUTJNJOenq9MOGQRx2R+6MB/fdEA4i8j2PP4TV0Ay/LxnEBQgXP6tMbGTArSQUyc/D7kB9hOgz9PAPwXrrUR4Aq52sCz3iYPQb0mOcwTgU54yUmP1qFfB6WC56MKxXWWYduoQFAS47uRPgF5SjIf5L/1enOTAiV6ShoGe17Kg5gUSES2aiV2h2GpSqrBuvDNBbNMqbsSyGx6axw418Dl9MZexQQDg2Qp3hS/bfP3L/9wG4b5QthAgTfuADO8weIWxsggj+shWj+uWMQUXom8mkxrKBduQ0pqdFCa6RMEF7zoAK2RWyDYlocYr6k8xkZjSDBLXmSUBKqEV3IDHPIflSKhb0l7lq9pMraSXGWikBqJvwLcduZcnCfZ9BFMqvsQ8oOWqAqggpsDizy0Mi6qkQbETv4DlwFxBpu7vd7Lep30GiyehMtDWw8Lwusef2GAWrL5eU4Ebwon2a6SpYH7Ar8nr/sU566pHzUIQFKxtL3Aq9/YzDVBtjkzgE1OGTf1dLQjExFqaOUZgUTzwFL+Uxof6MUA52BCi0wPIBDsUXZ9JFZqQVSDZDCBKsxeB9nQTYXGrOXexCdU8i9Idc88BblZOdPXj/IAQUCKD/KpKrRBAb00/feyDHZVsHe+dfRu77HtojQAQ3VJzDQP+qubUuJgNqCKZRpc3/fmHyv8sYhTLsoYp6OgcloEZb+JM6LEvJbAdl35AoDrIdydMyGb+NUFeTgUgmKUngJqASixrvKigDBgp+yOPUngF3O5gQFJUHxkeQq4W/MuUqisSwFxlQf+G1UNdJsu36ozBTLFidm1mSi9mgwGgtmggK37+lmr4/plbdbAPu7VSPAqUzqwMhFghBj1li+8a8szKASO3Pvpk/RvMSaQc46lhmgUKDMb9nBd8wP3vY9OXMYQRC9KMmOTV2TI0gzsNRJpTamZqnRlHph7yASWBP6Hi/COPb6UqVU1vMpjfHlMXWpkh8TYR0RmlqINAc8inytADBNqLu+rZl9QzcLKs99LHJ9iaDfN5VBlZZjLB2VPtBP0+eoeTbbJU1mC5DuvcWZE4nqDw6EFgE3gVmP7wAM0MhRG0lVrVtV4AJRqNw/AH58K5Z6VW3rDZwXzytjl1vQETyGhcySF70x3OaV59TnO1sFuhwTCJTX6eSpJpNsp0+aejcgrXaL1jOs3HK7nTeXL3KRyFK7WsTCtRBjKUSufjjeSJHoVcR6auGn0aB4c1vf7pD72Jmmi/qleChzXuweffo4Otldi1D2oXeVn9OF0S/AubXws5F16P3tH61H66GdV3zxYIJZzL+1WQPTGlNtOaY1PB/XGySfg4oN/Yw0Kw9FMqj7orl6MoBcedepVrLtrWIb1ZO8NXJPP+LKj9QI+jfSy5gDPhWpDX1juC2zWNNCOcoWSUSehZYlsfrekitx8MtYeVsNwUrTbEL/q43VN4TRlJTBWGU9VHci1JNkbDLYkhB0o1IejiI57qMAPJbrAf7jWyFfRjxz4L6TquA7lCaPC/F8lbEEoezAAtZmJhOorlpTm51MTeQSNkkIzEwXAaKoV5FXdX7REgEMAkHfTAooeKDF6Sjk1NUurLdVvA2iFUKAdmLCq3WXmt5ZyMHZrq3T7L81OfXVcP+GIrSAQN9K1lo6IDIUmbeBmk3Do6TeGo6sQ9VdN7pGO9RA4mkGM+hmwmVhwDDSPUGzuLSi8o6XjZJTapsWTl1t/H
Z226hPDW7Jc/VVx/i5iz4N/zBwBu32Fwp6zaSqHqYig4pJuWMepfnSoaCzKiZlksmGJXKVybQ7mmfohspnHq0dlqGsUXYn5bIJ3+BA1rNlSYjSR5nzOZrZmHkYFOE6qnSlGrXe4epAd0nvmVsPurZR+b6V41cSw2UbM3P3tqJgKyKHqwfd3WX7e3t7g4qKLXqW7f1eT6XYhmQrsdZ0d2qOh8DBnbdJ+o+akWfgaMqy32vUQA477A1uBeCygWD8NluKoJKC6ZIrtttiM/Gdl97AKVAMfUiXYdimDUYsOgJYJqtovd2dJdL+BW50XUfvrgP4Z8A++/o3/dP3Kq+o6+rOUl5X8DuKFijkW6rvluWcKp3rLrybdXT3eo6ttNBAAqLUAkSBD4F4EF4jMaFQLjt4GkotgRkSrmZrjftoqShu41QWAIOanIFTP7qT2ZfEL2e2mnQPh313QYsisYxDo84qoFRyyzasVqDijmK+QwVtKWtkBxzI9aqWt8H+XnAbesqY52VsWnM3fQ1v6hEwWHO/QvQUg4D+XouTrIwLF+HkdhpgchdZqmnta+Ea22+t/ZX8+3KR+n9PD/XQMtYowVVRicqaefnA5RGYSnoBgY/Oj8/9sIin0/6QtqzQpG4zADUX63BRHmQWaI7UGFV9pmO5JjbNctyIv7MTFUcYILsAUsf+4FHArmgIerdC0JeZfZO6q70b9m8HbS/2b9TiD6UnZm3onaQQBYsYmE5LuS1hSnWIFeKyYLTm0uG7m+PRfClP8C+t9abBeUPb/qw+amkCOYd+rlK4kaxvAxti6wI29d2BrmdMU2jMoWnqzjiNGauxooVt4sgx574Dvokn14gdpsyp0Wy5eoMQPzsJ0m3Ci8ekSE58HOq1x8b+jLNkBx5t35PdH5NhmTmWuUq5YfvE6ImhlGIR2L3v1ec5xv9z+JfzC4+Ss7rt5AzbuvIVs+vHw9HRD3W41HmarkJNJ8nY85Mwmg8+j5+Tir04D2tPwtCtqkMyi0I55Rn/KatP/mVpsgrUojUx0JWymRX23k3PKu/f4G5qLuQxrwp1nE6gEFlySKzwUMOtgPlVO6uoWgP206JQS110bAT3xcwpaF9AdyfpabHVmCNHPsZGLJ0yRD3k6jinfudDztrvkJY7gyihp0RuREeprIm3w1GbXcBj2PIaOP3WO7cOt0aQ6/1aw6P9bLunIkuAoq/qoPaDvd/UQV2en6J/cVyU2brRSRmdfyU3Vc3LbAZW/Zp8VUXbc3zQV3QarsQ6qX2pD5AIyQvYuDv8gN1pnSdQPZ/lCxTsy7xBztPod+0LLg7Pjl1PULdt9ANV11/JC+B8vDLjJxV5ZaavxNRB4kvtHdGRtZt4O2zd7LLO0qnfs+ycIF9m5ZNJ+bs28g8fRq6NV00bTVz3/JUsHObilRk4ascrs28ppHYCX2rdgI2M28DaYdtGj3Wmjd2eZdkI+HTDtndj0vTrbMfUhvlYs3SNEu9rGAZJj9IY9SNZ3On7z+9PtdmtM7oXbMOkz0ioX2hwnbsvMGGPN7YnbbqkqdxzqUfoWlCse8iFrfp5YPd6lloj4AvVupjzHJj7Pcesy4+HF5fvG0Wq0bq5SK07/1pFKs3LK4tfSlleWQirRNVJ5ovLUkIoy1ILd1dZanVaW5bKns8rSyXsixxAGobJ6wxrdH2wslf5pIMaPVmG+DXCGcrmlUQzmqavEcwQMUjUxN+q02YH3E4yHgdWn2doMsG9SItLulqaP6r0+g00+XN93bVWaLNxXfyxelo6/4b9QxHxD1y058BQKnJewuDZ7U8ixEspGbunrxlAx5LFpTwZRZfu1W7EoojTqUIH7/3ZIi7jOZ6qi2eigF5Jdq9W/GuJ0F0DKzaoA0I8SfxaztogqgMXOMhn+Vreh1dfNZjxL6Jx5o3pbxnQfYlgo5lXVzeea+9ny5N0IvJXFHPVlRJjO7zabtKXqb9ls2wpIvM2HO075bPq2xWKLyX44C74JaRUYXslwkIF0F7gkY7ySX5yGSPXYLnuKK3u0u0ELs1tGjT6PsN1VrDPdZ9oyWO8/jUW6dKH/9ThI3Vp6ODqxj1s6X5hg5wJXSBru6Bfn/PWuOn6VXWjx7ryUzdf6d43es4GnnF3QFFnXXkTUx5KDh/FBpdncqw7s+AZCUs7JzjPzW8jDNo+fdD6KQO6iFJmgGVb3vPBKyF/+nb87R/rCyPb7J6uQMxzsYyzRZGs5PYwHQJqfvSmx9QHR9gxXeAFL5DhJzWgg5IUDbU9kN/QuI/l54Tw+xzoGJzZ71vXc9fNV/PS5d/93au/77Kbt/13xNj1/buaq93/+ncwnLbZXXsZ88o+vWdcudqktv36Jg34wST+pzTkOrwbilB1kWeHB/I7VlpKRcKLO/ldIXWjX2m0chna0nLBk3qIXtfNBgWlQOYJD4XvXV/TZTBTwbGD4qFxGLipyVEc0pdJcvq01roPJCl1kSzUH2M5YFc8ny4DiPXg7uVVU2ygI52rIsDfV/vDm5te41ircznPc75dgpwRY9a9JGf+POtbORsDyqNuNTbGpCOo1hUA4zXMlp5J8QC0FXIeb6RvVxdJ//WznFc6tEQXHysJGrceu3hsfIrFs64kKT9I9z8X8whcq988B14P4CzGqOUGZ24kuiv7YNuN6kkHHDSxNSHaIMsFJIG+e5HcxGRE0z77M/P39wZsv8r63rDvG8dd1EGTe16oL0XIQybVrS48bmKcmtRfHHBorYVgcrJBI80v41Q66Qq8zvNbDpiZVLinVLpmQ20c/6JSBjnuVVJuDNEQTmOLvlU89SGMpwnIPuJiU9PcxO8SE+27fU0hWQM0ROTsabYKSO9OP0085p6/SYW749klGNy2+JpyMfE3xGLvBbVKRW3oPU0oxhapQYGzT9RpUXLN66talD1E06Lc1cV2i6pWjJ9oUdZ6vE1Nc/2xTUwqQqVpR4hK0zpGuZmz10xwW8KUu7fSngPU3xXr5L5lb80g0N1iqbijRasW5mghqZu3RqLewpqz0NbCmfnxsw2MmYurNW3OalvFla7l2jir6rxu7lprjhYOW6rkFi7d77Nt4NRdgLPpbSmaiWs0zy8D/YkawhqXYlZlyyCVL0BZJD+z41FxS18TsVpxCM9YbaKc58sNCslJ/5dmra6+MCWT7BmPUzlqtYpExInVAO8VLoRxQb6ZkGuqKyLo05y+t1UM5b0E5teY+u0LO5j7qK0SzLjlJz0D/Fyt8Isy94Va9MOXkKKWeJ2p18Mqg0qf8Zg+HTYeIyfjsYeYJFO9/wcrgfOr \ No newline at end of file 
+eJzdPGtT40iS3/0r6tRHINNGwOzGxJ5vmQsGum/Y5aAD3D23AV5vIZeNpmXJJ8kG78b+98vMqpKqSpINpnu2Y4iYHkuqzMrMyme93rDTdL7KoulDwb47PPoPNngQbCCSPM3ex+kjO1kUD2mWB+wkjtk1NsvZtchFthTjoPOm84ZdRCE0F2O2SMYiYwXAn8x5CP9TX3rsk8jyKE3Yd8Eh87GBpz553f8EDKt0wWZ8xZK0YItcAIooZ5MoFkw8hWJesChhYTqbxxFPQsEeo+KBulFIgAz2F4UivS84tObQfg5PE7Md4wURjH8PRTHvHxw8Pj4GnIgN0mx6EMuG+cHF+em7y5t3+0AwgXxMYpHnLBP/t4gyYPV+xfgc6An5PVAZ80eWZoxPMwHfihTpfcyiIkqmPZank+KRZwKwjKO8yKL7RWEJS1MHPJsNQFw8Yd7JDTu/8diPJzfnNz3A8fP54KerjwP288n19cnl4PzdDbu6ZqdXl2fng/OrS3h6z04u/8L+fH551mMCRAXdiKd5hvQDkRGKkYaO3QhhETBJJUH5XITRJAqBr2S64FPBpulSZAmww+Yim0U5DmYO5I0BSxzNooIX9KbGFHZz/EX/Op7nfciiBNTw9OPZCXR/n/FshcSwB8Gx/zEMUVikWSSIRraU2gcqlQKBKFjicpUXYhZ0OqjweZhFoGe54BnoQk6iaEOPipnbWHow4ii1Iu/AyxmqwFgUKKqERBxlmghCNJf0I3yYJpNoushIgAiXF+N0UQRE1ZwXD7nUJ8JOwAhV6mHJGiiYHjdUwYcsXUwfmEiWUZYmM5EUnSXPItRWMOXzCZgaW/I4GjsEREpIPcmclIoml4gTWUYDn4likZESMHgF4grTsVDSjEGN0fak8HAYAHYSAfGAv6KSI9nTBVIHRN0s5vM0Q82vwNBsaBj8KAnjxRhehYsfL05uuj34cXZ52WOXp6cXPRKMdFrXA3tIC/4ZEZU03XPQdFNDKnrAqJHrwfsR9jn6cDL46aZjiJBpESLl4I9mfD8Xcw6iA+BpnN5TJwEzeo/T9LPUJqk8eQcp1UolNYnc1QPPxvsowjEoIRGaL+5NMidZOkPygHpigXQj6MBQWvSihNGNllyBbNjVTWXTYzHhi7jAduBqx/1Oh4GxJounPnjFg0WeHcRpyOODcDHmPXohpSt1kajYjcdSa9j+fDcA+J9haNPHvM9KOliT2Hrgfzqs+jvt3919yNJpxmfsPYri7u7y0/nZ+Qn77w8fISzN5gt0oWyQpvHnqLi7Q/R3d3udznsQ6D0PP4NfHdNQAHXRfRRHxQod7kyYOhTnKUUVHoNFJtBwKUXYqYk3lc6ygXQYJQolK9vQAhAdqVVHDel+KedGLEracvRZGRahbaVb7GQ8jtAYeVxTVoDef+UfoCCLoj+tOp/eXd9A8NDjgq8GV1cXfz4f0GgCjDS7EgYfLKhmGLBQo5/LS7sj+er88mZwcnGhgdCeS+LwwSGOXpkw0gWMfjq71ii0IyAUA4jjV9fXAwNN+crquvNJ+9JmBQjB090LnVKASYMhPO1iSN19Cla76OLQLQvQB94p1UA5aIjEs3mxwtaLxPDTKeMhZTg8Wem2yn9BFAD9x/64ClzabYDpGlT0O8Vk9Ef17YeRQtJnQFTwd/ub9EEjUKg+C4LA/qh+VF8h0HY6kC6AU2ZRqn+luf6FHk//nse8QGL0M/ht9Qvc2DxLQ8g+yjervANpwXwVQ1jpY86DAj6e7svv++C+9nmxX6TzTpGt+jCgZJz5A3iDmCkkjw9R+NBR6eE5vXuHYahsjqkUAuRBPuePiYbDKDQSTyJcFNqTS1QVQSJppafTCWMOedQpuT/q0H9HNIDIu9j3HD5DM/CybBTlIFTwrD59kQGzlFQgMwe/C/kRpsPQzjMAH6VLfQaocr4m8IyH6XNAz3gG/ZSQM15g8qNVyOdhseDxqFRhnXXoN9QBaMnpgwg/oxwF+U/yvzrdmQmhMh0FLaN9R8UBLCpELF9qpXa7YYnKqsH6MI1Fs4wo+1JIbDpL3PhXw+U0xhY5hEMD5CWeVP8dMffvKLDfDbKFMAGC71wA5/k9xK0NEMHv1kLU/9w+iCg9Etk075fQrtz6lNRoodVSJgivWVAC2yK2QTEtDjFf0vmMjEaQ4BY8jikJ1YiuZYbZZz8rxcLWEnelXlJl7aQ4TUQgNRP+hbjtDDm4z0toIplV9iFlB2+gKoIKbA4s8tDIuspEGxE7+I5dBcQabu53Ow3qd1x7ZbUmWmrYeFbkWPP6NQPUlsuLUSx4XrzMdJUsj9kt+D1/2aU8dUn5qEMClIyF7wVed2gw1QRY584BNThkP1TS0IxMRaGjlGYFE88eS/hMaH+jFAOdgQot0D2AQ7FF2fSpWakFUg2QwhirMfgepUE6Fxqzl3kQnRPIvSHXPPYWxWT/D143yAAFAig/yqSq0QAG9NP33sg+2U7O3vp347ddj+0QoT3qqktgoH/UXNuWEgG9C6ZQps39I2PwvdIbhzDsIo94MgImx4uw8CdRlheQ3wrIvseuMMB6KEfHbPg+SlRBDi6VoCiFl4BKIKq/2zQPECz4JY0SfwLY5WhOUFASFB9JrhJ+aI5VPB7JUmBE9YHfRFUtzbbrh9JMsWxxYmZFJmqPBqO+YCQocPuefu11Mb3yDnJg/6B8CXAqkzo2cpEgxJg1kl/8WwszqMT+3Bt2KZoXWDPIUccyAxQKlHmPHf/A/GCv68mRwwiC6EVBdmzqmuxBmoGlTiq1MTVL9abUC1sHY4E1oe/xPIwirytVSmU9H5MIP55RkzL5MRFWEaGuhUhzwMdjXysADBPqrm9rZtfQzZzKcx+LXF8i6HZNZVCl5QhLR6UP9NP0OWqczfeSJvMNkO7t4ciJWLUHB0KTgJvArMe3AAZoZK+1pKrSrTJwgShU7h8AP74VS72ytvV6zoftytjlDjQEj2Ehs+RFXwy3ees59fn+To4uxwQC5XUaeeqVSbbTJkm8IUir2aL1CCu33Gzn9emLTMSy1C4nsXAuxJgKkbMfjjdSJHolsZ6a+Km9ULy5b/f2yX3sT5NF9VE8FRnPD04/fhicH6xFKNvQt9LP6cLoC3BuTfxsZB1af/9769F6aOYVPzyZYBbzezZrYFojqi1HNIfn43yD5LNXsqGfkWbloUgGVVs0V08GkFvvLtFKtruT76J6krdG7ulHVPqRCkF3KL2M2eFLkdrQQ8NtmcWaFsppuojH5FloWhKr7x05Ewe/jJm3VR+sNEkn9L/KWH1DGHVJGYyV1kN1J0K9SMYmgw0JQTsq5eEokuM6CsBjuR7gP74V8mXEMzvuOqkKfkNp8igX26uMJQhlBxawNjOZQLXVmtrsZGoip7BJQmBmuggQeTWLvKryi4YIYBAI+mZSQMEDLU5HIaeudmG9nXwvGK8QArQTE16tu/TqrYUcnO3aOs3+W5NT3/aPhhShBQT6RrLW0gGRIU+9DdRs6h4ltWc4shZVd93oGu1QHYmXGUzea+fC5aHHMNS9QLW4NKPigRe1mlOqm5
ZOqW7Fai5MPwGJNfobsMRbzKB6rFjMYzGsjRp6vlvzefgFHYHWjCixutQW/zp/0VAZG2ImWXTLptG2E07VmJbeqed03O6tFPwaldIRAmxD1UTWEIKySwXAjE7zqYNSa31Oai3THmusVE7V7PK2UFKVWT1bTS2TXWN2TvJnE77Bla1ny5IQJbIy+3RUuqYH0CnCtcwXKEWp9BDnKdonFzxzEURXWarysKqNUmI4gWTWEN7OONgZk+vXnR4csKPDw8NeScUOPcv33U5HJfuGZEuxVnS3ao6HwMGDt0n6zxqRLXDUZdnt1Koxhx32BhclcAJDMH6fLkVQSsEMDiXbTVkC8Z0VXs8plQx9SJZh2KQNRlQ8BVgm63m98J7G0hcIXHK7G7+9C+CfHvvk69/0T9cr3amu8FsnFfRcwr6ixRt2G+YBGiaWysSyfQqgXtG3zyzZSgsvSECU5IAo8CEQT8KrpUiUVMgGnoZSk3GGhMvRWuM+Gmqb+yiRpUivIqfnVLLuYHYl8cuZrSbt3WHbA9CisVhGoVHx5VC0uQUk1k1Q+48jvk+ldSGrdQccyPXKN3vB0WFwH3rKmOdFZFpzO301b+oRMFhzt0T0EoOA9l6DkyyNC6cD5cIeYHKne8ph7WrhGguBje2V/Ltyuvx/L0501zLWKMGVUYkKrHnxxOVmnFJ6AYEPrs6u/DCPptNunxbP0KTuUwA1pw1xeQBkFmiOVB9lpajjuiY2STPcEvBgpy6OMDDCQxLb7T0L2BUNQR+UCLoy6atTd3s4ZP923PThSOV+MlUxq1TvPIEomEOWiGtdtEBiSrWPteoyZzT70+K76/3ReClP8A+t9abBeX3b/qw2apIEOYd2rlK4kaxrAxtiawM29d2BrkZMU2iMoWnqTj+1EauwooVt4sgx564Dvokn14gdpsyh0Wy5eoMQ/3QSpPuY589JkZz42NezoLWVImfyEDzakSebPyfDMnMsc750w0KO0RJDKcUisHvfq3aWjP7n5E9X1x4lZ9W780t815avmE0/nAxOf6rCpc7TdD1sOknGtk/CaDz4PNomFXt1HtachKFbVdt1FrlyyjP+S1rtQUyTeBWo6XNioC1lM2v9w2HHmmh4g+u6mZAbzkrUUTKBQmTJIbHC7RX3AsZXrfGiavXYL4tcTbrRBhZcoTOHoHkq3x2kl8VWY4wc+RhLwrTfEfWQq42l+psPOWu3RVruCKKEXhK5ER2lsibeFkdtNgGPYcur57Rb79xa3BpBrvdrNY/m+qc8jYGkr+qhjoLDf6mHurm6QAfj+Cjz7UYvZTT+lfxUOS6zGZj1t+SsStq2cUJf0Wu4Emul9rVOQCIkN2DjbnEEdqN1rkC13MoZKNhXuoOMJ+PftDO4Prk8c11B9W6jIyib/kpuAMfjG7N+UpFvzPaVmFpIfK3BIzoydxNvi7GbTdaZOrXbytAJ8pVmPpkUv2krf/9+4Bp5+UrauPnqV7JlEPo3ZsqoBt+YJUshNRP4WjsGbGTGBtYWKzZarDNibLaVDSPgK014nCRfZ+ml2l71XPtzrQ9PiRiWR4+V1eEjWdzFu0/vLuQ+03ajQ3+jfuPMHE1x63nx7UzxZebjeyRqNFwmf5YGAmr6QmSvNOXWNZyKqGeY8YuWbpJErtxUPbRNS1Yt5PRY9dyzW21lMAj42kp3zjNg7rcc9m4+nFzfvKtVusbbzZVu1fjXqnRpXL6xyKiU5RsLjqWoWsl8dW1LCGVta+Fuq22tRmtrW9lyu9pWwr7KASRhGH+dgFnZ7nYBk45DlvYqn3S4pCfLENeZ4bZGiLJ5uQF+lWhGw/Q1ghkiBoma+Bt12myAi1LGY89qs4UmE9yrtLigo7LZs6q3f4Emf6qO71YKbb5cF3+slpbOv2F/U0T8Daf+OTCUiIwX0Hl6/4sI8ZBNyh7pdgZoWLCokPur6BIBtaaxyKNkqtDBd3+2iIpojpsEo5nIoVWcPqp1g0oidHbCig1qmxGPY7+SszaIctsGdvJJfpbn+9UtDTP+WdT20TF9NwOd/wg2mnl5FGVbe79cnicTkX1DMVcdkTEW1ctFK304/Hs2S5dibJ7uo9WrbFbexaH4+qQT9OBLSKnE9o0ICxVAe4FnOsoX+cllhFyD5bq9NLpLtxG4NPdVr9Z2C9dZwm7rPtGSR3icbSSSpQ//qS1M6hDU8e3Q3bLp3hhCzoQOxDVdOFBtJNa46ThZeULJOsJUvb7VrYd6zHqecRZCUWcd4RNTHkoOn8UGlzt7rDPA4BkJSzMnOM71ux56TVc5NF7NQAdrihSw7MpzS3jE5Q/fj77/fXUAZpc90pGOeSaWUbrI45VcZKatRPVLfDpMXaDCzuhAMniBFK8IgQZKUtTVbk/eCfIYyeuR8L4RdAzO6Het48brxqt+iPSv/sHtXw/YcK/7lhi7e3xbcXXwX/8OhtM0umsPl97aewCNI2Sb1LZbnQwCPxhHf5eGXIV3QxHKJnIHck/ey6WllMc8f5D3JKkbCpRGK5ehLS0TPK666LSd1FBQCmQe81D43t0dHW4zFRwbKB5qW4rrmjyOQrppJaOrwtZd+KTURbJQXS5zzG55Nl0GEOvB3cujs/iCNoau8gB/3x71h8NObXOsc9jQc+5iQc6IMeuclTN+nnX3z8aA8qxTmrU+aSOrdQLB+AyjpUdSPAFtuRzHofTt6mDsP/4px5W2PtFBzlKCxinONh5rV8t41hEr5QfpPOtiPgbX6td3k1cdOJMxarrBGRuJ7tbeHjdULWmbhCa2IkQbJB3r8N2D8SYmI5p22R+Zf3TYY0dl1veG/VjbNKO2qzzyXN18IbeqlKfUcNOKsfdS36Dg0FoJweRkg0aaN/2UOukKvMrzG7apmVS4e13aRkOtPn9RKYMcD0sp17qoCae2zt8onmonx8sEZO+Tsamp7wRoExOt3X1NIVkd1ETkrIs2CkivcL9MPOa+AZMKd9W0TTC4IPI15WLir4nFXmVqlIpaKlwrlIbFVaNjZ+Gp1ZDkVNdXNSS7i7ohuZOKzYZUThS/0JCsaXibmvq0Y5OYVGBKkpbIlCRVaHITZq+e1zZEJ3dJpTn0V9ejbVQJY7HOINBdWSm5o7mqBuZo/qidt1p+3sCaM7/WwJl5h9sGxsw51Yo2Z5Kt5EqXcE2cleVdO3eNpUYDhw3FcQOX7jVzGzh1591sehtqZeIazfNzT9+0Q1ijQszKJBmk8hkoG8vbgjyqaelSFOstduEZk0yU6nweopCcrH9plujqoiyZW894lMhey8kjIk6sengocSGMc/71PFxTXRJBN4z63k7el4camF9h6jbP52DKo1ZIMNGWN5MGeOuu8PMi84Wa68OPkJkWeBaq08Higiqe0YhuQBuNkJPRyENMkqnO/wPiKjLf \ No newline at end of file
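
Every wrapper in cudnn_8_0.inc follows the same lazy-binding pattern: resolve the cuDNN symbol by name on first use, cache the pointer in a function-local static, and return GetSymbolNotFoundError() when the symbol is missing. The sketch below shows that pattern in isolation; it is a minimal standalone version that assumes a plain POSIX dlopen/dlsym loader, stand-in type definitions, and a hypothetical cudnnFooBar entry point, whereas the generated stubs go through the stream_executor LoadSymbol helper and the real cuDNN types.

// Minimal sketch of the lazy symbol-resolution pattern used by the generated
// stubs. Assumptions: POSIX dlopen/dlsym, a hypothetical cudnnFooBar symbol,
// and stand-in type definitions; the real stubs use LoadSymbol<T> and cudnn.h.
#include <dlfcn.h>

using cudnnStatus_t = int;
constexpr cudnnStatus_t CUDNN_STATUS_INTERNAL_ERROR = 4;  // stand-in value
struct cudnnContext;
using cudnnHandle_t = cudnnContext *;

cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; }

// Look the symbol up in the already-loaded cuDNN shared library.
template <typename T>
T LoadSymbol(const char *symbol_name) {
  void *lib = dlopen("libcudnn.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NOLOAD);
  return lib ? reinterpret_cast<T>(dlsym(lib, symbol_name)) : nullptr;
}

// The stub itself: same shape as every wrapper in the .inc file.
cudnnStatus_t cudnnFooBar(cudnnHandle_t handle, int value) {
  using FuncPtr = cudnnStatus_t (*)(cudnnHandle_t, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFooBar");  // resolved once
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, value);
}

Because the resolved pointer is cached in a static, the dlsym lookup happens only on the first call per process; every later call is a plain indirect call, which is why the generated stubs can be used as drop-in replacements for the real cuDNN entry points.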
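
The comment on exponentialAverageFactor in cudnnBatchNormalizationForwardTraining states that passing factor = 1/(1+n) on the n-th call (so factor = 1 on the very first call) turns the running-mean update runningMean = newMean*factor + runningMean*(1-factor) into a cumulative moving average. A small worked check of that identity, using made-up batch means rather than real cuDNN output, is below.

// Worked check of the CMA identity quoted in the exponentialAverageFactor
// comment: with factor = 1/(1+n) at the n-th call, the running mean equals
// the arithmetic mean of all values seen so far.
#include <cstdio>

int main() {
  const double batch_means[] = {2.0, 4.0, 9.0, 1.0};  // made-up values
  double running_mean = 0.0;
  for (int n = 0; n < 4; ++n) {
    const double factor = 1.0 / (1.0 + n);  // 1, 1/2, 1/3, 1/4
    running_mean = batch_means[n] * factor + running_mean * (1.0 - factor);
  }
  std::printf("running mean = %g\n", running_mean);  // prints 4, i.e. 16 / 4
  return 0;
}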