diff --git a/tensorflow/stream_executor/cuda/cublas_10_2.inc b/tensorflow/stream_executor/cuda/cublas_10_2.inc
index 42c4e5fef3b..067ba675288 100644
--- a/tensorflow/stream_executor/cuda/cublas_10_2.inc
+++ b/tensorflow/stream_executor/cuda/cublas_10_2.inc
@@ -2,29 +2,31 @@
 
 extern "C" {
 
-cublasStatus_t CUBLASWINAPI cublasCreate_v2 (cublasHandle_t *handle) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t *);
+cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) {  // Stub: calls the "cublasCreate_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCreate_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDestroy_v2 (cublasHandle_t handle) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t);
+cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) {  // Stub: calls the "cublasDestroy_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDestroy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, int *version) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int *);
+cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle,
+                                                int *version) {  // Stub: calls the "cublasGetVersion_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVersion_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, version);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(libraryPropertyType, int *);
+cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type,
+                                              int *value) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
@@ -37,57 +39,71 @@ size_t CUBLASWINAPI cublasGetCudartVersion(void) {
   return func_ptr();
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetStream_v2 (cublasHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle,
+                                               cudaStream_t streamId) {  // Stub: calls the "cublasSetStream_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetStream_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetStream_v2 (cublasHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *);
+cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle,
+                                               cudaStream_t *streamId) {  // Stub: calls the "cublasGetStream_v2" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetStream_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *);
+cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle,
+                                                    cublasPointerMode_t *mode) {  // Stub: calls the "cublasGetPointerMode_v2" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetPointerMode_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t);
+cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle,
+                                                    cublasPointerMode_t mode) {  // Stub: calls the "cublasSetPointerMode_v2" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetPointerMode_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *);
+cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle,
+                                                 cublasAtomicsMode_t *mode) {  // Stub: calls the "cublasGetAtomicsMode" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetAtomicsMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t);
+cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle,
+                                                 cublasAtomicsMode_t mode) {  // Stub: calls the "cublasSetAtomicsMode" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetAtomicsMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *);
+cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle,
+                                              cublasMath_t *mode) {  // Stub: calls the "cublasGetMathMode" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMathMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasMath_t);
+cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle,
+                                              cublasMath_t mode) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMathMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
@@ -118,399 +134,384 @@ cublasGetLoggerCallback(cublasLogCallback *userCallback) {
   return func_ptr(userCallback);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetVector (int n, int elemSize, const void *x, 
-                                             int incx, void *devicePtr, int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x,
+                                            int incx, void *devicePtr,
+                                            int incy) {  // Stub: calls the "cublasSetVector" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetVector");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, x, incx, devicePtr, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVector (int n, int elemSize, const void *x, 
-                                             int incx, void *y, int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x,
+                                            int incx, void *y, int incy) {  // Stub: calls the "cublasGetVector" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVector");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetMatrix (int rows, int cols, int elemSize, 
-                                             const void *A, int lda, void *B, 
-                                             int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize,
+                                            const void *A, int lda, void *B,
+                                            int ldb) {  // Stub: calls the "cublasSetMatrix" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *,
+                                                 int, void *, int);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMatrix");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetMatrix (int rows, int cols, int elemSize, 
-                                             const void *A, int lda, void *B,
-                                             int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize,
+                                            const void *A, int lda, void *B,
+                                            int ldb) {  // Stub: calls the "cublasGetMatrix" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *,
+                                                 int, void *, int);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMatrix");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetVectorAsync (int n, int elemSize, 
-                                                  const void *hostPtr, int incx, 
-                                                  void *devicePtr, int incy,
-                                                  cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize,
+                                                 const void *hostPtr, int incx,
+                                                 void *devicePtr, int incy,
+                                                 cudaStream_t stream) {  // Stub: calls the "cublasSetVectorAsync" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int,
+                                                 void *, int, cudaStream_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetVectorAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVectorAsync (int n, int elemSize,
-                                                  const void *devicePtr, int incx,
-                                                  void *hostPtr, int incy,
-                                                  cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize,
+                                                 const void *devicePtr,
+                                                 int incx, void *hostPtr,
+                                                 int incy,
+                                                 cudaStream_t stream) {  // Stub: calls the "cublasGetVectorAsync" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int,
+                                                 void *, int, cudaStream_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVectorAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync (int rows, int cols, int elemSize,
-                                                  const void *A, int lda, void *B,
-                                                  int ldb, cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols,
+                                                 int elemSize, const void *A,
+                                                 int lda, void *B, int ldb,
+                                                 cudaStream_t stream) {  // Stub: calls the "cublasSetMatrixAsync" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      int, int, int, const void *, int, void *, int, cudaStream_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMatrixAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync (int rows, int cols, int elemSize,
-                                                  const void *A, int lda, void *B,
-                                                  int ldb, cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols,
+                                                 int elemSize, const void *A,
+                                                 int lda, void *B, int ldb,
+                                                 cudaStream_t stream) {  // Stub: calls the "cublasGetMatrixAsync" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      int, int, int, const void *, int, void *, int, cudaStream_t);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMatrixAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream);
 }
 
-void CUBLASWINAPI cublasXerbla (const char *srName, int info) {
-  using FuncPtr = void (CUBLASWINAPI *)(const char *, int);
+void CUBLASWINAPI cublasXerbla(const char *srName, int info) {  // Stub: calls the "cublasXerbla" symbol obtained via LoadSymbol; returns void, so there is no status code to report a failed lookup.
+  using FuncPtr = void(CUBLASWINAPI *)(const char *, int);  // Type the symbol is called through. NOTE(review): on a null lookup LogFatalSymbolNotFound is called and func_ptr is still invoked afterwards; presumably the former never returns, confirm.
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasXerbla");
   if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla");
   return func_ptr(srName, info);
 }
 
-cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const void *x, 
-                                                     cudaDataType xType,
-                                                     int incx, 
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n,
+                                         const void *x, cudaDataType xType,
+                                         int incx, void *result,
+                                         cudaDataType resultType,
+                                         cudaDataType executionType) {  // Stub: calls the "cublasNrm2Ex" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, void *,
+      cudaDataType, cudaDataType);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasNrm2Ex");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, xType, incx, result, resultType, executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx,
+                                           float *result) {  // Stub: calls the "cublasSnrm2_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, float *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx,
+                                           double *result) {  // Stub: calls the "cublasDnrm2_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, double *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            float *result) {  // Stub: calls the "cublasScnrm2_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, float *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            double *result) {  // Stub: calls the "cublasDznrm2_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, double *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDznrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDotEx (cublasHandle_t handle,
-                                                     int n, 
-                                                     const void *x,
-                                                     cudaDataType xType, 
-                                                     int incx, 
-                                                     const void *y, 
-                                                     cudaDataType yType,
-                                                     int incy,
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n,
+                                        const void *x, cudaDataType xType,
+                                        int incx, const void *y,
+                                        cudaDataType yType, int incy,
+                                        void *result, cudaDataType resultType,
+                                        cudaDataType executionType) {  // Stub: calls the "cublasDotEx" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, void *, cudaDataType, cudaDataType);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDotEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType);
+  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType,
+                  executionType);  // Arguments are passed through unchanged, in declaration order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasDotcEx (cublasHandle_t handle,
-                                                     int n, 
-                                                     const void *x,
-                                                     cudaDataType xType, 
-                                                     int incx, 
-                                                     const void *y, 
-                                                     cudaDataType yType,
-                                                     int incy,
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n,
+                                         const void *x, cudaDataType xType,
+                                         int incx, const void *y,
+                                         cudaDataType yType, int incy,
+                                         void *result, cudaDataType resultType,
+                                         cudaDataType executionType) {  // Stub: calls the "cublasDotcEx" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, void *, cudaDataType, cudaDataType);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDotcEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType);
+  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType,
+                  executionType);  // Arguments are passed through unchanged, in declaration order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasSdot_v2 (cublasHandle_t handle,
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     const float *y, 
-                                                     int incy,
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n,
+                                          const float *x, int incx,
+                                          const float *y, int incy,
+                                          float *result) {  // Stub: calls the "cublasSdot_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, int, const float *, int, float *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDdot_v2 (cublasHandle_t handle,
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     const double *y,
-                                                     int incy,
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n,
+                                          const double *x, int incx,
+                                          const double *y, int incy,
+                                          double *result) {  // Stub: calls the "cublasDdot_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, int, const double *, int, double *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCdotu_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      const cuComplex *y, 
-                                                      int incy,
-                                                      cuComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           const cuComplex *y, int incy,
+                                           cuComplex *result) {  // Stub: calls the "cublasCdotu_v2" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     int, const cuComplex *, int, cuComplex *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotu_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCdotc_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      const cuComplex *y, 
-                                                      int incy,
-                                                      cuComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           const cuComplex *y, int incy,
+                                           cuComplex *result) {  // Stub: calls the "cublasCdotc_v2" symbol obtained via LoadSymbol.
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     int, const cuComplex *, int, cuComplex *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdotu_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      const cuDoubleComplex *y, 
-                                                      int incy,
-                                                      cuDoubleComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           const cuDoubleComplex *y, int incy,
+                                           cuDoubleComplex *result) {  // Stub: calls the "cublasZdotu_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotu_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdotc_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *y, 
-                                                      int incy,
-                                                      cuDoubleComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           const cuDoubleComplex *y, int incy,
+                                           cuDoubleComplex *result) {  // Stub: calls the "cublasZdotc_v2" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const void *alpha,  /* host or device pointer */
-                                                     cudaDataType alphaType,
-                                                     void *x, 
-                                                     cudaDataType xType,
-                                                     int incx,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, int, cudaDataType);
+cublasStatus_t CUBLASWINAPI
+cublasScalEx(cublasHandle_t handle, int n,
+             const void *alpha, /* host or device pointer */
+             cudaDataType alphaType, void *x, cudaDataType xType, int incx,
+             cudaDataType executionType) {  // Stub: calls the "cublasScalEx" symbol obtained via LoadSymbol.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType,
+      int, cudaDataType);  // Type the symbol is called through; if the lookup is null the stub returns GetSymbolNotFoundError().
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScalEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *alpha,  /* host or device pointer */
-                                                     float *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSscal_v2(cublasHandle_t handle, int n,
+               const float *alpha, /* host or device pointer */
+               float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *alpha,  /* host or device pointer */
-                                                     double *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDscal_v2(cublasHandle_t handle, int n,
+               const double *alpha, /* host or device pointer */
+               double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const cuComplex *alpha, /* host or device pointer */
-                                                     cuComplex *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCscal_v2(cublasHandle_t handle, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsscal_v2(cublasHandle_t handle, int n,
+                const float *alpha, /* host or device pointer */
+                cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */
-                                                     cuDoubleComplex *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZscal_v2(cublasHandle_t handle, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZdscal_v2(cublasHandle_t handle, int n,
+                const double *alpha, /* host or device pointer */
+                cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasAxpyEx (cublasHandle_t handle,
-                                                      int n,
-                                                      const void *alpha, /* host or device pointer */
-                                                      cudaDataType alphaType,
-                                                      const void *x,
-                                                      cudaDataType xType,
-                                                      int incx,
-                                                      void *y,
-                                                      cudaDataType yType,
-                                                      int incy,
-                                                      cudaDataType executiontype) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, const void *, cudaDataType, int, void *, cudaDataType, int, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasAxpyEx(
+    cublasHandle_t handle, int n,
+    const void *alpha, /* host or device pointer */
+    cudaDataType alphaType, const void *x, cudaDataType xType, int incx,
+    void *y, cudaDataType yType, int incy, cudaDataType executiontype) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, const void *,
+      cudaDataType, int, void *, cudaDataType, int, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasAxpyEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, executiontype);
+  return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy,
+                  executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      const float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSaxpy_v2(cublasHandle_t handle, int n,
+               const float *alpha, /* host or device pointer */
+               const float *x, int incx, float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */
-                                                      const double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDaxpy_v2(cublasHandle_t handle, int n,
+               const double *alpha, /* host or device pointer */
+               const double *x, int incx, double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *alpha, /* host or device pointer */
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCaxpy_v2(cublasHandle_t handle, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, cuComplex *y, int incy) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(
+    cublasHandle_t handle, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *,
+      int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
@@ -528,97 +529,82 @@ cublasStatus_t CUBLASWINAPI cublasCopyEx(cublasHandle_t handle, int n,
   return func_ptr(handle, n, x, xType, incx, y, yType, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx, float *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx, double *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuDoubleComplex *, int,
+                                                 cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n,
+                                           float *x, int incx, float *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *,
+                                                 int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n,
+                                           double *x, int incx, double *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *,
+                                                 int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n,
+                                           cuComplex *x, int incx, cuComplex *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n,
+                                           cuDoubleComplex *x, int incx,
+                                           cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
@@ -635,45 +621,41 @@ cublasStatus_t CUBLASWINAPI cublasSwapEx(cublasHandle_t handle, int n, void *x,
   return func_ptr(handle, n, x, xType, incx, y, yType, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n,
+                                            const float *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n,
+                                            const double *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
@@ -690,45 +672,41 @@ cublasStatus_t CUBLASWINAPI cublasIamaxEx(
   return func_ptr(handle, n, x, xType, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n,
+                                            const float *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n,
+                                            const double *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
@@ -757,129 +735,113 @@ cublasStatus_t CUBLASWINAPI cublasAsumEx(
   return func_ptr(handle, n, x, xType, incx, result, resultType, executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx,
+                                           float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx,
+                                           double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDzasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     float *x, 
-                                                     int incx, 
-                                                     float *y, 
-                                                     int incy, 
-                                                     const float *c,  /* host or device pointer */
-                                                     const float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int, const float *, const float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y,
+              int incy, const float *c, /* host or device pointer */
+              const float *s) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *,
+                                     int, const float *, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     double *x, 
-                                                     int incx, 
-                                                     double *y, 
-                                                     int incy, 
-                                                     const double *c,  /* host or device pointer */
-                                                     const double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int, const double *, const double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y,
+              int incy, const double *c, /* host or device pointer */
+              const double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, double *, int, double *, int, const double *,
+      const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuComplex *x, 
-                                                     int incx, 
-                                                     cuComplex *y, 
-                                                     int incy, 
-                                                     const float *c,      /* host or device pointer */
-                                                     const cuComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, const cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCrot_v2(
+    cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y,
+    int incy, const float *c, /* host or device pointer */
+    const cuComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *,
+      const cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuComplex *x, 
-                                                     int incx, 
-                                                     cuComplex *y, 
-                                                     int incy, 
-                                                     const float *c,  /* host or device pointer */
-                                                     const float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, const float *);
+cublasStatus_t CUBLASWINAPI cublasCsrot_v2(
+    cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y,
+    int incy, const float *c, /* host or device pointer */
+    const float *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *,
+      const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuDoubleComplex *x, 
-                                                     int incx, 
-                                                     cuDoubleComplex *y, 
-                                                     int incy, 
-                                                     const double *c,            /* host or device pointer */
-                                                     const cuDoubleComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, const double *, const cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZrot_v2(
+    cublasHandle_t handle, int n, cuDoubleComplex *x, int incx,
+    cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */
+    const cuDoubleComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+      const double *, const cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuDoubleComplex *x, 
-                                                     int incx, 
-                                                     cuDoubleComplex *y, 
-                                                     int incy, 
-                                                     const double *c,  /* host or device pointer */
-                                                     const double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, const double *, const double *);
+cublasStatus_t CUBLASWINAPI cublasZdrot_v2(
+    cublasHandle_t handle, int n, cuDoubleComplex *x, int incx,
+    cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */
+    const double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+      const double *, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
@@ -899,45 +861,50 @@ cublasRotEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, int incx,
                   executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotg_v2(cublasHandle_t handle, 
-                                                     float *a,   /* host or device pointer */
-                                                     float *b,   /* host or device pointer */
-                                                     float *c,   /* host or device pointer */
-                                                     float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, float *, float *, float *, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */
+               float *b,                        /* host or device pointer */
+               float *c,                        /* host or device pointer */
+               float *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *,
+                                                 float *, float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotg_v2(cublasHandle_t handle, 
-                                                     double *a,  /* host or device pointer */
-                                                     double *b,  /* host or device pointer */
-                                                     double *c,  /* host or device pointer */
-                                                     double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, double *, double *, double *, double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */
+               double *b,                        /* host or device pointer */
+               double *c,                        /* host or device pointer */
+               double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *,
+                                                 double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCrotg_v2(cublasHandle_t handle, 
-                                                     cuComplex *a,  /* host or device pointer */
-                                                     cuComplex *b,  /* host or device pointer */
-                                                     float *c,      /* host or device pointer */
-                                                     cuComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */
+               cuComplex *b,                        /* host or device pointer */
+               float *c,                            /* host or device pointer */
+               cuComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZrotg_v2(cublasHandle_t handle, 
-                                                     cuDoubleComplex *a,  /* host or device pointer */
-                                                     cuDoubleComplex *b,  /* host or device pointer */
-                                                     double *c,           /* host or device pointer */
-                                                     cuDoubleComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZrotg_v2(
+    cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */
+    cuDoubleComplex *b,                        /* host or device pointer */
+    double *c,                                 /* host or device pointer */
+    cuDoubleComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
@@ -959,27 +926,21 @@ cublasStatus_t CUBLASWINAPI cublasRotgEx(cublasHandle_t handle,
   return func_ptr(handle, a, b, abType, c, s, csType, executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     float *x, 
-                                                     int incx, 
-                                                     float *y, 
-                                                     int incy, 
-                                                     const float* param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int, const float *);
+cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n,
+                                           float *x, int incx, float *y,
+                                           int incy, const float *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, float *, int, float *, int, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     double *x, 
-                                                     int incx, 
-                                                     double *y, 
-                                                     int incy, 
-                                                     const double* param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int, const double *);
+cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n,
+                                           double *x, int incx, double *y,
+                                           int incy, const double *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, double *, int, double *, int, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, param);
@@ -999,25 +960,27 @@ cublasRotmEx(cublasHandle_t handle, int n, void *x, cudaDataType xType,
                   executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotmg_v2(cublasHandle_t handle, 
-                                                      float *d1,        /* host or device pointer */
-                                                      float *d2,        /* host or device pointer */
-                                                      float *x1,        /* host or device pointer */
-                                                      const float *y1,  /* host or device pointer */
-                                                      float *param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, float *, float *, float *, const float *, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */
+                float *d2,                        /* host or device pointer */
+                float *x1,                        /* host or device pointer */
+                const float *y1,                  /* host or device pointer */
+                float *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, float *, float *, float *, const float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotmg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, d1, d2, x1, y1, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotmg_v2(cublasHandle_t handle, 
-                                                      double *d1,        /* host or device pointer */  
-                                                      double *d2,        /* host or device pointer */  
-                                                      double *x1,        /* host or device pointer */  
-                                                      const double *y1,  /* host or device pointer */  
-                                                      double *param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, double *, double *, double *, const double *, double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */
+                double *d2,                        /* host or device pointer */
+                double *x1,                        /* host or device pointer */
+                const double *y1,                  /* host or device pointer */
+                double *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, double *, double *, double *, const double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotmg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, d1, d2, x1, y1, param);
@@ -1040,2031 +1003,1701 @@ cublasRotmgEx(cublasHandle_t handle, void *d1,     /* host or device pointer */
                   paramType, executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m, 
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      const float *A, 
-                                                      int lda, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      const float *beta,  /* host or device pointer */
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const float *, const float *,
+      int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x, 
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgemv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgbmv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A, 
-                                                      int lda, 
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               int kl, int ku, const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const float *,
+      const float *, int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda, 
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               int kl, int ku, const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda, 
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl,
+    int ku, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *x, int incx,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda, 
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl,
+    int ku, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const float *A, int lda, float *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *AP, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *AP, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *AP, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const double *AP, double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *AP, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *AP, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *AP,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *AP, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *AP, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *AP, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const double *AP, double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *AP, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *AP, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *AP,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const float *A, int lda, float *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsymv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */ 
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta, /* host or device pointer */ 
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsymv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsymv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsymv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta,   /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsymv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChemv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhemv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta,   /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhemv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,   /* host or device pointer */ 
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *x, 
-                                                      int incx,
-                                                      const float *beta,  /* host or device pointer */ 
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *,
+      int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,   /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x, 
-                                                      int incx,
-                                                      const double *beta,   /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x, 
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const float *alpha,  /* host or device pointer */                                           
-                                                      const float *AP,
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *AP, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *,
+      const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *AP,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const double *alpha, /* host or device pointer */
+               const double *AP, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *AP,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *AP, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *AP,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx,
+               const cuDoubleComplex *beta, /* host or device pointer */
+               cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSger_v2 (cublasHandle_t handle,
-                                                     int m,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     const float *y,
-                                                     int incy,
-                                                     float *A,
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSger_v2(  // calls func_ptr; error if null
+    cublasHandle_t handle, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *x, int incx, const float *y, int incy, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSger_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDger_v2 (cublasHandle_t handle, 
-                                                     int m,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */   
-                                                     const double *x,
-                                                     int incx,
-                                                     const double *y,
-                                                     int incy,
-                                                     double *A,
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDger_v2(  // calls func_ptr; error if null
+    cublasHandle_t handle, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const double *, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDger_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgeru_v2 (cublasHandle_t handle, 
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasCgeru_v2(cublasHandle_t handle, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeru_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgerc_v2 (cublasHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasCgerc_v2(cublasHandle_t handle, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgerc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgeru_v2 (cublasHandle_t handle, 
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZgeru_v2(cublasHandle_t handle, int m, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeru_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgerc_v2 (cublasHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZgerc_v2(cublasHandle_t handle, int m, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgerc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     float *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const float *x, int incx, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const double *x,
-                                                     int incx,
-                                                     double *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const double *x, int incx, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const cuComplex *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const cuDoubleComplex *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *,
+      int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     float *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const float *x, int incx, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const double *x,
-                                                     int incx,
-                                                     double *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const double *x, int incx, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *,
+      int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *y,
-                                                      int incy,
-                                                      float *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(  // calls func_ptr; error if null
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const float *alpha, /* host or device pointer */
+    const float *x, int incx, const float *y, int incy, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *y,
-                                                      int incy,
-                                                      double *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(  // calls func_ptr; error if null
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, int n, 
-                                                      const cuComplex *alpha,  /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx, 
-                                                      const cuComplex *y,
-                                                      int incy, 
-                                                      cuComplex *A, 
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI  // calls func_ptr; error if null
+cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // type of the loaded symbol
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, int n, 
-                                                      const cuComplex *alpha,  /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx, 
-                                                      const cuComplex *y,
-                                                      int incy, 
-                                                      cuComplex *A, 
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *y,
-                                                      int incy,
-                                                      float *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *x, int incx, const float *y, int incy, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const double *x,
-                                                      int incx, 
-                                                      const double *y,
-                                                      int incy,
-                                                      double *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDspr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasChpr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A, 
-                                                      int lda,
-                                                      const float *B,
-                                                      int ldb, 
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A, 
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb, 
-                                                      const double *beta, /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A, 
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb, 
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3m  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A, 
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb, 
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3m(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3m");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3mEx (cublasHandle_t handle, 
-                                                     cublasOperation_t transa, cublasOperation_t transb,  
-                                                     int m, int n, int k, 
-                                                     const cuComplex *alpha, 
-                                                     const void *A, 
-                                                     cudaDataType Atype, 
-                                                     int lda, 
-                                                     const void *B, 
-                                                     cudaDataType Btype, 
-                                                     int ldb,
-                                                     const cuComplex *beta, 
-                                                     void *C, 
-                                                     cudaDataType Ctype, 
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const void *, cudaDataType, int, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3mEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, const void *A,
+    cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb,
+    const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A, 
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb, 
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemm3m  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A, 
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb, 
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa,
+              cublasOperation_t transb, int m, int n, int k,
+              const cuDoubleComplex *alpha, /* host or device pointer */
+              const cuDoubleComplex *A, int lda, const cuDoubleComplex *B,
+              int ldb, const cuDoubleComplex *beta, /* host or device pointer */
+              cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm3m");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemmEx  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const void *B,
-                                                      cudaDataType Btype,
-                                                      int ldb, 
-                                                      const float *beta, /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const void *, cudaDataType, int, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasSgemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda, const void *B,
+    cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const float *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGemmEx  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const void *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const void *B,
-                                                      cudaDataType Btype,
-                                                      int ldb, 
-                                                      const void *beta, /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc,
-                                                      cudaDataType computeType,
-                                                      cublasGemmAlgo_t algo) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const void *, const void *, cudaDataType, int, const void *, cudaDataType, int, const void *, void *, cudaDataType, int, cudaDataType, cublasGemmAlgo_t);
+cublasStatus_t CUBLASWINAPI cublasGemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const void *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda, const void *B,
+    cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc, cudaDataType computeType,
+    cublasGemmAlgo_t algo) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const void *, const void *, cudaDataType, int, const void *, cudaDataType,
+      int, const void *, void *, cudaDataType, int, cudaDataType,
+      cublasGemmAlgo_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc, computeType, algo);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc, computeType, algo);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemmEx (cublasHandle_t handle, 
-                                                     cublasOperation_t transa, cublasOperation_t transb,  
-                                                     int m, int n, int k, 
-                                                     const cuComplex *alpha, 
-                                                     const void *A, 
-                                                     cudaDataType Atype, 
-                                                     int lda, 
-                                                     const void *B, 
-                                                     cudaDataType Btype, 
-                                                     int ldb,
-                                                     const cuComplex *beta, 
-                                                     void *C, 
-                                                     cudaDataType Ctype, 
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const void *, cudaDataType, int, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCgemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, const void *A,
+    cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb,
+    const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasUint8gemmBias (cublasHandle_t handle, 
-                                                           cublasOperation_t transa, cublasOperation_t transb, cublasOperation_t transc,  
-                                                           int m, int n, int k, 
-                                                           const unsigned char *A, int A_bias, int lda, 
-                                                           const unsigned char *B, int B_bias, int ldb,
-                                                                 unsigned char *C, int C_bias, int ldc,
-                                                           int C_mult, int C_shift) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, int, int, int, const unsigned char *, int, int, const unsigned char *, int, int, unsigned char *, int, int, int, int);
+cublasStatus_t CUBLASWINAPI cublasUint8gemmBias(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    cublasOperation_t transc, int m, int n, int k, const unsigned char *A,
+    int A_bias, int lda, const unsigned char *B, int B_bias, int ldb,
+    unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t,
+      int, int, int, const unsigned char *, int, int, const unsigned char *,
+      int, int, unsigned char *, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasUint8gemmBias");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, B_bias, ldb, C, C_bias, ldc, C_mult, C_shift);
+  return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B,
+                  B_bias, ldb, C, C_bias, ldc, C_mult, C_shift);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha,           /* host or device pointer */
+    const float *A, int lda, const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha,            /* host or device pointer */
+    const double *A, int lda, const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrkEx ( cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype, 
-                                                      int lda,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      void *C, 
-                                                      cudaDataType Ctype, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrkEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const cuComplex *,
+      void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrkEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx(cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      int n, 
-                                                      int k,
-                                                      const cuComplex *alpha, 
-                                                      const void *A, 
-                                                      cudaDataType Atype, 
-                                                      int lda,
-                                                      const cuComplex *beta, 
-                                                      void *C, 
-                                                      cudaDataType Ctype, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype,
+    int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const cuComplex *,
+      void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCherk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha,               /* host or device pointer */
+    const cuComplex *A, int lda, const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const cuComplex *, int, const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZherk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZherk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const cuDoubleComplex *, int, const double *,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherkEx  (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCherkEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda,
+    const float *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const void *, cudaDataType, int, const float *, void *,
+      cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherkEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherk3mEx (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo, 
-                                                       cublasOperation_t trans, 
-                                                       int n, 
-                                                       int k,
-                                                       const float *alpha, 
-                                                       const void *A, cudaDataType Atype, 
-                                                       int lda,
-                                                       const float *beta, 
-                                                       void *C, 
-                                                       cudaDataType Ctype, 
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCherk3mEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, const void *A, cudaDataType Atype,
+    int lda, const float *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const void *, cudaDataType, int, const float *, void *,
+      cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const float *alpha, /* host or device pointer */  
-                                                       const float *A,
-                                                       int lda,
-                                                       const float *B,
-                                                       int ldb,
-                                                       const float *beta, /* host or device pointer */  
-                                                       float *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const double *alpha, /* host or device pointer */  
-                                                       const double *A,
-                                                       int lda,
-                                                       const double *B,
-                                                       int ldb,
-                                                       const double *beta, /* host or device pointer */  
-                                                       double *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuComplex *alpha, /* host or device pointer */  
-                                                       const cuComplex *A,
-                                                       int lda,
-                                                       const cuComplex *B,
-                                                       int ldb,
-                                                       const cuComplex *beta, /* host or device pointer */  
-                                                       cuComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                       const cuDoubleComplex *A,
-                                                       int lda,
-                                                       const cuDoubleComplex *B,
-                                                       int ldb,
-                                                       const cuDoubleComplex *beta,  /* host or device pointer */  
-                                                       cuDoubleComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuComplex *alpha, /* host or device pointer */  
-                                                       const cuComplex *A,
-                                                       int lda,
-                                                       const cuComplex *B,
-                                                       int ldb,
-                                                       const float *beta,   /* host or device pointer */  
-                                                       cuComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCher2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans, 
-                                                       int n,
-                                                       int k,
-                                                       const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                       const cuDoubleComplex *A, 
-                                                       int lda,
-                                                       const cuDoubleComplex *B,
-                                                       int ldb,
-                                                       const double *beta, /* host or device pointer */  
-                                                       cuDoubleComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZher2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const float *alpha, /* host or device pointer */ 
-                                                    const float *A,
-                                                    int lda,
-                                                    const float *B,
-                                                    int ldb,
-                                                    const float *beta, /* host or device pointer */ 
-                                                    float *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const double *alpha, /* host or device pointer */ 
-                                                    const double *A,
-                                                    int lda,
-                                                    const double *B,
-                                                    int ldb,
-                                                    const double *beta, /* host or device pointer */ 
-                                                    double *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuComplex *alpha, /* host or device pointer */ 
-                                                    const cuComplex *A,
-                                                    int lda,
-                                                    const cuComplex *B,
-                                                    int ldb,
-                                                    const cuComplex *beta, /* host or device pointer */ 
-                                                    cuComplex *C, 
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo, 
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                    const cuDoubleComplex *A,
-                                                    int lda,
-                                                    const cuDoubleComplex *B,
-                                                    int ldb,
-                                                    const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                    cuDoubleComplex *C, 
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuComplex *alpha, /* host or device pointer */ 
-                                                    const cuComplex *A,
-                                                    int lda,
-                                                    const cuComplex *B,
-                                                    int ldb,
-                                                    const float *beta, /* host or device pointer */ 
-                                                    cuComplex *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZherkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                    const cuDoubleComplex *A,
-                                                    int lda,
-                                                    const cuDoubleComplex *B,
-                                                    int ldb,
-                                                    const double *beta, /* host or device pointer */ 
-                                                    cuDoubleComplex *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *B,
-                                                      int ldb,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m, 
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb,
-                                                      const double *beta, /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChemm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasChemm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhemm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhemm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrsm_v2 (cublasHandle_t handle, 
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      float *B,
-                                                      int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, float *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int, float *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrsm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *B,
-                                                      int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, double *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int, double *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle,
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     cuComplex *B,
-                                                     int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, cuComplex *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, 
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *A,                                        
-                                                     int lda,
-                                                     cuDoubleComplex *B,
-                                                     int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrmm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda, 
-                                                      const float *B,
-                                                      int ldb,
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrmm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb,
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb, double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle,
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     const cuComplex *B,
-                                                     int ldb,
-                                                     cuComplex *C,
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C,
+    int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, 
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *A,
-                                                     int lda,
-                                                     const cuDoubleComplex *B,
-                                                     int ldb,
-                                                     cuDoubleComplex *C,
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasSgemmBatched(
@@ -3079,7 +2712,8 @@ cublasStatus_t CUBLASWINAPI cublasSgemmBatched(
       const float *, float *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasDgemmBatched(
@@ -3094,7 +2728,8 @@ cublasStatus_t CUBLASWINAPI cublasDgemmBatched(
       const double *, double *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCgemmBatched(
@@ -3110,7 +2745,8 @@ cublasStatus_t CUBLASWINAPI cublasCgemmBatched(
       int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched(
@@ -3126,7 +2762,8 @@ cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched(
       int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI
@@ -3144,7 +2781,8 @@ cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa,
       cuDoubleComplex *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(
@@ -3188,200 +2826,155 @@ cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(
                   batchCount, computeType, algo);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const float *alpha,  /* host or device pointer */
-                                                                 const float *A,
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const float *B,
-                                                                 int ldb,
-                                                                 long long int strideB,
-                                                                 const float *beta,   /* host or device pointer */
-                                                                 float *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const float *, int, long long, const float *, int, long long, const float *, float *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha,        /* host or device pointer */
+    const float *A, int lda, long long int strideA, /* purposely signed */
+    const float *B, int ldb, long long int strideB,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *, int, long long, const float *, int,
+      long long, const float *, float *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const double *alpha,  /* host or device pointer */
-                                                                 const double *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const double *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const double *beta,   /* host or device pointer */
-                                                                 double *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const double *, const double *, int, long long, const double *, int, long long, const double *, double *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, long long int strideA, /* purposely signed */
+    const double *B, int ldb, long long int strideB,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *, int, long long, const double *, int,
+      long long, const double *, double *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuComplex *alpha,  /* host or device pointer */
-                                                                 const cuComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuComplex *beta,   /* host or device pointer */
-                                                                 cuComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, long long, const cuComplex *, int, long long, const cuComplex *, cuComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuComplex *alpha,  /* host or device pointer */
-                                                                 const cuComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuComplex *beta,   /* host or device pointer */
-                                                                 cuComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, long long, const cuComplex *, int, long long, const cuComplex *, cuComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuDoubleComplex *alpha,  /* host or device pointer */
-                                                                 const cuDoubleComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuDoubleComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuDoubleComplex *beta,   /* host or device poi */
-                                                                 cuDoubleComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, long long, const cuDoubleComplex *, cuDoubleComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    long long int strideA, /* purposely signed */
+    const cuDoubleComplex *B, int ldb, long long int strideB,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, long long,
+      const cuDoubleComplex *, int, long long, const cuDoubleComplex *,
+      cuDoubleComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const float *alpha, /* host or device pointer */ 
-                                                  const float *A, 
-                                                  int lda,
-                                                  const float *beta , /* host or device pointer */ 
-                                                  const float *B, 
-                                                  int ldb,
-                                                  float *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const float *alpha,           /* host or device pointer */
+    const float *A, int lda, const float *beta, /* host or device pointer */
+    const float *B, int ldb, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, const float *, int,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const double *alpha, /* host or device pointer */ 
-                                                  const double *A, 
-                                                  int lda,
-                                                  const double *beta, /* host or device pointer */ 
-                                                  const double *B, 
-                                                  int ldb,
-                                                  double *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const double *alpha,            /* host or device pointer */
+    const double *A, int lda, const double *beta, /* host or device pointer */
+    const double *B, int ldb, double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, const double *, int,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const cuComplex *alpha, /* host or device pointer */ 
-                                                  const cuComplex *A, 
-                                                  int lda,
-                                                  const cuComplex *beta, /* host or device pointer */  
-                                                  const cuComplex *B, 
-                                                  int ldb,
-                                                  cuComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    const cuComplex *B, int ldb, cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                  const cuDoubleComplex *A, 
-                                                  int lda,
-                                                  const cuDoubleComplex *beta, /* host or device pointer */  
-                                                  const cuDoubleComplex *B, 
-                                                  int ldb,
-                                                  cuDoubleComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(
@@ -3494,7 +3087,8 @@ cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(
       const int *, float *const[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(
@@ -3506,7 +3100,8 @@ cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(
       const int *, double *const[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(
@@ -3518,7 +3113,8 @@ cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(
       int, const int *, cuComplex *const[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(
@@ -3531,7 +3127,8 @@ cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(
       cuDoubleComplex *const[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasStrsmBatched(
@@ -3546,7 +3143,8 @@ cublasStatus_t CUBLASWINAPI cublasStrsmBatched(
       float *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(
@@ -3561,7 +3159,8 @@ cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(
       double *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(
@@ -3576,7 +3175,8 @@ cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(
       int, cuComplex *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(
@@ -3591,7 +3191,8 @@ cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(
       const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
 cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(
@@ -3710,7 +3311,8 @@ cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
       float *const[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI
@@ -3724,7 +3326,8 @@ cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
       double *const[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI
@@ -3737,7 +3340,8 @@ cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
       cuComplex *const[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI
@@ -3751,1467 +3355,1666 @@ cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
       int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const float *A, 
-                                                  int lda,
-                                                  const float *x, 
-                                                  int incx,
-                                                  float *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const float *, int, const float *, int, float *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const float *A, int lda, const float *x,
+                                        int incx, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const double *A, 
-                                                  int lda,
-                                                  const double *x, 
-                                                  int incx,
-                                                  double *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const double *, int, const double *, int, double *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const double *A, int lda,
+                                        const double *x, int incx, double *C,
+                                        int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const cuComplex *A, 
-                                                  int lda,
-                                                  const cuComplex *x, 
-                                                  int incx,
-                                                  cuComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const cuComplex *A, int lda,
+                                        const cuComplex *x, int incx,
+                                        cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const cuDoubleComplex *A, 
-                                                  int lda,
-                                                  const cuDoubleComplex *x, 
-                                                  int incx,
-                                                  cuDoubleComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const cuDoubleComplex *A, int lda,
+                                        const cuDoubleComplex *x, int incx,
+                                        cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const float *AP,
-                                                     float *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const float *AP, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const double *AP,
-                                                     double *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const double *AP, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuComplex *AP,
-                                                     cuComplex *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuComplex *AP, cuComplex *A,
+                                         int lda) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int,
+                                     const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuDoubleComplex *AP,
-                                                     cuDoubleComplex *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuDoubleComplex *AP,
+                                         cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const float *A,
-                                                     int lda,
-                                                     float *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const float *A, int lda, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const double *A,
-                                                     int lda,
-                                                     double *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const double *A, int lda, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     cuComplex *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuComplex *A, int lda,
+                                         cuComplex *AP) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int,
+                                     const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuDoubleComplex *A,
-                                                     int lda,
-                                                     cuDoubleComplex *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuDoubleComplex *A, int lda,
+                                         cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus CUBLASWINAPI cublasInit (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasInit(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasInit");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
-cublasStatus CUBLASWINAPI cublasShutdown (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasShutdown(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasShutdown");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
-cublasStatus CUBLASWINAPI cublasGetError (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasGetError(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
 cublasStatus CUBLASWINAPI cublasGetVersion(int *version) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int *);
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVersion");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(version);
 }
 
-cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, void **);
+cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasAlloc");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, devicePtr);
 }
 
-cublasStatus CUBLASWINAPI cublasFree (void *devicePtr) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(void *);
+cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasFree");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(devicePtr);
 }
 
-cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cudaStream_t);
+cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetKernelStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stream);
 }
 
-float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int);
+float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int);
+double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const cuComplex *, int);
+float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDznrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y, 
-                               int incy) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int, const float *, int);
+float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y,
+                              int incy) {
+  using FuncPtr =
+      float(CUBLASWINAPI *)(int, const float *, int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSdot");
   return func_ptr(n, x, incx, y, incy);
 }
 
-double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y, 
-                               int incy) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int, const double *, int);
+double CUBLASWINAPI cublasDdot(int n, const double *x, int incx,
+                               const double *y, int incy) {
+  using FuncPtr =
+      double(CUBLASWINAPI *)(int, const double *, int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDdot");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y, 
-                               int incy) {
-  using FuncPtr = cuComplex (CUBLASWINAPI *)(int, const cuComplex *, int, const cuComplex *, int);
+cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx,
+                                   const cuComplex *y, int incy) {
+  using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int,
+                                            const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotu");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y, 
-                               int incy) {
-  using FuncPtr = cuComplex (CUBLASWINAPI *)(int, const cuComplex *, int, const cuComplex *, int);
+cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx,
+                                   const cuComplex *y, int incy) {
+  using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int,
+                                            const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = cuDoubleComplex (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
+cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x,
+                                         int incx, const cuDoubleComplex *y,
+                                         int incy) {
+  using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)(
+      int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotu");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = cuDoubleComplex (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
+cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x,
+                                         int incx, const cuDoubleComplex *y,
+                                         int incy) {
+  using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)(
+      int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, float *, int);
+void CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, double *, int);
+void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex *x,
+                               int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx, 
-                               float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx,
+                              float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x, 
-                               int incx, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx,
+                              double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x, 
-                               int incx, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x,
+                              int incx, cuComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, 
-                               int incx, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const float *, int, float *, int);
+void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int,
+                                       cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int);
+void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int);
+void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const float *, int);
+int CUBLASWINAPI cublasIsamax(int n, const float *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const double *, int);
+int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuComplex *, int);
+int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const float *, int);
+int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const double *, int);
+int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuComplex *, int);
+int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasSasum (int n, const float *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int);
+float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSasum");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDasum (int n, const double *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int);
+double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDasum");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const cuComplex *, int);
+float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScasum");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDzasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum");
   return func_ptr(n, x, incx);
 }
 
-void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy, 
-                              float sc, float ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int, float, float);
+void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy,
+                             float sc, float ss) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float *, int, float *, int, float, float);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrot");
   return func_ptr(n, x, incx, y, incy, sc, ss);
 }
 
-void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy, 
-                              double sc, double ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int, double, double);
+void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy,
+                             double sc, double ss) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrot");
   return func_ptr(n, x, incx, y, incy, sc, ss);
 }
 
-void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, 
-                              int incy, float c, cuComplex s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, float, cuComplex);
+void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y,
+                             int incy, float c, cuComplex s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int,
+                                       float, cuComplex);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx, 
-                              cuDoubleComplex *y, int incy, double sc, 
-                              cuDoubleComplex cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, double, cuDoubleComplex);
+void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *y, int incy, double sc,
+                             cuDoubleComplex cs) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+                           double, cuDoubleComplex);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZrot");
   return func_ptr(n, x, incx, y, incy, sc, cs);
 }
 
-void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
-                               int incy, float c, float s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, float, float);
+void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y,
+                              int incy, float c, float s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int,
+                                       float, float);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx, 
-                               cuDoubleComplex *y, int incy, double c, double s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, double, double);
+void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy, double c,
+                              double s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int,
+                                       cuDoubleComplex *, int, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(float *, float *, float *, float *);
+void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) {
+  using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg");
   return func_ptr(sa, sb, sc, ss);
 }
 
-void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(double *, double *, double *, double *);
+void CUBLASWINAPI cublasDrotg(double *sa, double *sb, double *sc, double *ss) {
+  using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg");
   return func_ptr(sa, sb, sc, ss);
 }
 
-void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
-                               cuComplex *cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *);
+void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc,
+                              cuComplex *cs) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg");
   return func_ptr(ca, cb, sc, cs);
 }
 
-void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
-                               cuDoubleComplex *cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, double *, cuDoubleComplex *);
+void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb,
+                              double *sc, cuDoubleComplex *cs) {
+  using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex,
+                                       double *, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg");
   return func_ptr(ca, cb, sc, cs);
 }
 
-void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, 
-                              const float* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int, const float *);
+void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
+                              const float *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm");
   return func_ptr(n, x, incx, y, incy, sparam);
 }
 
-void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, 
-                              const double* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int, const double *);
+void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
+                              const double *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm");
   return func_ptr(n, x, incx, y, incy, sparam);
 }
 
-void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1, 
-                                const float *sy1, float* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(float *, float *, float *, const float *, float *);
+void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1,
+                               const float *sy1, float *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotmg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg");
   return func_ptr(sd1, sd2, sx1, sy1, sparam);
 }
 
-void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1, 
-                                const double *sy1, double* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(double *, double *, double *, const double *, double *);
+void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1,
+                               const double *sy1, double *sparam) {
+  using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *,
+                                       const double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotmg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg");
   return func_ptr(sd1, sd2, sx1, sy1, sparam);
 }
 
-void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
-                               const float *A, int lda, const float *x, int incx,
-                               float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha,
+                              const float *A, int lda, const float *x, int incx,
+                              float beta, float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
-                               const double *A, int lda, const double *x, int incx,
-                               double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha,
+                              const double *A, int lda, const double *x,
+                              int incx, double beta, double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
-                               const cuComplex *A, int lda, const cuComplex *x, int incx,
-                               cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
-                               cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku, 
-                               float alpha, const float *A, int lda, 
-                               const float *x, int incx, float beta, float *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku,
+                              float alpha, const float *A, int lda,
+                              const float *x, int incx, float beta, float *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku, 
-                               double alpha, const double *A, int lda, 
-                               const double *x, int incx, double beta, double *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku,
+                              double alpha, const double *A, int lda,
+                              const double *x, int incx, double beta, double *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *,
+                           int, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku, 
-                               cuComplex alpha, const cuComplex *A, int lda, 
-                               const cuComplex *x, int incx, cuComplex beta, cuComplex *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *x, int incx, cuComplex beta,
+                              cuComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku, 
-                               cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, 
-                               const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n, 
-                               const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *,
+                                       int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n, 
-                               const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n, 
-                               const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n, 
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k, 
-                               const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *,
+                           int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
+void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n,
+                              const float *AP, float *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, double *, int);
+void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n,
+                              const double *AP, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, cuComplex *, int);
+void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n,
+                              const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *AP, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *,
+                                       int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda, 
+void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *A, int lda,
                               cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP, 
-                              float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
+void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n,
+                              const float *AP, float *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, double *, int);
+void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n,
+                              const double *AP, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, cuComplex *, int);
+void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n,
+                              const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, 
-                              cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *AP, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasStbsv(char uplo, char trans, 
-                              char diag, int n, int k, const float *A, 
-                              int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const double *A, 
-                              int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const cuComplex *A, 
-                              int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const cuDoubleComplex *A, 
-                              int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *,
+                           int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
-                               int lda, const float *x, int incx, float beta, 
-                               float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A,
+                              int lda, const float *x, int incx, float beta,
+                              float *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
-                               int lda, const double *x, int incx, double beta, 
-                               double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A,
+                              int lda, const double *x, int incx, double beta,
+                              double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, const cuComplex *x, int incx, cuComplex beta, 
-                               cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChemv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
-                               int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta, 
-                               cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha, 
-                               const float *A, int lda, const float *x, int incx, 
-                               float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha,
+                              const float *A, int lda, const float *x, int incx,
+                              float beta, float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha, 
-                               const double *A, int lda, const double *x, int incx, 
-                               double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha,
+                              const double *A, int lda, const double *x,
+                              int incx, double beta, double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha, 
-                               const cuComplex *A, int lda, const cuComplex *x, int incx, 
-                               cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, 
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, 
-                               cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
-                              const float *AP, const float *x,
-                              int incx, float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP,
+                              const float *x, int incx, float beta, float *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
-                              const double *AP, const double *x,
-                              int incx, double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP,
+                              const double *x, int incx, double beta, double *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, const double *,
+                           int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
 void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
-                              const cuComplex *AP, const cuComplex *x,
-                              int incx, cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, const cuComplex *, int, cuComplex, cuComplex *, int);
+                              const cuComplex *AP, const cuComplex *x, int incx,
+                              cuComplex beta, cuComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
 void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
-                              const cuDoubleComplex *AP, const cuDoubleComplex *x,
-                              int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+                              const cuDoubleComplex *AP,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
-                              const float *y, int incy, float *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, float, const float *, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x,
+                             int incx, const float *y, int incy, float *A,
+                             int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSger");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSger");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
-                              const double *y, int incy, double *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, double, const double *, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x,
+                             int incx, const double *y, int incy, double *A,
+                             int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDger");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDger");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
-                               int incx, const cuComplex *y, int incy,
-                               cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x,
+                              int incx, const cuComplex *y, int incy,
+                              cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeru");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgeru");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
-                               int incx, const cuComplex *y, int incy,
-                               cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x,
+                              int incx, const cuComplex *y, int incy,
+                              cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgerc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
-                               int incx, const cuDoubleComplex *y, int incy,
-                               cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeru");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
-                               int incx, const cuDoubleComplex *y, int incy,
-                               cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgerc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
-                              int incx, float *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x,
+                             int incx, float *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
-                              int incx, double *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double *x,
+                             int incx, double *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasCher (char uplo, int n, float alpha, 
-                              const cuComplex *x, int incx, cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x,
+                             int incx, cuComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasZher (char uplo, int n, double alpha, 
-                              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher(char uplo, int n, double alpha,
+                             const cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
-                              int incx, float *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, float *);
+void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x,
+                             int incx, float *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, float, const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
-                              int incx, double *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, double *);
+void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x,
+                             int incx, double *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
-                              int incx, cuComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const cuComplex *, int, cuComplex *);
+void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x,
+                             int incx, cuComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int,
+                                       cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
-                              int incx, cuDoubleComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *);
+void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha,
+                             const cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x, 
-                               int incx, const float *y, int incy, float *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x,
+                              int incx, const float *y, int incy, float *A,
+                              int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x, 
-                               int incx, const double *y, int incy, double *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x,
+                              int incx, const double *y, int incy, double *A,
+                              int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, 
-                               int incx, const cuComplex *y, int incy, cuComplex *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha,
+                              const cuComplex *x, int incx, const cuComplex *y,
+                              int incy, cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, 
-                               int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x, 
-                               int incx, const float *y, int incy, float *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float *);
+void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float *x,
+                              int incx, const float *y, int incy, float *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
-                               const double *x, int incx, const double *y,
-                               int incy, double *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double *);
+void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x,
+                              int incx, const double *y, int incy, double *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
-                               const cuComplex *x, int incx, const cuComplex *y,
-                               int incy, cuComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha,
+                              const cuComplex *x, int incx, const cuComplex *y,
+                              int incy, cuComplex *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
-                               int incy, cuDoubleComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k, 
-                               float alpha, const float *A, int lda, 
-                               const float *B, int ldb, float beta, float *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k,
+                              float alpha, const float *A, int lda,
+                              const float *B, int ldb, float beta, float *C,
+                              int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
-                               double alpha, const double *A, int lda, 
-                               const double *B, int ldb, double beta, double *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k,
+                              double alpha, const double *A, int lda,
+                              const double *B, int ldb, double beta, double *C,
+                              int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *,
+                           int, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k, 
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               const cuComplex *B, int ldb, cuComplex beta,
-                               cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
-                               int k, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               const cuDoubleComplex *B, int ldb,
-                               cuDoubleComplex beta, cuDoubleComplex *C,
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha, 
-                               const float *A, int lda, float beta, float *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha,
+                              const float *A, int lda, float beta, float *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
-                               double alpha, const double *A, int lda,
-                               double beta, double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha,
+                              const double *A, int lda, double beta, double *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, double, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               cuComplex beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              cuComplex beta, cuComplex *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *,
+                           int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
-                               cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               cuDoubleComplex beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex,
+                                       const cuDoubleComplex *, int,
+                                       cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
-                               float alpha, const cuComplex *A, int lda,
-                               float beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha,
+                              const cuComplex *A, int lda, float beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int,
+                           float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCherk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
-                               double alpha,
-                               const cuDoubleComplex *A, int lda,
-                               double beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha,
+                              const cuDoubleComplex *A, int lda, double beta,
+                              cuDoubleComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double,
+                                       const cuDoubleComplex *, int, double,
+                                       cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZherk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha, 
-                                const float *A, int lda, const float *B, int ldb, 
-                                float beta, float *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha,
+                               const float *A, int lda, const float *B, int ldb,
+                               float beta, float *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
-                                double alpha, const double *A, int lda,
-                                const double *B, int ldb, double beta,
-                                double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k,
+                               double alpha, const double *A, int lda,
+                               const double *B, int ldb, double beta, double *C,
+                               int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
-                                cuComplex alpha, const cuComplex *A, int lda,
-                                const cuComplex *B, int ldb, cuComplex beta,
-                                cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k,
+                               cuComplex alpha, const cuComplex *A, int lda,
+                               const cuComplex *B, int ldb, cuComplex beta,
+                               cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
-                                cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                                const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
-                                cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k,
+                               cuDoubleComplex alpha, const cuDoubleComplex *A,
+                               int lda, const cuDoubleComplex *B, int ldb,
+                               cuDoubleComplex beta, cuDoubleComplex *C,
+                               int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
-                                cuComplex alpha, const cuComplex *A, int lda,
-                                const cuComplex *B, int ldb, float beta,
-                                cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k,
+                               cuComplex alpha, const cuComplex *A, int lda,
+                               const cuComplex *B, int ldb, float beta,
+                               cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
-                                cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                                const cuDoubleComplex *B, int ldb, double beta,
-                                cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k,
+                               cuDoubleComplex alpha, const cuDoubleComplex *A,
+                               int lda, const cuDoubleComplex *B, int ldb,
+                               double beta, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha, 
-                               const float *A, int lda, const float *B, int ldb,
-                               float beta, float *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha,
+                              const float *A, int lda, const float *B, int ldb,
+                              float beta, float *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha, 
-                               const double *A, int lda, const double *B, int ldb,
-                               double beta, double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha,
+                              const double *A, int lda, const double *B,
+                              int ldb, double beta, double *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha, 
-                               const cuComplex *A, int lda, const cuComplex *B, int ldb,
-                               cuComplex beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha, 
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
-                               cuDoubleComplex beta, cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               const cuComplex *B, int ldb, cuComplex beta,
-                               cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChemm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
-                               cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                               const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
-                               int m, int n, float alpha, const float *A, int lda,
-                               float *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, float alpha, const float *A,
+                              int lda, float *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
-                               char diag, int m, int n, double alpha,
-                               const double *A, int lda, double *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, double alpha, const double *A,
+                              int lda, double *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
-                               int m, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, cuComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuComplex alpha, const cuComplex *A,
+                              int lda, cuComplex *B, int ldb) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
-                               char diag, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               cuDoubleComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int,
+                                       cuDoubleComplex, const cuDoubleComplex *,
+                                       int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
-                               int m, int n, float alpha, const float *A, int lda,
-                               float *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, float alpha, const float *A,
+                              int lda, float *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
-                               char diag, int m, int n, double alpha,
-                               const double *A, int lda, double *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, double alpha, const double *A,
+                              int lda, double *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
-                               int m, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, cuComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuComplex alpha, const cuComplex *A,
+                              int lda, cuComplex *B, int ldb) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
-                               char diag, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int,
+                                       cuDoubleComplex, const cuDoubleComplex *,
+                                       int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
diff --git a/tensorflow/stream_executor/cuda/cublas_9_0.inc b/tensorflow/stream_executor/cuda/cublas_9_0.inc
index ba46426878f..5e716114b23 100644
--- a/tensorflow/stream_executor/cuda/cublas_9_0.inc
+++ b/tensorflow/stream_executor/cuda/cublas_9_0.inc
@@ -2,5120 +2,4814 @@
 
 extern "C" {
 
-cublasStatus_t CUBLASWINAPI cublasCreate_v2 (cublasHandle_t *handle) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t *);
+cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCreate_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDestroy_v2 (cublasHandle_t handle) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t);
+cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDestroy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, int *version) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int *);
+cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle,
+                                                int *version) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVersion_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, version);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(libraryPropertyType, int *);
+cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type,
+                                              int *value) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetStream_v2 (cublasHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle,
+                                               cudaStream_t streamId) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetStream_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetStream_v2 (cublasHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *);
+cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle,
+                                               cudaStream_t *streamId) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetStream_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *);
+cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle,
+                                                    cublasPointerMode_t *mode) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetPointerMode_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2 (cublasHandle_t handle, cublasPointerMode_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t);
+cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle,
+                                                    cublasPointerMode_t mode) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetPointerMode_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *);
+cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle,
+                                                 cublasAtomicsMode_t *mode) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetAtomicsMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t);
+cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle,
+                                                 cublasAtomicsMode_t mode) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetAtomicsMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, cublasMath_t *mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *);
+cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle,
+                                              cublasMath_t *mode) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMathMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasMath_t);
+cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle,
+                                              cublasMath_t mode) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMathMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetVector (int n, int elemSize, const void *x, 
-                                             int incx, void *devicePtr, int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x,
+                                            int incx, void *devicePtr,
+                                            int incy) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetVector");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, x, incx, devicePtr, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVector (int n, int elemSize, const void *x, 
-                                             int incx, void *y, int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x,
+                                            int incx, void *y, int incy) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVector");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetMatrix (int rows, int cols, int elemSize, 
-                                             const void *A, int lda, void *B, 
-                                             int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize,
+                                            const void *A, int lda, void *B,
+                                            int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *,
+                                                 int, void *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMatrix");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetMatrix (int rows, int cols, int elemSize, 
-                                             const void *A, int lda, void *B,
-                                             int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int);
+cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize,
+                                            const void *A, int lda, void *B,
+                                            int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *,
+                                                 int, void *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMatrix");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetVectorAsync (int n, int elemSize, 
-                                                  const void *hostPtr, int incx, 
-                                                  void *devicePtr, int incy,
-                                                  cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize,
+                                                 const void *hostPtr, int incx,
+                                                 void *devicePtr, int incy,
+                                                 cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int,
+                                                 void *, int, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetVectorAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetVectorAsync (int n, int elemSize,
-                                                  const void *devicePtr, int incx,
-                                                  void *hostPtr, int incy,
-                                                  cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize,
+                                                 const void *devicePtr,
+                                                 int incx, void *hostPtr,
+                                                 int incy,
+                                                 cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int,
+                                                 void *, int, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVectorAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync (int rows, int cols, int elemSize,
-                                                  const void *A, int lda, void *B,
-                                                  int ldb, cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols,
+                                                 int elemSize, const void *A,
+                                                 int lda, void *B, int ldb,
+                                                 cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      int, int, int, const void *, int, void *, int, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetMatrixAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync (int rows, int cols, int elemSize,
-                                                  const void *A, int lda, void *B,
-                                                  int ldb, cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, int, const void *, int, void *, int, cudaStream_t);
+cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols,
+                                                 int elemSize, const void *A,
+                                                 int lda, void *B, int ldb,
+                                                 cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      int, int, int, const void *, int, void *, int, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetMatrixAsync");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream);
 }
 
-void CUBLASWINAPI cublasXerbla (const char *srName, int info) {
-  using FuncPtr = void (CUBLASWINAPI *)(const char *, int);
+void CUBLASWINAPI cublasXerbla(const char *srName, int info) {
+  using FuncPtr = void(CUBLASWINAPI *)(const char *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasXerbla");
   if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla");
   return func_ptr(srName, info);
 }
 
-cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const void *x, 
-                                                     cudaDataType xType,
-                                                     int incx, 
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n,
+                                         const void *x, cudaDataType xType,
+                                         int incx, void *result,
+                                         cudaDataType resultType,
+                                         cudaDataType executionType) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, void *,
+      cudaDataType, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasNrm2Ex");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, xType, incx, result, resultType, executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx,
+                                           float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx,
+                                           double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScnrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDznrm2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDotEx (cublasHandle_t handle,
-                                                     int n, 
-                                                     const void *x,
-                                                     cudaDataType xType, 
-                                                     int incx, 
-                                                     const void *y, 
-                                                     cudaDataType yType,
-                                                     int incy,
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n,
+                                        const void *x, cudaDataType xType,
+                                        int incx, const void *y,
+                                        cudaDataType yType, int incy,
+                                        void *result, cudaDataType resultType,
+                                        cudaDataType executionType) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, void *, cudaDataType, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDotEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType);
+  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType,
+                  executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDotcEx (cublasHandle_t handle,
-                                                     int n, 
-                                                     const void *x,
-                                                     cudaDataType xType, 
-                                                     int incx, 
-                                                     const void *y, 
-                                                     cudaDataType yType,
-                                                     int incy,
-                                                     void *result,
-                                                     cudaDataType resultType,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, int, const void *, cudaDataType, int, void *, cudaDataType, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n,
+                                         const void *x, cudaDataType xType,
+                                         int incx, const void *y,
+                                         cudaDataType yType, int incy,
+                                         void *result, cudaDataType resultType,
+                                         cudaDataType executionType) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, void *, cudaDataType, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDotcEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, executionType);
+  return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType,
+                  executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSdot_v2 (cublasHandle_t handle,
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     const float *y, 
-                                                     int incy,
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n,
+                                          const float *x, int incx,
+                                          const float *y, int incy,
+                                          float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, int, const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDdot_v2 (cublasHandle_t handle,
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     const double *y,
-                                                     int incy,
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n,
+                                          const double *x, int incx,
+                                          const double *y, int incy,
+                                          double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, int, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCdotu_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      const cuComplex *y, 
-                                                      int incy,
-                                                      cuComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           const cuComplex *y, int incy,
+                                           cuComplex *result) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     int, const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotu_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCdotc_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      const cuComplex *y, 
-                                                      int incy,
-                                                      cuComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           const cuComplex *y, int incy,
+                                           cuComplex *result) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     int, const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdotu_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      const cuDoubleComplex *y, 
-                                                      int incy,
-                                                      cuDoubleComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           const cuDoubleComplex *y, int incy,
+                                           cuDoubleComplex *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotu_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdotc_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *y, 
-                                                      int incy,
-                                                      cuDoubleComplex *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           const cuDoubleComplex *y, int incy,
+                                           cuDoubleComplex *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScalEx(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const void *alpha,  /* host or device pointer */
-                                                     cudaDataType alphaType,
-                                                     void *x, 
-                                                     cudaDataType xType,
-                                                     int incx,
-                                                     cudaDataType executionType) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, int, cudaDataType);
+cublasStatus_t CUBLASWINAPI
+cublasScalEx(cublasHandle_t handle, int n,
+             const void *alpha, /* host or device pointer */
+             cudaDataType alphaType, void *x, cudaDataType xType, int incx,
+             cudaDataType executionType) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType,
+      int, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScalEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *alpha,  /* host or device pointer */
-                                                     float *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSscal_v2(cublasHandle_t handle, int n,
+               const float *alpha, /* host or device pointer */
+               float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *alpha,  /* host or device pointer */
-                                                     double *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDscal_v2(cublasHandle_t handle, int n,
+               const double *alpha, /* host or device pointer */
+               double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const cuComplex *alpha, /* host or device pointer */
-                                                     cuComplex *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCscal_v2(cublasHandle_t handle, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsscal_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsscal_v2(cublasHandle_t handle, int n,
+                const float *alpha, /* host or device pointer */
+                cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZscal_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */
-                                                     cuDoubleComplex *x, 
-                                                     int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZscal_v2(cublasHandle_t handle, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdscal_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZdscal_v2(cublasHandle_t handle, int n,
+                const double *alpha, /* host or device pointer */
+                cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdscal_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasAxpyEx (cublasHandle_t handle,
-                                                      int n,
-                                                      const void *alpha, /* host or device pointer */
-                                                      cudaDataType alphaType,
-                                                      const void *x,
-                                                      cudaDataType xType,
-                                                      int incx,
-                                                      void *y,
-                                                      cudaDataType yType,
-                                                      int incy,
-                                                      cudaDataType executiontype) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const void *, cudaDataType, const void *, cudaDataType, int, void *, cudaDataType, int, cudaDataType);
+cublasStatus_t CUBLASWINAPI cublasAxpyEx(
+    cublasHandle_t handle, int n,
+    const void *alpha, /* host or device pointer */
+    cudaDataType alphaType, const void *x, cudaDataType xType, int incx,
+    void *y, cudaDataType yType, int incy, cudaDataType executiontype) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const void *, cudaDataType, const void *,
+      cudaDataType, int, void *, cudaDataType, int, cudaDataType);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasAxpyEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, executiontype);
+  return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy,
+                  executiontype);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      const float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSaxpy_v2(cublasHandle_t handle, int n,
+               const float *alpha, /* host or device pointer */
+               const float *x, int incx, float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */
-                                                      const double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDaxpy_v2(cublasHandle_t handle, int n,
+               const double *alpha, /* host or device pointer */
+               const double *x, int incx, double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *alpha, /* host or device pointer */
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCaxpy_v2(cublasHandle_t handle, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, cuComplex *y, int incy) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *,
+                                     const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZaxpy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZaxpy_v2(
+    cublasHandle_t handle, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *,
+      int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZaxpy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, alpha, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx, float *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx, double *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n,
+                                           const cuComplex *x, int incx,
+                                           cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZcopy_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n,
+                                           const cuDoubleComplex *x, int incx,
+                                           cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuDoubleComplex *, int,
+                                                 cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZcopy_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      float *x, 
-                                                      int incx, 
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n,
+                                           float *x, int incx, float *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *,
+                                                 int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      double *x, 
-                                                      int incx, 
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n,
+                                           double *x, int incx, double *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *,
+                                                 int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      cuComplex *x, 
-                                                      int incx, 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n,
+                                           cuComplex *x, int incx, cuComplex *y,
+                                           int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZswap_v2 (cublasHandle_t handle,
-                                                      int n, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n,
+                                           cuDoubleComplex *x, int incx,
+                                           cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZswap_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n,
+                                            const float *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n,
+                                            const double *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamax_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n,
+                                            const float *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const double *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n,
+                                            const double *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const cuComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      int *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, int *);
+cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            int *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamin_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const float *x, 
-                                                     int incx, 
-                                                     float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n,
+                                           const float *x, int incx,
+                                           float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     const double *x, 
-                                                     int incx, 
-                                                     double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n,
+                                           const double *x, int incx,
+                                           double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int,
+                                                 const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuComplex *x, 
-                                                      int incx, 
-                                                      float *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n,
+                                            const cuComplex *x, int incx,
+                                            float *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, 
-                                                      int n, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx, 
-                                                      double *result) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n,
+                                            const cuDoubleComplex *x, int incx,
+                                            double *result) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDzasum_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, result);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     float *x, 
-                                                     int incx, 
-                                                     float *y, 
-                                                     int incy, 
-                                                     const float *c,  /* host or device pointer */
-                                                     const float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int, const float *, const float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y,
+              int incy, const float *c, /* host or device pointer */
+              const float *s) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *,
+                                     int, const float *, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     double *x, 
-                                                     int incx, 
-                                                     double *y, 
-                                                     int incy, 
-                                                     const double *c,  /* host or device pointer */
-                                                     const double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int, const double *, const double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y,
+              int incy, const double *c, /* host or device pointer */
+              const double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, double *, int, double *, int, const double *,
+      const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuComplex *x, 
-                                                     int incx, 
-                                                     cuComplex *y, 
-                                                     int incy, 
-                                                     const float *c,      /* host or device pointer */
-                                                     const cuComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, const cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCrot_v2(
+    cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y,
+    int incy, const float *c, /* host or device pointer */
+    const cuComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *,
+      const cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsrot_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuComplex *x, 
-                                                     int incx, 
-                                                     cuComplex *y, 
-                                                     int incy, 
-                                                     const float *c,  /* host or device pointer */
-                                                     const float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, const float *);
+cublasStatus_t CUBLASWINAPI cublasCsrot_v2(
+    cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y,
+    int incy, const float *c, /* host or device pointer */
+    const float *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *,
+      const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZrot_v2 (cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuDoubleComplex *x, 
-                                                     int incx, 
-                                                     cuDoubleComplex *y, 
-                                                     int incy, 
-                                                     const double *c,            /* host or device pointer */
-                                                     const cuDoubleComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, const double *, const cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZrot_v2(
+    cublasHandle_t handle, int n, cuDoubleComplex *x, int incx,
+    cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */
+    const cuDoubleComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+      const double *, const cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZdrot_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     cuDoubleComplex *x, 
-                                                     int incx, 
-                                                     cuDoubleComplex *y, 
-                                                     int incy, 
-                                                     const double *c,  /* host or device pointer */
-                                                     const double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, const double *, const double *);
+cublasStatus_t CUBLASWINAPI cublasZdrot_v2(
+    cublasHandle_t handle, int n, cuDoubleComplex *x, int incx,
+    cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */
+    const double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+      const double *, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdrot_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotg_v2(cublasHandle_t handle, 
-                                                     float *a,   /* host or device pointer */
-                                                     float *b,   /* host or device pointer */
-                                                     float *c,   /* host or device pointer */
-                                                     float *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, float *, float *, float *, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */
+               float *b,                        /* host or device pointer */
+               float *c,                        /* host or device pointer */
+               float *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *,
+                                                 float *, float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotg_v2(cublasHandle_t handle, 
-                                                     double *a,  /* host or device pointer */
-                                                     double *b,  /* host or device pointer */
-                                                     double *c,  /* host or device pointer */
-                                                     double *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, double *, double *, double *, double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */
+               double *b,                        /* host or device pointer */
+               double *c,                        /* host or device pointer */
+               double *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *,
+                                                 double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCrotg_v2(cublasHandle_t handle, 
-                                                     cuComplex *a,  /* host or device pointer */
-                                                     cuComplex *b,  /* host or device pointer */
-                                                     float *c,      /* host or device pointer */
-                                                     cuComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */
+               cuComplex *b,                        /* host or device pointer */
+               float *c,                            /* host or device pointer */
+               cuComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZrotg_v2(cublasHandle_t handle, 
-                                                     cuDoubleComplex *a,  /* host or device pointer */
-                                                     cuDoubleComplex *b,  /* host or device pointer */
-                                                     double *c,           /* host or device pointer */
-                                                     cuDoubleComplex *s) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZrotg_v2(
+    cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */
+    cuDoubleComplex *b,                        /* host or device pointer */
+    double *c,                                 /* host or device pointer */
+    cuDoubleComplex *s) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrotg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, a, b, c, s);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     float *x, 
-                                                     int incx, 
-                                                     float *y, 
-                                                     int incy, 
-                                                     const float* param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, int, const float *);
+cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n,
+                                           float *x, int incx, float *y,
+                                           int incy, const float *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, float *, int, float *, int, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, 
-                                                     int n, 
-                                                     double *x, 
-                                                     int incx, 
-                                                     double *y, 
-                                                     int incy, 
-                                                     const double* param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *, int, double *, int, const double *);
+cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n,
+                                           double *x, int incx, double *y,
+                                           int incy, const double *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, int, double *, int, double *, int, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, x, incx, y, incy, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSrotmg_v2(cublasHandle_t handle, 
-                                                      float *d1,        /* host or device pointer */
-                                                      float *d2,        /* host or device pointer */
-                                                      float *x1,        /* host or device pointer */
-                                                      const float *y1,  /* host or device pointer */
-                                                      float *param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, float *, float *, float *, const float *, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */
+                float *d2,                        /* host or device pointer */
+                float *x1,                        /* host or device pointer */
+                const float *y1,                  /* host or device pointer */
+                float *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, float *, float *, float *, const float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotmg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, d1, d2, x1, y1, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDrotmg_v2(cublasHandle_t handle, 
-                                                      double *d1,        /* host or device pointer */  
-                                                      double *d2,        /* host or device pointer */  
-                                                      double *x1,        /* host or device pointer */  
-                                                      const double *y1,  /* host or device pointer */  
-                                                      double *param) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, double *, double *, double *, const double *, double *);
+cublasStatus_t CUBLASWINAPI
+cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */
+                double *d2,                        /* host or device pointer */
+                double *x1,                        /* host or device pointer */
+                const double *y1,                  /* host or device pointer */
+                double *param) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, double *, double *, double *, const double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotmg_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, d1, d2, x1, y1, param);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m, 
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */
-                                                      const float *A, 
-                                                      int lda, 
-                                                      const float *x, 
-                                                      int incx, 
-                                                      const float *beta,  /* host or device pointer */
-                                                      float *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, const float *, const float *,
+      int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */
-                                                      double *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x, 
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda, 
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgemv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgbmv_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A, 
-                                                      int lda, 
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               int kl, int ku, const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const float *,
+      const float *, int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda, 
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+               int kl, int ku, const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda, 
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgbmv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl,
+    int ku, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *x, int incx,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgbmv_v2 (cublasHandle_t handle,
-                                                      cublasOperation_t trans, 
-                                                      int m,
-                                                      int n,
-                                                      int kl,
-                                                      int ku, 
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda, 
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgbmv_v2(
+    cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl,
+    int ku, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasOperation_t, int, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
+  return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y,
+                  incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const float *A, int lda, float *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // loaded symbol's signature
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *AP, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *AP, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *AP, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const double *AP, double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *AP, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *AP, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *AP,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const float *AP, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const float *AP, float *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const double *AP, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const double *AP, double *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuComplex *AP, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpsv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      const cuDoubleComplex *AP, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n,
+                                           const cuDoubleComplex *AP,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, AP, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const float *A, 
-                                                      int lda, 
-                                                      float *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const float *A, int lda, float *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const double *A, int lda, double *x,
+                                           int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuComplex *A, 
-                                                      int lda, 
-                                                      cuComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuComplex *A, int lda,
+                                           cuComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtbsv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      cublasDiagType_t diag, 
-                                                      int n, 
-                                                      int k, 
-                                                      const cuDoubleComplex *A, 
-                                                      int lda, 
-                                                      cuDoubleComplex *x, 
-                                                      int incx) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle,
+                                           cublasFillMode_t uplo,
+                                           cublasOperation_t trans,
+                                           cublasDiagType_t diag, int n, int k,
+                                           const cuDoubleComplex *A, int lda,
+                                           cuDoubleComplex *x, int incx) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t,
+      int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbsv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsymv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */ 
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta, /* host or device pointer */ 
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsymv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta, /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsymv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsymv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta,   /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsymv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChemv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhemv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */ 
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta,   /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhemv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,   /* host or device pointer */ 
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *x, 
-                                                      int incx,
-                                                      const float *beta,  /* host or device pointer */ 
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const float *alpha, /* host or device pointer */
+               const float *A, int lda, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *,
+      int, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsbmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,   /* host or device pointer */ 
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *x, 
-                                                      int incx,
-                                                      const double *beta,   /* host or device pointer */ 
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const double *alpha, /* host or device pointer */
+               const double *A, int lda, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const double *,
+      const double *, int, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */ 
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *x, 
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */ 
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *A, int lda, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, const cuComplex *,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhbmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *x, 
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                      cuDoubleComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhbmv_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhbmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const float *alpha,  /* host or device pointer */                                           
-                                                      const float *AP,
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      float *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *AP, const float *x, int incx,
+               const float *beta, /* host or device pointer */
+               float *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *,
+      const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *AP,
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      double *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const double *alpha, /* host or device pointer */
+               const double *AP, const double *x, int incx,
+               const double *beta, /* host or device pointer */
+               double *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpmv_v2 (cublasHandle_t handle, 
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *AP,
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *y,
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *AP, const cuComplex *x, int incx,
+               const cuComplex *beta, /* host or device pointer */
+               cuComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpmv_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *AP,
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *y, 
-                                                      int incy) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx,
+               const cuDoubleComplex *beta, /* host or device pointer */
+               cuDoubleComplex *y, int incy) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpmv_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSger_v2 (cublasHandle_t handle,
-                                                     int m,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     const float *y,
-                                                     int incy,
-                                                     float *A,
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSger_v2(
+    cublasHandle_t handle, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *x, int incx, const float *y, int incy, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSger_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDger_v2 (cublasHandle_t handle, 
-                                                     int m,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */   
-                                                     const double *x,
-                                                     int incx,
-                                                     const double *y,
-                                                     int incy,
-                                                     double *A,
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDger_v2(
+    cublasHandle_t handle, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const double *, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDger_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgeru_v2 (cublasHandle_t handle, 
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCgeru_v2(cublasHandle_t handle, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeru_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgerc_v2 (cublasHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCgerc_v2(cublasHandle_t handle, int m, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgerc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgeru_v2 (cublasHandle_t handle, 
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgeru_v2(cublasHandle_t handle, int m, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeru_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgerc_v2 (cublasHandle_t handle,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgerc_v2(cublasHandle_t handle, int m, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgerc_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     float *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const float *x, int incx, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const double *x,
-                                                     int incx,
-                                                     double *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const double *x, int incx, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const cuComplex *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const cuDoubleComplex *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *,
+      int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *A, 
-                                                     int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const float *x,
-                                                     int incx,
-                                                     float *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const float *x, int incx, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const double *x,
-                                                     int incx,
-                                                     double *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI
+cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const double *x, int incx, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const float *alpha, /* host or device pointer */  
-                                                     const cuComplex *x,
-                                                     int incx,
-                                                     cuComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const float *alpha, /* host or device pointer */
+              const cuComplex *x, int incx, cuComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *,
+      int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpr_v2 (cublasHandle_t handle,
-                                                     cublasFillMode_t uplo,
-                                                     int n,
-                                                     const double *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *x,
-                                                     int incx,
-                                                     cuDoubleComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+              const double *alpha, /* host or device pointer */
+              const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *y,
-                                                      int incy,
-                                                      float *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const float *alpha, /* host or device pointer */
+    const float *x, int incx, const float *y, int incy, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *x,
-                                                      int incx,
-                                                      const double *y,
-                                                      int incy,
-                                                      double *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, int n, 
-                                                      const cuComplex *alpha,  /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx, 
-                                                      const cuComplex *y,
-                                                      int incy, 
-                                                      cuComplex *A, 
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, int n, 
-                                                      const cuComplex *alpha,  /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx, 
-                                                      const cuComplex *y,
-                                                      int incy, 
-                                                      cuComplex *A, 
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuComplex *alpha, /* host or device pointer */
+               const cuComplex *x, int incx, const cuComplex *y, int incy,
+               cuComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n, 
-                                                      const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *A,
-                                                      int lda) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSspr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const float *x,
-                                                      int incx,
-                                                      const float *y,
-                                                      int incy,
-                                                      float *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI
+cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const float *alpha, /* host or device pointer */
+               const float *x, int incx, const float *y, int incy, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int,
+      const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDspr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const double *x,
-                                                      int incx, 
-                                                      const double *y,
-                                                      int incy,
-                                                      double *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, const double *, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDspr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const double *alpha, /* host or device pointer */
+    const double *x, int incx, const double *y, int incy, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, const double *,
+      int, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasChpr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *x,
-                                                      int incx,
-                                                      const cuComplex *y,
-                                                      int incy,
-                                                      cuComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasChpr2_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuComplex *,
+      const cuComplex *, int, const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhpr2_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *x,
-                                                      int incx,
-                                                      const cuDoubleComplex *y,
-                                                      int incy,
-                                                      cuDoubleComplex *AP) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI
+cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n,
+               const cuDoubleComplex *alpha, /* host or device pointer */
+               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
+               int incy, cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr2_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A, 
-                                                      int lda,
-                                                      const float *B,
-                                                      int ldb, 
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A, 
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb, 
-                                                      const double *beta, /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A, 
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb, 
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3m  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A, 
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb, 
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3m(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3m");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3mEx (cublasHandle_t handle, 
-                                                     cublasOperation_t transa, cublasOperation_t transb,  
-                                                     int m, int n, int k, 
-                                                     const cuComplex *alpha, 
-                                                     const void *A, 
-                                                     cudaDataType Atype, 
-                                                     int lda, 
-                                                     const void *B, 
-                                                     cudaDataType Btype, 
-                                                     int ldb,
-                                                     const cuComplex *beta, 
-                                                     void *C, 
-                                                     cudaDataType Ctype, 
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const void *, cudaDataType, int, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3mEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, const void *A,
+    cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb,
+    const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemm_v2 (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A, 
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb, 
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgemm_v2(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemm3m  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A, 
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb, 
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa,
+              cublasOperation_t transb, int m, int n, int k,
+              const cuDoubleComplex *alpha, /* host or device pointer */
+              const cuDoubleComplex *A, int lda, const cuDoubleComplex *B,
+              int ldb, const cuDoubleComplex *beta, /* host or device pointer */
+              cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm3m");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta,
+                  C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemmEx  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const void *B,
-                                                      cudaDataType Btype,
-                                                      int ldb, 
-                                                      const float *beta, /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const void *, cudaDataType, int, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasSgemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda, const void *B,
+    cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const float *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasGemmEx  (cublasHandle_t handle, 
-                                                      cublasOperation_t transa,
-                                                      cublasOperation_t transb, 
-                                                      int m,
-                                                      int n,
-                                                      int k,
-                                                      const void *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const void *B,
-                                                      cudaDataType Btype,
-                                                      int ldb, 
-                                                      const void *beta, /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc,
-                                                      cudaDataType computeType,
-                                                      cublasGemmAlgo_t algo) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const void *, const void *, cudaDataType, int, const void *, cudaDataType, int, const void *, void *, cudaDataType, int, cudaDataType, cublasGemmAlgo_t);
+cublasStatus_t CUBLASWINAPI cublasGemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const void *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda, const void *B,
+    cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc, cudaDataType computeType,
+    cublasGemmAlgo_t algo) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const void *, const void *, cudaDataType, int, const void *, cudaDataType,
+      int, const void *, void *, cudaDataType, int, cudaDataType,
+      cublasGemmAlgo_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc, computeType, algo);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc, computeType, algo);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemmEx (cublasHandle_t handle, 
-                                                     cublasOperation_t transa, cublasOperation_t transb,  
-                                                     int m, int n, int k, 
-                                                     const cuComplex *alpha, 
-                                                     const void *A, 
-                                                     cudaDataType Atype, 
-                                                     int lda, 
-                                                     const void *B, 
-                                                     cudaDataType Btype, 
-                                                     int ldb,
-                                                     const cuComplex *beta, 
-                                                     void *C, 
-                                                     cudaDataType Ctype, 
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const void *, cudaDataType, int, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCgemmEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, const void *A,
+    cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb,
+    const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const void *,
+      cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, Btype, ldb, beta, C, Ctype, ldc);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B,
+                  Btype, ldb, beta, C, Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasUint8gemmBias (cublasHandle_t handle, 
-                                                           cublasOperation_t transa, cublasOperation_t transb, cublasOperation_t transc,  
-                                                           int m, int n, int k, 
-                                                           const unsigned char *A, int A_bias, int lda, 
-                                                           const unsigned char *B, int B_bias, int ldb,
-                                                                 unsigned char *C, int C_bias, int ldc,
-                                                           int C_mult, int C_shift) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, int, int, int, const unsigned char *, int, int, const unsigned char *, int, int, unsigned char *, int, int, int, int);
+cublasStatus_t CUBLASWINAPI cublasUint8gemmBias(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    cublasOperation_t transc, int m, int n, int k, const unsigned char *A,
+    int A_bias, int lda, const unsigned char *B, int B_bias, int ldb,
+    unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t,
+      int, int, int, const unsigned char *, int, int, const unsigned char *,
+      int, int, unsigned char *, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasUint8gemmBias");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, B_bias, ldb, C, C_bias, ldc, C_mult, C_shift);
+  return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B,
+                  B_bias, ldb, C, C_bias, ldc, C_mult, C_shift);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha,           /* host or device pointer */
+    const float *A, int lda, const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha,            /* host or device pointer */
+    const double *A, int lda, const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyrk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyrk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrkEx ( cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype, 
-                                                      int lda,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      void *C, 
-                                                      cudaDataType Ctype, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrkEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const cuComplex *,
+      void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrkEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx(cublasHandle_t handle,
-                                                      cublasFillMode_t uplo, 
-                                                      cublasOperation_t trans, 
-                                                      int n, 
-                                                      int k,
-                                                      const cuComplex *alpha, 
-                                                      const void *A, 
-                                                      cudaDataType Atype, 
-                                                      int lda,
-                                                      const cuComplex *beta, 
-                                                      void *C, 
-                                                      cudaDataType Ctype, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const void *, cudaDataType, int, const cuComplex *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype,
+    int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const void *, cudaDataType, int, const cuComplex *,
+      void *, cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCherk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha,               /* host or device pointer */
+    const cuComplex *A, int lda, const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const cuComplex *, int, const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZherk_v2 (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const double *alpha,  /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const double *beta,  /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZherk_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const cuDoubleComplex *, int, const double *,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherk_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherkEx  (cublasHandle_t handle,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      int n,
-                                                      int k,
-                                                      const float *alpha,  /* host or device pointer */  
-                                                      const void *A, 
-                                                      cudaDataType Atype,
-                                                      int lda,
-                                                      const float *beta,   /* host or device pointer */  
-                                                      void *C,
-                                                      cudaDataType Ctype,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCherkEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda,
+    const float *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const void *, cudaDataType, int, const float *, void *,
+      cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherkEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherk3mEx (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo, 
-                                                       cublasOperation_t trans, 
-                                                       int n, 
-                                                       int k,
-                                                       const float *alpha, 
-                                                       const void *A, cudaDataType Atype, 
-                                                       int lda,
-                                                       const float *beta, 
-                                                       void *C, 
-                                                       cudaDataType Ctype, 
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const void *, cudaDataType, int, const float *, void *, cudaDataType, int);
+cublasStatus_t CUBLASWINAPI cublasCherk3mEx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, const void *A, cudaDataType Atype,
+    int lda, const float *beta, void *C, cudaDataType Ctype, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const void *, cudaDataType, int, const float *, void *,
+      cudaDataType, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk3mEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, Ctype, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C,
+                  Ctype, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const float *alpha, /* host or device pointer */  
-                                                       const float *A,
-                                                       int lda,
-                                                       const float *B,
-                                                       int ldb,
-                                                       const float *beta, /* host or device pointer */  
-                                                       float *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const double *alpha, /* host or device pointer */  
-                                                       const double *A,
-                                                       int lda,
-                                                       const double *B,
-                                                       int ldb,
-                                                       const double *beta, /* host or device pointer */  
-                                                       double *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuComplex *alpha, /* host or device pointer */  
-                                                       const cuComplex *A,
-                                                       int lda,
-                                                       const cuComplex *B,
-                                                       int ldb,
-                                                       const cuComplex *beta, /* host or device pointer */  
-                                                       cuComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuDoubleComplex *alpha,  /* host or device pointer */  
-                                                       const cuDoubleComplex *A,
-                                                       int lda,
-                                                       const cuDoubleComplex *B,
-                                                       int ldb,
-                                                       const cuDoubleComplex *beta,  /* host or device pointer */  
-                                                       cuDoubleComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCher2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans,
-                                                       int n,
-                                                       int k,
-                                                       const cuComplex *alpha, /* host or device pointer */  
-                                                       const cuComplex *A,
-                                                       int lda,
-                                                       const cuComplex *B,
-                                                       int ldb,
-                                                       const float *beta,   /* host or device pointer */  
-                                                       cuComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCher2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZher2k_v2 (cublasHandle_t handle,
-                                                       cublasFillMode_t uplo,
-                                                       cublasOperation_t trans, 
-                                                       int n,
-                                                       int k,
-                                                       const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                       const cuDoubleComplex *A, 
-                                                       int lda,
-                                                       const cuDoubleComplex *B,
-                                                       int ldb,
-                                                       const double *beta, /* host or device pointer */  
-                                                       cuDoubleComplex *C,
-                                                       int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZher2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2k_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const float *alpha, /* host or device pointer */ 
-                                                    const float *A,
-                                                    int lda,
-                                                    const float *B,
-                                                    int ldb,
-                                                    const float *beta, /* host or device pointer */ 
-                                                    float *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const double *alpha, /* host or device pointer */ 
-                                                    const double *A,
-                                                    int lda,
-                                                    const double *B,
-                                                    int ldb,
-                                                    const double *beta, /* host or device pointer */ 
-                                                    double *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuComplex *alpha, /* host or device pointer */ 
-                                                    const cuComplex *A,
-                                                    int lda,
-                                                    const cuComplex *B,
-                                                    int ldb,
-                                                    const cuComplex *beta, /* host or device pointer */ 
-                                                    cuComplex *C, 
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsyrkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo, 
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                    const cuDoubleComplex *A,
-                                                    int lda,
-                                                    const cuDoubleComplex *B,
-                                                    int ldb,
-                                                    const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                    cuDoubleComplex *C, 
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCherkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuComplex *alpha, /* host or device pointer */ 
-                                                    const cuComplex *A,
-                                                    int lda,
-                                                    const cuComplex *B,
-                                                    int ldb,
-                                                    const float *beta, /* host or device pointer */ 
-                                                    cuComplex *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const float *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZherkx (cublasHandle_t handle,
-                                                    cublasFillMode_t uplo,
-                                                    cublasOperation_t trans,
-                                                    int n,
-                                                    int k,
-                                                    const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                    const cuDoubleComplex *A,
-                                                    int lda,
-                                                    const cuDoubleComplex *B,
-                                                    int ldb,
-                                                    const double *beta, /* host or device pointer */ 
-                                                    cuDoubleComplex *C,
-                                                    int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherkx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      const float *B,
-                                                      int ldb,
-                                                      const float *beta, /* host or device pointer */  
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const float *, const float *, int, const float *, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m, 
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb,
-                                                      const double *beta, /* host or device pointer */  
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const double *, const double *, int, const double *, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCsymm_v2(  // Stub: forwards to the "cublasCsymm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasZsymm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZsymm_v2(  // Stub: forwards to the "cublasZsymm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasChemm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuComplex *alpha, /* host or device pointer */  
-                                                      const cuComplex *A,
-                                                      int lda,
-                                                      const cuComplex *B,
-                                                      int ldb,
-                                                      const cuComplex *beta, /* host or device pointer */  
-                                                      cuComplex *C, 
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasChemm_v2(  // Stub: forwards to the "cublasChemm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasZhemm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      int m,
-                                                      int n,
-                                                      const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                      const cuDoubleComplex *A,
-                                                      int lda,
-                                                      const cuDoubleComplex *B,
-                                                      int ldb,
-                                                      const cuDoubleComplex *beta, /* host or device pointer */  
-                                                      cuDoubleComplex *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZhemm_v2(  // Stub: forwards to the "cublasZhemm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrsm_v2 (cublasHandle_t handle, 
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda,
-                                                      float *B,
-                                                      int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrsm_v2(  // Stub: forwards to the "cublasStrsm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, float *B, int ldb) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int, float *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrsm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A, 
-                                                      int lda, 
-                                                      double *B,
-                                                      int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(  // Stub: forwards to the "cublasDtrsm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, double *B, int ldb) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int, double *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(cublasHandle_t handle,
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     cuComplex *B,
-                                                     int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(  // Stub: forwards to the "cublasCtrsm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, cuComplex *B, int ldb) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(cublasHandle_t handle, 
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *A,                                        
-                                                     int lda,
-                                                     cuDoubleComplex *B,
-                                                     int ldb) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(  // Stub: forwards to the "cublasZtrsm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrmm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const float *alpha, /* host or device pointer */  
-                                                      const float *A,
-                                                      int lda, 
-                                                      const float *B,
-                                                      int ldb,
-                                                      float *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, const float *, int, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStrmm_v2(  // Stub: forwards to the "cublasStrmm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb, float *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrmm_v2 (cublasHandle_t handle,
-                                                      cublasSideMode_t side,
-                                                      cublasFillMode_t uplo,
-                                                      cublasOperation_t trans,
-                                                      cublasDiagType_t diag,
-                                                      int m,
-                                                      int n,
-                                                      const double *alpha, /* host or device pointer */  
-                                                      const double *A,
-                                                      int lda,
-                                                      const double *B,
-                                                      int ldb,
-                                                      double *C,
-                                                      int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, const double *, int, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(  // Stub: forwards to the "cublasDtrmm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb, double *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(cublasHandle_t handle,
-                                                     cublasSideMode_t side,
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuComplex *alpha, /* host or device pointer */  
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     const cuComplex *B,
-                                                     int ldb,
-                                                     cuComplex *C,
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(  // Stub: forwards to the "cublasCtrmm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C,
+    int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(cublasHandle_t handle, cublasSideMode_t side, 
-                                                     cublasFillMode_t uplo,
-                                                     cublasOperation_t trans,
-                                                     cublasDiagType_t diag,
-                                                     int m,
-                                                     int n,
-                                                     const cuDoubleComplex *alpha, /* host or device pointer */  
-                                                     const cuDoubleComplex *A,
-                                                     int lda,
-                                                     const cuDoubleComplex *B,
-                                                     int ldb,
-                                                     cuDoubleComplex *C,
-                                                     int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(  // Stub: forwards to the "cublasZtrmm_v2" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    cuDoubleComplex *C, int ldc) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmm_v2");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, C, ldc);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemmBatched (cublasHandle_t handle,
-                                                          cublasOperation_t transa,
-                                                          cublasOperation_t transb, 
-                                                          int m,
-                                                          int n,
-                                                          int k,
-                                                          const float *alpha,  /* host or device pointer */  
-                                                          const float *Aarray[], 
-                                                          int lda,
-                                                          const float *Barray[],
-                                                          int ldb, 
-                                                          const float *beta,   /* host or device pointer */  
-                                                          float *Carray[],
-                                                          int ldc,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const float *[], int, const float *[], int, const float *, float *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasSgemmBatched(  // Stub: forwards to the "cublasSgemmBatched" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const float *Aarray[], int lda, const float *Barray[], int ldb,
+    const float *beta, /* host or device pointer */
+    float *Carray[], int ldc, int batchCount) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *[], int, const float *[], int, const float *,
+      float *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemmBatched (cublasHandle_t handle,
-                                                          cublasOperation_t transa,
-                                                          cublasOperation_t transb, 
-                                                          int m,
-                                                          int n,
-                                                          int k,
-                                                          const double *alpha,  /* host or device pointer */ 
-                                                          const double *Aarray[], 
-                                                          int lda,
-                                                          const double *Barray[],
-                                                          int ldb, 
-                                                          const double *beta,  /* host or device pointer */ 
-                                                          double *Carray[],
-                                                          int ldc,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const double *, const double *[], int, const double *[], int, const double *, double *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasDgemmBatched(  // Stub: forwards to the "cublasDgemmBatched" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *Aarray[], int lda, const double *Barray[], int ldb,
+    const double *beta, /* host or device pointer */
+    double *Carray[], int ldc, int batchCount) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *[], int, const double *[], int,
+      const double *, double *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemmBatched (cublasHandle_t handle,
-                                                          cublasOperation_t transa,
-                                                          cublasOperation_t transb, 
-                                                          int m,
-                                                          int n,
-                                                          int k,
-                                                          const cuComplex *alpha, /* host or device pointer */ 
-                                                          const cuComplex *Aarray[], 
-                                                          int lda,
-                                                          const cuComplex *Barray[],
-                                                          int ldb, 
-                                                          const cuComplex *beta, /* host or device pointer */ 
-                                                          cuComplex *Carray[],
-                                                          int ldc,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *[], int, const cuComplex *[], int, const cuComplex *, cuComplex *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasCgemmBatched(  // Stub: forwards to the "cublasCgemmBatched" pointer from LoadSymbol.
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *Aarray[], int lda, const cuComplex *Barray[], int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *Carray[], int ldc, int batchCount) {  // Returns GetSymbolNotFoundError() if func_ptr is null.
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(  // Same parameter types as this stub.
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *[], int, const cuComplex *[], int,
+      const cuComplex *, cuComplex *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);  // Arguments forwarded unchanged, in order.
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched (cublasHandle_t handle,
-                                                          cublasOperation_t transa,
-                                                          cublasOperation_t transb, 
-                                                          int m,
-                                                          int n,
-                                                          int k,
-                                                          const cuComplex *alpha, /* host or device pointer */ 
-                                                          const cuComplex *Aarray[], 
-                                                          int lda,
-                                                          const cuComplex *Barray[],
-                                                          int ldb, 
-                                                          const cuComplex *beta, /* host or device pointer */ 
-                                                          cuComplex *Carray[],
-                                                          int ldc,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *[], int, const cuComplex *[], int, const cuComplex *, cuComplex *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *Aarray[], int lda, const cuComplex *Barray[], int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *[], int, const cuComplex *[], int,
+      const cuComplex *, cuComplex *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemmBatched (cublasHandle_t handle,
-                                                          cublasOperation_t transa,
-                                                          cublasOperation_t transb, 
-                                                          int m,
-                                                          int n,
-                                                          int k,
-                                                          const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                          const cuDoubleComplex *Aarray[], 
-                                                          int lda,
-                                                          const cuDoubleComplex *Barray[],
-                                                          int ldb, 
-                                                          const cuDoubleComplex *beta, /* host or device pointer */ 
-                                                          cuDoubleComplex *Carray[],
-                                                          int ldc,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *[], int, const cuDoubleComplex *[], int, const cuDoubleComplex *, cuDoubleComplex *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasZgemmBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *Aarray[], int lda, const cuDoubleComplex *Barray[],
+    int ldb, const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *[], int,
+      const cuDoubleComplex *[], int, const cuDoubleComplex *,
+      cuDoubleComplex *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray, ldb, beta, Carray, ldc, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const float *alpha,  /* host or device pointer */
-                                                                 const float *A,
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const float *B,
-                                                                 int ldb,
-                                                                 long long int strideB,
-                                                                 const float *beta,   /* host or device pointer */
-                                                                 float *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const float *, const float *, int, long long, const float *, int, long long, const float *, float *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha,        /* host or device pointer */
+    const float *A, int lda, long long int strideA, /* purposely signed */
+    const float *B, int ldb, long long int strideB,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *, int, long long, const float *, int,
+      long long, const float *, float *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const double *alpha,  /* host or device pointer */
-                                                                 const double *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const double *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const double *beta,   /* host or device pointer */
-                                                                 double *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const double *, const double *, int, long long, const double *, int, long long, const double *, double *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, long long int strideA, /* purposely signed */
+    const double *B, int ldb, long long int strideB,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *, int, long long, const double *, int,
+      long long, const double *, double *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuComplex *alpha,  /* host or device pointer */
-                                                                 const cuComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuComplex *beta,   /* host or device pointer */
-                                                                 cuComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, long long, const cuComplex *, int, long long, const cuComplex *, cuComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuComplex *alpha,  /* host or device pointer */
-                                                                 const cuComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuComplex *beta,   /* host or device pointer */
-                                                                 cuComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuComplex *, const cuComplex *, int, long long, const cuComplex *, int, long long, const cuComplex *, cuComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched (cublasHandle_t handle,
-                                                                 cublasOperation_t transa,
-                                                                 cublasOperation_t transb, 
-                                                                 int m,
-                                                                 int n,
-                                                                 int k,
-                                                                 const cuDoubleComplex *alpha,  /* host or device pointer */
-                                                                 const cuDoubleComplex *A, 
-                                                                 int lda,
-                                                                 long long int strideA,   /* purposely signed */
-                                                                 const cuDoubleComplex *B,
-                                                                 int ldb, 
-                                                                 long long int strideB,
-                                                                 const cuDoubleComplex *beta,   /* host or device poi */
-                                                                 cuDoubleComplex *C,
-                                                                 int ldc,
-                                                                 long long int strideC,
-                                                                 int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, long long, const cuDoubleComplex *, cuDoubleComplex *, int, long long, int);
+cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    long long int strideA, /* purposely signed */
+    const cuDoubleComplex *B, int ldb, long long int strideB,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, long long,
+      const cuDoubleComplex *, int, long long, const cuDoubleComplex *,
+      cuDoubleComplex *, int, long long, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmStridedBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb, strideB, beta, C, ldc, strideC, batchCount);
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const float *alpha, /* host or device pointer */ 
-                                                  const float *A, 
-                                                  int lda,
-                                                  const float *beta , /* host or device pointer */ 
-                                                  const float *B, 
-                                                  int ldb,
-                                                  float *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const float *, const float *, int, const float *, const float *, int, float *, int);
+cublasStatus_t CUBLASWINAPI cublasSgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const float *alpha,           /* host or device pointer */
+    const float *A, int lda, const float *beta, /* host or device pointer */
+    const float *B, int ldb, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, const float *, int,
+      float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const double *alpha, /* host or device pointer */ 
-                                                  const double *A, 
-                                                  int lda,
-                                                  const double *beta, /* host or device pointer */ 
-                                                  const double *B, 
-                                                  int ldb,
-                                                  double *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const double *, const double *, int, const double *, const double *, int, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const double *alpha,            /* host or device pointer */
+    const double *A, int lda, const double *beta, /* host or device pointer */
+    const double *B, int ldb, double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, const double *, int,
+      double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const cuComplex *alpha, /* host or device pointer */ 
-                                                  const cuComplex *A, 
-                                                  int lda,
-                                                  const cuComplex *beta, /* host or device pointer */  
-                                                  const cuComplex *B, 
-                                                  int ldb,
-                                                  cuComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const cuComplex *, const cuComplex *, int, const cuComplex *, const cuComplex *, int, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    const cuComplex *B, int ldb, cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgeam(cublasHandle_t handle,
-                                                  cublasOperation_t transa, 
-                                                  cublasOperation_t transb,
-                                                  int m, 
-                                                  int n,
-                                                  const cuDoubleComplex *alpha, /* host or device pointer */ 
-                                                  const cuDoubleComplex *A, 
-                                                  int lda,
-                                                  const cuDoubleComplex *beta, /* host or device pointer */  
-                                                  const cuDoubleComplex *B, 
-                                                  int ldb,
-                                                  cuDoubleComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *, int, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *,
+      int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeam");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle,
-                                                  int n, 
-                                                  float *A[],                      /*Device pointer*/
-                                                  int lda, 
-                                                  int *P,                          /*Device Pointer*/
-                                                  int *info,                       /*Device Pointer*/
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, float *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(cublasHandle_t handle, int n,
+                                                float *A[], /*Device pointer*/
+                                                int lda,
+                                                int *P,    /*Device Pointer*/
+                                                int *info, /*Device Pointer*/
+                                                int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *[],
+                                                 int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle,
-                                                  int n, 
-                                                  double *A[],                     /*Device pointer*/
-                                                  int lda, 
-                                                  int *P,                          /*Device Pointer*/
-                                                  int *info,                       /*Device Pointer*/
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, double *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(cublasHandle_t handle, int n,
+                                                double *A[], /*Device pointer*/
+                                                int lda,
+                                                int *P,    /*Device Pointer*/
+                                                int *info, /*Device Pointer*/
+                                                int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, double *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(cublasHandle_t handle,
-                                                  int n, 
-                                                  cuComplex *A[],                 /*Device pointer*/
-                                                  int lda, 
-                                                  int *P,                         /*Device Pointer*/
-                                                  int *info,                      /*Device Pointer*/
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuComplex *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(
+    cublasHandle_t handle, int n, cuComplex *A[], /*Device pointer*/
+    int lda, int *P,                              /*Device Pointer*/
+    int *info,                                    /*Device Pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(cublasHandle_t handle,
-                                                  int n, 
-                                                  cuDoubleComplex *A[],           /*Device pointer*/
-                                                  int lda, 
-                                                  int *P,                         /*Device Pointer*/
-                                                  int *info,                      /*Device Pointer*/
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, cuDoubleComplex *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(
+    cublasHandle_t handle, int n, cuDoubleComplex *A[], /*Device pointer*/
+    int lda, int *P,                                    /*Device pointer*/
+    int *info,                                          /*Device pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgetriBatched(cublasHandle_t handle,
-                                                  int n,
-                                                  const float *A[],               /*Device pointer*/
-                                                  int lda,
-                                                  const int *P,                   /*Device pointer*/
-                                                  float *C[],                     /*Device pointer*/
-                                                  int ldc,
-                                                  int *info,
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *[], int, const int *, float *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasSgetriBatched(
+    cublasHandle_t handle, int n, const float *A[], /*Device pointer*/
+    int lda, const int *P,                          /*Device pointer*/
+    float *C[],                                     /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *[], int,
+                                     const int *, float *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetriBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgetriBatched(cublasHandle_t handle,
-                                                  int n,
-                                                  const double *A[],              /*Device pointer*/
-                                                  int lda,
-                                                  const int *P,                   /*Device pointer*/
-                                                  double *C[],                    /*Device pointer*/
-                                                  int ldc,
-                                                  int *info,
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *[], int, const int *, double *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasDgetriBatched(
+    cublasHandle_t handle, int n, const double *A[], /*Device pointer*/
+    int lda, const int *P,                           /*Device pointer*/
+    double *C[],                                     /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *[], int,
+                                     const int *, double *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetriBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCgetriBatched(cublasHandle_t handle,
-                                                  int n,
-                                                  const cuComplex *A[],            /*Device pointer*/
-                                                  int lda,
-                                                  const int *P,                   /*Device pointer*/
-                                                  cuComplex *C[],                 /*Device pointer*/
-                                                  int ldc,
-                                                  int *info,
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *[], int, const int *, cuComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasCgetriBatched(
+    cublasHandle_t handle, int n, const cuComplex *A[], /*Device pointer*/
+    int lda, const int *P,                              /*Device pointer*/
+    cuComplex *C[],                                     /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *[], int, const int *, cuComplex *[],
+      int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetriBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZgetriBatched(cublasHandle_t handle,
-                                                  int n,
-                                                  const cuDoubleComplex *A[],     /*Device pointer*/
-                                                  int lda,
-                                                  const int *P,                   /*Device pointer*/
-                                                  cuDoubleComplex *C[],           /*Device pointer*/
-                                                  int ldc,
-                                                  int *info,
-                                                  int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *[], int, const int *, cuDoubleComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasZgetriBatched(
+    cublasHandle_t handle, int n, const cuDoubleComplex *A[], /*Device pointer*/
+    int lda, const int *P,                                    /*Device pointer*/
+    cuDoubleComplex *C[],                                     /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *[], int, const int *,
+      cuDoubleComplex *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetriBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasSgetrsBatched( cublasHandle_t handle, 
-                                                            cublasOperation_t trans, 
-                                                            int n, 
-                                                            int nrhs, 
-                                                            const float *Aarray[], 
-                                                            int lda, 
-                                                            const int *devIpiv, 
-                                                            float *Barray[], 
-                                                            int ldb, 
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const float *[], int, const int *, float *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(cublasHandle_t handle,
+                                                cublasOperation_t trans, int n,
+                                                int nrhs, const float *Aarray[],
+                                                int lda, const int *devIpiv,
+                                                float *Barray[], int ldb,
+                                                int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const float *[], int,
+      const int *, float *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( cublasHandle_t handle, 
-                                                           cublasOperation_t trans, 
-                                                           int n, 
-                                                           int nrhs, 
-                                                           const double *Aarray[], 
-                                                           int lda, 
-                                                           const int *devIpiv, 
-                                                           double *Barray[], 
-                                                           int ldb, 
-                                                           int *info,
-                                                           int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const double *[], int, const int *, double *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasDgetrsBatched(
+    cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs,
+    const double *Aarray[], int lda, const int *devIpiv, double *Barray[],
+    int ldb, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const double *[], int,
+      const int *, double *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasCgetrsBatched( cublasHandle_t handle, 
-                                                            cublasOperation_t trans, 
-                                                            int n, 
-                                                            int nrhs, 
-                                                            const cuComplex *Aarray[], 
-                                                            int lda, 
-                                                            const int *devIpiv, 
-                                                            cuComplex *Barray[], 
-                                                            int ldb, 
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuComplex *[], int, const int *, cuComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasCgetrsBatched(
+    cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs,
+    const cuComplex *Aarray[], int lda, const int *devIpiv, cuComplex *Barray[],
+    int ldb, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const cuComplex *[], int,
+      const int *, cuComplex *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
-cublasStatus_t  CUBLASWINAPI cublasZgetrsBatched( cublasHandle_t handle, 
-                                                            cublasOperation_t trans, 
-                                                            int n, 
-                                                            int nrhs, 
-                                                            const cuDoubleComplex *Aarray[], 
-                                                            int lda, 
-                                                            const int *devIpiv, 
-                                                            cuDoubleComplex *Barray[], 
-                                                            int ldb, 
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *[], int, const int *, cuDoubleComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasZgetrsBatched(
+    cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs,
+    const cuDoubleComplex *Aarray[], int lda, const int *devIpiv,
+    cuDoubleComplex *Barray[], int ldb, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *[],
+      int, const int *, cuDoubleComplex *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetrsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, info, batchSize);
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrsmBatched( cublasHandle_t    handle, 
-                                                          cublasSideMode_t  side, 
-                                                          cublasFillMode_t  uplo,
-                                                          cublasOperation_t trans, 
-                                                          cublasDiagType_t  diag,
-                                                          int m, 
-                                                          int n, 
-                                                          const float *alpha,           /*Host or Device Pointer*/
-                                                          const float *A[], 
-                                                          int lda,
-                                                          float *B[], 
-                                                          int ldb,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const float *, const float *[], int, float *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasStrsmBatched(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /*Host or Device Pointer*/
+    const float *A[], int lda, float *B[], int ldb, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *[], int,
+      float *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( cublasHandle_t    handle, 
-                                                          cublasSideMode_t  side, 
-                                                          cublasFillMode_t  uplo,
-                                                          cublasOperation_t trans, 
-                                                          cublasDiagType_t  diag,
-                                                          int m, 
-                                                          int n, 
-                                                          const double *alpha,          /*Host or Device Pointer*/
-                                                          const double *A[], 
-                                                          int lda,
-                                                          double *B[], 
-                                                          int ldb,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const double *, const double *[], int, double *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasDtrsmBatched(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /*Host or Device Pointer*/
+    const double *A[], int lda, double *B[], int ldb, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *[], int,
+      double *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( cublasHandle_t    handle, 
-                                                          cublasSideMode_t  side, 
-                                                          cublasFillMode_t  uplo,
-                                                          cublasOperation_t trans, 
-                                                          cublasDiagType_t  diag,
-                                                          int m, 
-                                                          int n, 
-                                                          const cuComplex *alpha,       /*Host or Device Pointer*/
-                                                          const cuComplex *A[], 
-                                                          int lda,
-                                                          cuComplex *B[], 
-                                                          int ldb,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuComplex *, const cuComplex *[], int, cuComplex *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasCtrsmBatched(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /*Host or Device Pointer*/
+    const cuComplex *A[], int lda, cuComplex *B[], int ldb, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *[], int,
+      cuComplex *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( cublasHandle_t    handle, 
-                                                          cublasSideMode_t  side, 
-                                                          cublasFillMode_t  uplo,
-                                                          cublasOperation_t trans, 
-                                                          cublasDiagType_t  diag,
-                                                          int m, 
-                                                          int n, 
-                                                          const cuDoubleComplex *alpha, /*Host or Device Pointer*/
-                                                          const cuDoubleComplex *A[], 
-                                                          int lda,
-                                                          cuDoubleComplex *B[], 
-                                                          int ldb,
-                                                          int batchCount) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, int, int, const cuDoubleComplex *, const cuDoubleComplex *[], int, cuDoubleComplex *[], int, int);
+cublasStatus_t CUBLASWINAPI cublasZtrsmBatched(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /*Host or Device Pointer*/
+    const cuDoubleComplex *A[], int lda, cuDoubleComplex *B[], int ldb,
+    int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *[], int, cuDoubleComplex *[], int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsmBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, batchCount);
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  batchCount);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(cublasHandle_t handle,
-                                                          int n, 
-                                                          const float *A[],                  /*Device pointer*/
-                                                          int lda, 
-                                                          float *Ainv[],               /*Device pointer*/
-                                                          int lda_inv, 
-                                                          int *info,                   /*Device Pointer*/
-                                                          int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const float *[], int, float *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasSmatinvBatched(
+    cublasHandle_t handle, int n, const float *A[], /*Device pointer*/
+    int lda, float *Ainv[],                         /*Device pointer*/
+    int lda_inv, int *info,                         /*Device pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *[], int, float *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSmatinvBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDmatinvBatched(cublasHandle_t handle,
-                                                          int n, 
-                                                          const double *A[],                 /*Device pointer*/
-                                                          int lda, 
-                                                          double *Ainv[],              /*Device pointer*/
-                                                          int lda_inv, 
-                                                          int *info,                   /*Device Pointer*/
-                                                          int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const double *[], int, double *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasDmatinvBatched(
+    cublasHandle_t handle, int n, const double *A[], /*Device pointer*/
+    int lda, double *Ainv[],                         /*Device pointer*/
+    int lda_inv, int *info,                          /*Device pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *[], int, double *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDmatinvBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCmatinvBatched(cublasHandle_t handle,
-                                                          int n, 
-                                                          const cuComplex *A[],              /*Device pointer*/
-                                                          int lda, 
-                                                          cuComplex *Ainv[],           /*Device pointer*/
-                                                          int lda_inv, 
-                                                          int *info,                   /*Device Pointer*/
-                                                          int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *[], int, cuComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasCmatinvBatched(
+    cublasHandle_t handle, int n, const cuComplex *A[], /*Device pointer*/
+    int lda, cuComplex *Ainv[],                         /*Device pointer*/
+    int lda_inv, int *info,                             /*Device pointer*/
+    int batchSize) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *[],
+                                     int, cuComplex *[], int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCmatinvBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZmatinvBatched(cublasHandle_t handle,
-                                                          int n, 
-                                                          const cuDoubleComplex *A[],        /*Device pointer*/
-                                                          int lda, 
-                                                          cuDoubleComplex *Ainv[],     /*Device pointer*/
-                                                          int lda_inv, 
-                                                          int *info,                   /*Device Pointer*/
-                                                          int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, const cuDoubleComplex *[], int, cuDoubleComplex *[], int, int *, int);
+cublasStatus_t CUBLASWINAPI cublasZmatinvBatched(
+    cublasHandle_t handle, int n, const cuDoubleComplex *A[], /*Device pointer*/
+    int lda, cuDoubleComplex *Ainv[],                         /*Device pointer*/
+    int lda_inv, int *info,                                   /*Device pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *[], int, cuDoubleComplex *[],
+      int, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZmatinvBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched( cublasHandle_t handle, 
-                                                           int m, 
-                                                           int n,
-                                                           float *Aarray[],           /*Device pointer*/
-                                                           int lda, 
-                                                           float *TauArray[],        /* Device pointer*/                                                           
-                                                           int *info,
-                                                           int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, float *[], int, float *[], int *, int);
+cublasStatus_t CUBLASWINAPI cublasSgeqrfBatched(
+    cublasHandle_t handle, int m, int n, float *Aarray[], /*Device pointer*/
+    int lda, float *TauArray[],                           /*Device pointer*/
+    int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, float *[], int, float *[], int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgeqrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasDgeqrfBatched( cublasHandle_t handle, 
-                                                            int m, 
-                                                            int n,
-                                                            double *Aarray[],           /*Device pointer*/
-                                                            int lda, 
-                                                            double *TauArray[],        /* Device pointer*/                                                            
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, double *[], int, double *[], int *, int);
+cublasStatus_t CUBLASWINAPI cublasDgeqrfBatched(
+    cublasHandle_t handle, int m, int n, double *Aarray[], /*Device pointer*/
+    int lda, double *TauArray[],                           /*Device pointer*/
+    int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, double *[], int, double *[], int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgeqrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasCgeqrfBatched( cublasHandle_t handle, 
-                                                            int m, 
-                                                            int n,
-                                                            cuComplex *Aarray[],           /*Device pointer*/
-                                                            int lda, 
-                                                            cuComplex *TauArray[],        /* Device pointer*/                                                            
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, cuComplex *[], int, cuComplex *[], int *, int);
+cublasStatus_t CUBLASWINAPI cublasCgeqrfBatched(
+    cublasHandle_t handle, int m, int n, cuComplex *Aarray[], /*Device pointer*/
+    int lda, cuComplex *TauArray[], /* Device pointer*/
+    int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, cuComplex *[], int, cuComplex *[], int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeqrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasZgeqrfBatched( cublasHandle_t handle, 
-                                                            int m, 
-                                                            int n,
-                                                            cuDoubleComplex *Aarray[],           /*Device pointer*/
-                                                            int lda, 
-                                                            cuDoubleComplex *TauArray[],        /* Device pointer*/                                                          
-                                                            int *info,
-                                                            int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, int, int, cuDoubleComplex *[], int, cuDoubleComplex *[], int *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgeqrfBatched(cublasHandle_t handle, int m, int n,
+                    cuDoubleComplex *Aarray[],            /*Device pointer*/
+                    int lda, cuDoubleComplex *TauArray[], /* Device pointer*/
+                    int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, int, cuDoubleComplex *[], int, cuDoubleComplex *[],
+      int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeqrfBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasSgelsBatched( cublasHandle_t handle, 
-                                                           cublasOperation_t trans, 
-                                                           int m,  
-                                                           int n,
-                                                           int nrhs,
-                                                           float *Aarray[], /*Device pointer*/
-                                                           int lda, 
-                                                           float *Carray[], /* Device pointer*/
-                                                           int ldc,                                                                 
-                                                           int *info, 
-                                                           int *devInfoArray, /* Device pointer*/
-                                                           int batchSize ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, float *[], int, float *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI
+cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+                   int nrhs, float *Aarray[],             /*Device pointer*/
+                   int lda, float *Carray[],              /* Device pointer*/
+                   int ldc, int *info, int *devInfoArray, /* Device pointer*/
+                   int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, float *[], int,
+      float *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasDgelsBatched( cublasHandle_t handle,
-                                                           cublasOperation_t trans,  
-                                                           int m,  
-                                                           int n,
-                                                           int nrhs,
-                                                           double *Aarray[], /*Device pointer*/
-                                                           int lda, 
-                                                           double *Carray[], /* Device pointer*/
-                                                           int ldc,                                                                 
-                                                           int *info, 
-                                                           int *devInfoArray, /* Device pointer*/
-                                                           int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, double *[], int, double *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI
+cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+                   int nrhs, double *Aarray[],            /*Device pointer*/
+                   int lda, double *Carray[],             /* Device pointer*/
+                   int ldc, int *info, int *devInfoArray, /* Device pointer*/
+                   int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, double *[], int,
+      double *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasCgelsBatched( cublasHandle_t handle, 
-                                                           cublasOperation_t trans, 
-                                                           int m,  
-                                                           int n,
-                                                           int nrhs,
-                                                           cuComplex *Aarray[], /*Device pointer*/
-                                                           int lda, 
-                                                           cuComplex *Carray[], /* Device pointer*/
-                                                           int ldc,                                                                 
-                                                           int *info, 
-                                                           int *devInfoArray,
-                                                           int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *[], int, cuComplex *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI
+cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+                   int nrhs, cuComplex *Aarray[], /*Device pointer*/
+                   int lda, cuComplex *Carray[],  /* Device pointer*/
+                   int ldc, int *info, int *devInfoArray, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *[], int,
+      cuComplex *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
-cublasStatus_t CUBLASWINAPI  cublasZgelsBatched( cublasHandle_t handle, 
-                                                           cublasOperation_t trans, 
-                                                           int m,  
-                                                           int n,
-                                                           int nrhs,
-                                                           cuDoubleComplex *Aarray[], /*Device pointer*/
-                                                           int lda, 
-                                                           cuDoubleComplex *Carray[], /* Device pointer*/
-                                                           int ldc,                                                                 
-                                                           int *info, 
-                                                           int *devInfoArray,
-                                                           int batchSize) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasOperation_t, int, int, int, cuDoubleComplex *[], int, cuDoubleComplex *[], int, int *, int *, int);
+cublasStatus_t CUBLASWINAPI
+cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n,
+                   int nrhs, cuDoubleComplex *Aarray[], /*Device pointer*/
+                   int lda, cuDoubleComplex *Carray[],  /* Device pointer*/
+                   int ldc, int *info, int *devInfoArray, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, int, cuDoubleComplex *[],
+      int, cuDoubleComplex *[], int, int *, int *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgelsBatched");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize);
+  return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info,
+                  devInfoArray, batchSize);
 }
 
 cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const float *A, 
-                                                  int lda,
-                                                  const float *x, 
-                                                  int incx,
-                                                  float *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const float *, int, const float *, int, float *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const float *A, int lda, const float *x,
+                                        int incx, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const float *, int,
+      const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const double *A, 
-                                                  int lda,
-                                                  const double *x, 
-                                                  int incx,
-                                                  double *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const double *, int, const double *, int, double *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const double *A, int lda,
+                                        const double *x, int incx, double *C,
+                                        int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const double *, int,
+      const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const cuComplex *A, 
-                                                  int lda,
-                                                  const cuComplex *x, 
-                                                  int incx,
-                                                  cuComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const cuComplex *A, int lda,
+                                        const cuComplex *x, int incx,
+                                        cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
 cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle,
-                                                  cublasSideMode_t mode, 
-                                                  int m, 
-                                                  int n,
-                                                  const cuDoubleComplex *A, 
-                                                  int lda,
-                                                  const cuDoubleComplex *x, 
-                                                  int incx,
-                                                  cuDoubleComplex *C, 
-                                                  int ldc) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+                                        cublasSideMode_t mode, int m, int n,
+                                        const cuDoubleComplex *A, int lda,
+                                        const cuDoubleComplex *x, int incx,
+                                        cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdgmm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const float *AP,
-                                                     float *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, float *, int);
+cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const float *AP, float *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const double *AP,
-                                                     double *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, double *, int);
+cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const double *AP, double *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuComplex *AP,
-                                                     cuComplex *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, cuComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuComplex *AP, cuComplex *A,
+                                         int lda) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int,
+                                     const cuComplex *, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtpttr ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuDoubleComplex *AP,
-                                                     cuDoubleComplex *A,  
-                                                     int lda ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuDoubleComplex *AP,
+                                         cuDoubleComplex *A, int lda) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *,
+      cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpttr");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, AP, A, lda);
 }
 
-cublasStatus_t CUBLASWINAPI cublasStrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const float *A,
-                                                     int lda,
-                                                     float *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const float *, int, float *);
+cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const float *A, int lda, float *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasDtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const double *A,
-                                                     int lda,
-                                                     double *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const double *, int, double *);
+cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const double *A, int lda, double *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasCtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuComplex *A,
-                                                     int lda,
-                                                     cuComplex *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuComplex *, int, cuComplex *);
+cublasStatus_t CUBLASWINAPI cublasCtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuComplex *A, int lda,
+                                         cuComplex *AP) {
+  using FuncPtr =
+      cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int,
+                                     const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus_t CUBLASWINAPI cublasZtrttp ( cublasHandle_t handle, 
-                                                     cublasFillMode_t uplo, 
-                                                     int n,                                     
-                                                     const cuDoubleComplex *A,
-                                                     int lda,
-                                                     cuDoubleComplex *AP ) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle,
+                                         cublasFillMode_t uplo, int n,
+                                         const cuDoubleComplex *A, int lda,
+                                         cuDoubleComplex *AP) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrttp");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, uplo, n, A, lda, AP);
 }
 
-cublasStatus CUBLASWINAPI cublasInit (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasInit(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasInit");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
-cublasStatus CUBLASWINAPI cublasShutdown (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasShutdown(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasShutdown");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
-cublasStatus CUBLASWINAPI cublasGetError (void) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)();
+cublasStatus CUBLASWINAPI cublasGetError(void) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr();
 }
 
 cublasStatus CUBLASWINAPI cublasGetVersion(int *version) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int *);
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasGetVersion");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(version);
 }
 
-cublasStatus CUBLASWINAPI cublasAlloc (int n, int elemSize, void **devicePtr) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(int, int, void **);
+cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasAlloc");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(n, elemSize, devicePtr);
 }
 
-cublasStatus CUBLASWINAPI cublasFree (void *devicePtr) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(void *);
+cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasFree");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(devicePtr);
 }
 
-cublasStatus CUBLASWINAPI cublasSetKernelStream (cudaStream_t stream) {
-  using FuncPtr = cublasStatus_t (CUBLASWINAPI *)(cudaStream_t);
+cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSetKernelStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stream);
 }
 
-float CUBLASWINAPI cublasSnrm2 (int n, const float *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int);
+float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDnrm2 (int n, const double *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int);
+double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasScnrm2 (int n, const cuComplex *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const cuComplex *, int);
+float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScnrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDznrm2 (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDznrm2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasSdot (int n, const float *x, int incx, const float *y, 
-                               int incy) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int, const float *, int);
+float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y,
+                              int incy) {
+  using FuncPtr =
+      float(CUBLASWINAPI *)(int, const float *, int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSdot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSdot");
   return func_ptr(n, x, incx, y, incy);
 }
 
-double CUBLASWINAPI cublasDdot (int n, const double *x, int incx, const double *y, 
-                               int incy) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int, const double *, int);
+double CUBLASWINAPI cublasDdot(int n, const double *x, int incx,
+                               const double *y, int incy) {
+  using FuncPtr =
+      double(CUBLASWINAPI *)(int, const double *, int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDdot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDdot");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuComplex CUBLASWINAPI cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y, 
-                               int incy) {
-  using FuncPtr = cuComplex (CUBLASWINAPI *)(int, const cuComplex *, int, const cuComplex *, int);
+cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx,
+                                   const cuComplex *y, int incy) {
+  using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int,
+                                            const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotu");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuComplex CUBLASWINAPI cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y, 
-                               int incy) {
-  using FuncPtr = cuComplex (CUBLASWINAPI *)(int, const cuComplex *, int, const cuComplex *, int);
+cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx,
+                                   const cuComplex *y, int incy) {
+  using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int,
+                                            const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCdotc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuDoubleComplex CUBLASWINAPI cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = cuDoubleComplex (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
+cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x,
+                                         int incx, const cuDoubleComplex *y,
+                                         int incy) {
+  using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)(
+      int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotu");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu");
   return func_ptr(n, x, incx, y, incy);
 }
 
-cuDoubleComplex CUBLASWINAPI cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = cuDoubleComplex (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
+cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x,
+                                         int incx, const cuDoubleComplex *y,
+                                         int incy) {
+  using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)(
+      int, const cuDoubleComplex *, int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdotc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasSscal (int n, float alpha, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, float *, int);
+void CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasDscal (int n, double alpha, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, double *, int);
+void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasZscal (int n, cuDoubleComplex alpha, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasCsscal (int n, float alpha, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex *x,
+                               int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdscal");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal");
   return func_ptr(n, alpha, x, incx);
 }
 
-void CUBLASWINAPI cublasSaxpy (int n, float alpha, const float *x, int incx, 
-                               float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx,
+                              float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDaxpy (int n, double alpha, const double *x, 
-                               int incx, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx,
+                              double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCaxpy (int n, cuComplex alpha, const cuComplex *x, 
-                               int incx, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x,
+                              int incx, cuComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, 
-                               int incx, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZaxpy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy");
   return func_ptr(n, alpha, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasScopy (int n, const float *x, int incx, float *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const float *, int, float *, int);
+void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDcopy (int n, const double *x, int incx, double *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y,
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y,
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int,
+                                       cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZcopy");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasSswap (int n, float *x, int incx, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int);
+void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasDswap (int n, double *x, int incx, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int);
+void CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasCswap (int n, cuComplex *x, int incx, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-void CUBLASWINAPI cublasZswap (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZswap");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZswap");
   return func_ptr(n, x, incx, y, incy);
 }
 
-int CUBLASWINAPI cublasIsamax (int n, const float *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const float *, int);
+int CUBLASWINAPI cublasIsamax(int n, const float *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIdamax (int n, const double *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const double *, int);
+int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIcamax (int n, const cuComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuComplex *, int);
+int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIzamax (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamax");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIsamin (int n, const float *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const float *, int);
+int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIsamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIdamin (int n, const double *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const double *, int);
+int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIdamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIcamin (int n, const cuComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuComplex *, int);
+int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIcamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin");
   return func_ptr(n, x, incx);
 }
 
-int CUBLASWINAPI cublasIzamin (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = int (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasIzamin");
   if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasSasum (int n, const float *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const float *, int);
+float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSasum");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDasum (int n, const double *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const double *, int);
+double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDasum");
   return func_ptr(n, x, incx);
 }
 
-float CUBLASWINAPI cublasScasum (int n, const cuComplex *x, int incx) {
-  using FuncPtr = float (CUBLASWINAPI *)(int, const cuComplex *, int);
+float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) {
+  using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasScasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasScasum");
   return func_ptr(n, x, incx);
 }
 
-double CUBLASWINAPI cublasDzasum (int n, const cuDoubleComplex *x, int incx) {
-  using FuncPtr = double (CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
+double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) {
+  using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDzasum");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum");
   return func_ptr(n, x, incx);
 }
 
-void CUBLASWINAPI cublasSrot (int n, float *x, int incx, float *y, int incy, 
-                              float sc, float ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int, float, float);
+void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy,
+                             float sc, float ss) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float *, int, float *, int, float, float);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrot");
   return func_ptr(n, x, incx, y, incy, sc, ss);
 }
 
-void CUBLASWINAPI cublasDrot (int n, double *x, int incx, double *y, int incy, 
-                              double sc, double ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int, double, double);
+void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy,
+                             double sc, double ss) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrot");
   return func_ptr(n, x, incx, y, incy, sc, ss);
 }
 
-void CUBLASWINAPI cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, 
-                              int incy, float c, cuComplex s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, float, cuComplex);
+void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y,
+                             int incy, float c, cuComplex s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int,
+                                       float, cuComplex);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasZrot (int n, cuDoubleComplex *x, int incx, 
-                              cuDoubleComplex *y, int incy, double sc, 
-                              cuDoubleComplex cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, double, cuDoubleComplex);
+void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *y, int incy, double sc,
+                             cuDoubleComplex cs) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int,
+                           double, cuDoubleComplex);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZrot");
   return func_ptr(n, x, incx, y, incy, sc, cs);
 }
 
-void CUBLASWINAPI cublasCsrot (int n, cuComplex *x, int incx, cuComplex *y,
-                               int incy, float c, float s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, float, float);
+void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y,
+                              int incy, float c, float s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int,
+                                       float, float);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasZdrot (int n, cuDoubleComplex *x, int incx, 
-                               cuDoubleComplex *y, int incy, double c, double s) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, double, double);
+void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx,
+                              cuDoubleComplex *y, int incy, double c,
+                              double s) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int,
+                                       cuDoubleComplex *, int, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZdrot");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZdrot");
   return func_ptr(n, x, incx, y, incy, c, s);
 }
 
-void CUBLASWINAPI cublasSrotg (float *sa, float *sb, float *sc, float *ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(float *, float *, float *, float *);
+void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) {
+  using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg");
   return func_ptr(sa, sb, sc, ss);
 }
 
-void CUBLASWINAPI cublasDrotg (double *sa, double *sb, double *sc, double *ss) {
-  using FuncPtr = void (CUBLASWINAPI *)(double *, double *, double *, double *);
+void CUBLASWINAPI cublasDrotg(double *sa, double *sb, double *sc, double *ss) {
+  using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg");
   return func_ptr(sa, sb, sc, ss);
 }
 
-void CUBLASWINAPI cublasCrotg (cuComplex *ca, cuComplex cb, float *sc,
-                               cuComplex *cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *);
+void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc,
+                              cuComplex *cs) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg");
   return func_ptr(ca, cb, sc, cs);
 }
 
-void CUBLASWINAPI cublasZrotg (cuDoubleComplex *ca, cuDoubleComplex cb, double *sc,
-                               cuDoubleComplex *cs) {
-  using FuncPtr = void (CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, double *, cuDoubleComplex *);
+void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb,
+                              double *sc, cuDoubleComplex *cs) {
+  using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex,
+                                       double *, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZrotg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg");
   return func_ptr(ca, cb, sc, cs);
 }
 
-void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy, 
-                              const float* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, float *, int, float *, int, const float *);
+void CUBLASWINAPI cublasSrotm(int n, float *x, int incx, float *y, int incy,
+                              const float *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm");
   return func_ptr(n, x, incx, y, incy, sparam);
 }
 
-void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, 
-                              const double* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, double *, int, double *, int, const double *);
+void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy,
+                              const double *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm");
   return func_ptr(n, x, incx, y, incy, sparam);
 }
 
-void CUBLASWINAPI cublasSrotmg (float *sd1, float *sd2, float *sx1, 
-                                const float *sy1, float* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(float *, float *, float *, const float *, float *);
+void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1,
+                               const float *sy1, float *sparam) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSrotmg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg");
   return func_ptr(sd1, sd2, sx1, sy1, sparam);
 }
 
-void CUBLASWINAPI cublasDrotmg (double *sd1, double *sd2, double *sx1, 
-                                const double *sy1, double* sparam) {
-  using FuncPtr = void (CUBLASWINAPI *)(double *, double *, double *, const double *, double *);
+void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1,
+                               const double *sy1, double *sparam) {
+  using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *,
+                                       const double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDrotmg");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg");
   return func_ptr(sd1, sd2, sx1, sy1, sparam);
 }
 
-void CUBLASWINAPI cublasSgemv (char trans, int m, int n, float alpha,
-                               const float *A, int lda, const float *x, int incx,
-                               float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha,
+                              const float *A, int lda, const float *x, int incx,
+                              float beta, float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDgemv (char trans, int m, int n, double alpha,
-                               const double *A, int lda, const double *x, int incx,
-                               double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha,
+                              const double *A, int lda, const double *x,
+                              int incx, double beta, double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasCgemv (char trans, int m, int n, cuComplex alpha,
-                               const cuComplex *A, int lda, const cuComplex *x, int incx,
-                               cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx,
-                               cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv");
   return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSgbmv (char trans, int m, int n, int kl, int ku, 
-                               float alpha, const float *A, int lda, 
-                               const float *x, int incx, float beta, float *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku,
+                              float alpha, const float *A, int lda,
+                              const float *x, int incx, float beta, float *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDgbmv (char trans, int m, int n, int kl, int ku, 
-                               double alpha, const double *A, int lda, 
-                               const double *x, int incx, double beta, double *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgbmv(char trans, int m, int n, int kl, int ku,
+                              double alpha, const double *A, int lda,
+                              const double *x, int incx, double beta, double *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *,
+                           int, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasCgbmv (char trans, int m, int n, int kl, int ku, 
-                               cuComplex alpha, const cuComplex *A, int lda, 
-                               const cuComplex *x, int incx, cuComplex beta, cuComplex *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *x, int incx, cuComplex beta,
+                              cuComplex *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZgbmv (char trans, int m, int n, int kl, int ku, 
-                               cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, 
-                               const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, 
-                               int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv");
   return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasStrmv (char uplo, char trans, char diag, int n, 
-                               const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *,
+                                       int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtrmv (char uplo, char trans, char diag, int n, 
-                               const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtrmv (char uplo, char trans, char diag, int n, 
-                               const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtrmv (char uplo, char trans, char diag, int n, 
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStbmv (char uplo, char trans, char diag, int n, int k, 
-                               const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtbmv (char uplo, char trans, char diag, int n, int k, 
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *,
+                           int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, const float *AP, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
+void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n,
+                              const float *AP, float *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, double *, int);
+void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n,
+                              const double *AP, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, cuComplex *, int);
+void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n,
+                              const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *AP, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, const float *A, int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *,
+                                       int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, const double *A, int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, const cuComplex *A, int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *A, int lda, 
+void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *A, int lda,
                               cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int,
+                           cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv");
   return func_ptr(uplo, trans, diag, n, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, const float *AP, 
-                              float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
+void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n,
+                              const float *AP, float *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, const double *AP, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const double *, double *, int);
+void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n,
+                              const double *AP, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, const cuComplex *AP, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuComplex *, cuComplex *, int);
+void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n,
+                              const cuComplex *AP, cuComplex *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, 
-                              cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n,
+                              const cuDoubleComplex *AP, cuDoubleComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtpsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv");
   return func_ptr(uplo, trans, diag, n, AP, x, incx);
 }
 
-void CUBLASWINAPI cublasStbsv(char uplo, char trans, 
-                              char diag, int n, int k, const float *A, 
-                              int lda, float *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k,
+                              const float *A, int lda, float *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasDtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const double *A, 
-                              int lda, double *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k,
+                              const double *A, int lda, double *x, int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasCtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const cuComplex *A, 
-                              int lda, cuComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k,
+                              const cuComplex *A, int lda, cuComplex *x,
+                              int incx) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, char, int, int, const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasZtbsv(char uplo, char trans, 
-                              char diag, int n, int k, const cuDoubleComplex *A, 
-                              int lda, cuDoubleComplex *x, int incx) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *x, int incx) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *,
+                           int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtbsv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv");
   return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx);
 }
 
-void CUBLASWINAPI cublasSsymv (char uplo, int n, float alpha, const float *A,
-                               int lda, const float *x, int incx, float beta, 
-                               float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A,
+                              int lda, const float *x, int incx, float beta,
+                              float *y, int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDsymv (char uplo, int n, double alpha, const double *A,
-                               int lda, const double *x, int incx, double beta, 
-                               double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A,
+                              int lda, const double *x, int incx, double beta,
+                              double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, const cuComplex *x, int incx, cuComplex beta, 
-                               cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChemv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A,
-                               int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta, 
-                               cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv");
   return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSsbmv (char uplo, int n, int k, float alpha, 
-                               const float *A, int lda, const float *x, int incx, 
-                               float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha,
+                              const float *A, int lda, const float *x, int incx,
+                              float beta, float *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDsbmv (char uplo, int n, int k, double alpha, 
-                               const double *A, int lda, const double *x, int incx, 
-                               double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha,
+                              const double *A, int lda, const double *x,
+                              int incx, double beta, double *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasChbmv (char uplo, int n, int k, cuComplex alpha, 
-                               const cuComplex *A, int lda, const cuComplex *x, int incx, 
-                               cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha,
+                              const cuComplex *A, int lda, const cuComplex *x,
+                              int incx, cuComplex beta, cuComplex *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, 
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, 
-                               cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhbmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv");
   return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha,
-                              const float *AP, const float *x,
-                              int incx, float beta, float *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP,
+                              const float *x, int incx, float beta, float *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha,
-                              const double *AP, const double *x,
-                              int incx, double beta, double *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP,
+                              const double *x, int incx, double beta, double *y,
+                              int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, const double *,
+                           int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
 void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha,
-                              const cuComplex *AP, const cuComplex *x,
-                              int incx, cuComplex beta, cuComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, const cuComplex *, int, cuComplex, cuComplex *, int);
+                              const cuComplex *AP, const cuComplex *x, int incx,
+                              cuComplex beta, cuComplex *y, int incy) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *,
+                           const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
 void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha,
-                              const cuDoubleComplex *AP, const cuDoubleComplex *x,
-                              int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+                              const cuDoubleComplex *AP,
+                              const cuDoubleComplex *x, int incx,
+                              cuDoubleComplex beta, cuDoubleComplex *y,
+                              int incy) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpmv");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv");
   return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy);
 }
 
-void CUBLASWINAPI cublasSger (int m, int n, float alpha, const float *x, int incx,
-                              const float *y, int incy, float *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, float, const float *, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x,
+                             int incx, const float *y, int incy, float *A,
+                             int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSger");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSger");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasDger (int m, int n, double alpha, const double *x, int incx,
-                              const double *y, int incy, double *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, double, const double *, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x,
+                             int incx, const double *y, int incy, double *A,
+                             int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDger");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDger");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x,
-                               int incx, const cuComplex *y, int incy,
-                               cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x,
+                              int incx, const cuComplex *y, int incy,
+                              cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeru");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgeru");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x,
-                               int incx, const cuComplex *y, int incy,
-                               cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x,
+                              int incx, const cuComplex *y, int incy,
+                              cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgerc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
-                               int incx, const cuDoubleComplex *y, int incy,
-                               cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeru");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x,
-                               int incx, const cuDoubleComplex *y, int incy,
-                               cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgerc");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc");
   return func_ptr(m, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasSsyr (char uplo, int n, float alpha, const float *x,
-                              int incx, float *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x,
+                             int incx, float *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasDsyr (char uplo, int n, double alpha, const double *x,
-                              int incx, double *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double *x,
+                             int incx, double *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasCher (char uplo, int n, float alpha, 
-                              const cuComplex *x, int incx, cuComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x,
+                             int incx, cuComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int,
+                                       cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasZher (char uplo, int n, double alpha, 
-                              const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher(char uplo, int n, double alpha,
+                             const cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher");
   return func_ptr(uplo, n, alpha, x, incx, A, lda);
 }
 
-void CUBLASWINAPI cublasSspr (char uplo, int n, float alpha, const float *x,
-                              int incx, float *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, float *);
+void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x,
+                             int incx, float *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, float, const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasDspr (char uplo, int n, double alpha, const double *x,
-                              int incx, double *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, double *);
+void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x,
+                             int incx, double *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, double, const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasChpr (char uplo, int n, float alpha, const cuComplex *x,
-                              int incx, cuComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const cuComplex *, int, cuComplex *);
+void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x,
+                             int incx, cuComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int,
+                                       cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x,
-                              int incx, cuDoubleComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *);
+void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha,
+                             const cuDoubleComplex *x, int incx,
+                             cuDoubleComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr");
   return func_ptr(uplo, n, alpha, x, incx, AP);
 }
 
-void CUBLASWINAPI cublasSsyr2 (char uplo, int n, float alpha, const float *x, 
-                               int incx, const float *y, int incy, float *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float *, int);
+void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x,
+                              int incx, const float *y, int incy, float *A,
+                              int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasDsyr2 (char uplo, int n, double alpha, const double *x, 
-                               int incx, const double *y, int incy, double *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double *, int);
+void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x,
+                              int incx, const double *y, int incy, double *A,
+                              int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, 
-                               int incx, const cuComplex *y, int incy, cuComplex *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha,
+                              const cuComplex *x, int incx, const cuComplex *y,
+                              int incy, cuComplex *A, int lda) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, 
-                               int incx, const cuDoubleComplex *y, int incy, cuDoubleComplex *A, 
-                               int lda) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *A, int lda) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda);
 }
 
-void CUBLASWINAPI cublasSspr2 (char uplo, int n, float alpha, const float *x, 
-                               int incx, const float *y, int incy, float *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, float, const float *, int, const float *, int, float *);
+void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float *x,
+                              int incx, const float *y, int incy, float *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int,
+                                       const float *, int, float *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSspr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasDspr2 (char uplo, int n, double alpha,
-                               const double *x, int incx, const double *y,
-                               int incy, double *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, double, const double *, int, const double *, int, double *);
+void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x,
+                              int incx, const double *y, int incy, double *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int,
+                                       const double *, int, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDspr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasChpr2 (char uplo, int n, cuComplex alpha,
-                               const cuComplex *x, int incx, const cuComplex *y,
-                               int incy, cuComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex *);
+void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha,
+                              const cuComplex *x, int incx, const cuComplex *y,
+                              int incy, cuComplex *AP) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int,
+                           const cuComplex *, int, cuComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChpr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *x, int incx, const cuDoubleComplex *y,
-                               int incy, cuDoubleComplex *AP) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex *);
+void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *x, int incx,
+                              const cuDoubleComplex *y, int incy,
+                              cuDoubleComplex *AP) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhpr2");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2");
   return func_ptr(uplo, n, alpha, x, incx, y, incy, AP);
 }
 
-void CUBLASWINAPI cublasSgemm (char transa, char transb, int m, int n, int k, 
-                               float alpha, const float *A, int lda, 
-                               const float *B, int ldb, float beta, float *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k,
+                              float alpha, const float *A, int lda,
+                              const float *B, int ldb, float beta, float *C,
+                              int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDgemm (char transa, char transb, int m, int n, int k,
-                               double alpha, const double *A, int lda, 
-                               const double *B, int ldb, double beta, double *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k,
+                              double alpha, const double *A, int lda,
+                              const double *B, int ldb, double beta, double *C,
+                              int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *,
+                           int, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCgemm (char transa, char transb, int m, int n, int k, 
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               const cuComplex *B, int ldb, cuComplex beta,
-                               cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZgemm (char transa, char transb, int m, int n,
-                               int k, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               const cuDoubleComplex *B, int ldb,
-                               cuDoubleComplex beta, cuDoubleComplex *C,
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm");
   return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsyrk (char uplo, char trans, int n, int k, float alpha, 
-                               const float *A, int lda, float beta, float *C, 
-                               int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha,
+                              const float *A, int lda, float beta, float *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float,
+                                       const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsyrk (char uplo, char trans, int n, int k,
-                               double alpha, const double *A, int lda,
-                               double beta, double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha,
+                              const double *A, int lda, double beta, double *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, double, const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsyrk (char uplo, char trans, int n, int k,
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               cuComplex beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              cuComplex beta, cuComplex *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *,
+                           int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsyrk (char uplo, char trans, int n, int k,
-                               cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               cuDoubleComplex beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex,
+                                       const cuDoubleComplex *, int,
+                                       cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCherk (char uplo, char trans, int n, int k,
-                               float alpha, const cuComplex *A, int lda,
-                               float beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha,
+                              const cuComplex *A, int lda, float beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int,
+                           float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCherk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZherk (char uplo, char trans, int n, int k,
-                               double alpha,
-                               const cuDoubleComplex *A, int lda,
-                               double beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha,
+                              const cuDoubleComplex *A, int lda, double beta,
+                              cuDoubleComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double,
+                                       const cuDoubleComplex *, int, double,
+                                       cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherk");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZherk");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsyr2k (char uplo, char trans, int n, int k, float alpha, 
-                                const float *A, int lda, const float *B, int ldb, 
-                                float beta, float *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha,
+                               const float *A, int lda, const float *B, int ldb,
+                               float beta, float *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsyr2k (char uplo, char trans, int n, int k,
-                                double alpha, const double *A, int lda,
-                                const double *B, int ldb, double beta,
-                                double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k,
+                               double alpha, const double *A, int lda,
+                               const double *B, int ldb, double beta, double *C,
+                               int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsyr2k (char uplo, char trans, int n, int k,
-                                cuComplex alpha, const cuComplex *A, int lda,
-                                const cuComplex *B, int ldb, cuComplex beta,
-                                cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k,
+                               cuComplex alpha, const cuComplex *A, int lda,
+                               const cuComplex *B, int ldb, cuComplex beta,
+                               cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsyr2k (char uplo, char trans, int n, int k,
-                                cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                                const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
-                                cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k,
+                               cuDoubleComplex alpha, const cuDoubleComplex *A,
+                               int lda, const cuDoubleComplex *B, int ldb,
+                               cuDoubleComplex beta, cuDoubleComplex *C,
+                               int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyr2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCher2k (char uplo, char trans, int n, int k,
-                                cuComplex alpha, const cuComplex *A, int lda,
-                                const cuComplex *B, int ldb, float beta,
-                                cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, float, cuComplex *, int);
+void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k,
+                               cuComplex alpha, const cuComplex *A, int lda,
+                               const cuComplex *B, int ldb, float beta,
+                               cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, float, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCher2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZher2k (char uplo, char trans, int n, int k,
-                                cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                                const cuDoubleComplex *B, int ldb, double beta,
-                                cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k,
+                               cuDoubleComplex alpha, const cuDoubleComplex *A,
+                               int lda, const cuDoubleComplex *B, int ldb,
+                               double beta, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, double, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2k");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k");
   return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasSsymm (char side, char uplo, int m, int n, float alpha, 
-                               const float *A, int lda, const float *B, int ldb,
-                               float beta, float *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, float, const float *, int, const float *, int, float, float *, int);
+void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha,
+                              const float *A, int lda, const float *B, int ldb,
+                              float beta, float *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int,
+                           const float *, int, float, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasDsymm (char side, char uplo, int m, int n, double alpha, 
-                               const double *A, int lda, const double *B, int ldb,
-                               double beta, double *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, double, const double *, int, const double *, int, double, double *, int);
+void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha,
+                              const double *A, int lda, const double *B,
+                              int ldb, double beta, double *C, int ldc) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int,
+                           const double *, int, double, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha, 
-                               const cuComplex *A, int lda, const cuComplex *B, int ldb,
-                               cuComplex beta, cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha, 
-                               const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
-                               cuDoubleComplex beta, cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasChemm (char side, char uplo, int m, int n,
-                               cuComplex alpha, const cuComplex *A, int lda,
-                               const cuComplex *B, int ldb, cuComplex beta,
-                               cuComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, int, const cuComplex *, int, cuComplex, cuComplex *, int);
+void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n,
+                              cuComplex alpha, const cuComplex *A, int lda,
+                              const cuComplex *B, int ldb, cuComplex beta,
+                              cuComplex *C, int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuComplex, const cuComplex *, int,
+      const cuComplex *, int, cuComplex, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasChemm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasZhemm (char side, char uplo, int m, int n,
-                               cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
-                               const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
-                               cuDoubleComplex *C, int ldc) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n,
+                              cuDoubleComplex alpha, const cuDoubleComplex *A,
+                              int lda, const cuDoubleComplex *B, int ldb,
+                              cuDoubleComplex beta, cuDoubleComplex *C,
+                              int ldc) {
+  using FuncPtr = void(CUBLASWINAPI *)(
+      char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm");
   return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-void CUBLASWINAPI cublasStrsm (char side, char uplo, char transa, char diag,
-                               int m, int n, float alpha, const float *A, int lda,
-                               float *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, float alpha, const float *A,
+                              int lda, float *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasDtrsm (char side, char uplo, char transa,
-                               char diag, int m, int n, double alpha,
-                               const double *A, int lda, double *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, double alpha, const double *A,
+                              int lda, double *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasCtrsm (char side, char uplo, char transa, char diag,
-                               int m, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, cuComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuComplex alpha, const cuComplex *A,
+                              int lda, cuComplex *B, int ldb) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasZtrsm (char side, char uplo, char transa,
-                               char diag, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda,
-                               cuDoubleComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int,
+                                       cuDoubleComplex, const cuDoubleComplex *,
+                                       int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasStrmm (char side, char uplo, char transa, char diag,
-                               int m, int n, float alpha, const float *A, int lda,
-                               float *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, float, const float *, int, float *, int);
+void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, float alpha, const float *A,
+                              int lda, float *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float,
+                                       const float *, int, float *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasDtrmm (char side, char uplo, char transa,
-                               char diag, int m, int n, double alpha,
-                               const double *A, int lda, double *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, double, const double *, int, double *, int);
+void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, double alpha, const double *A,
+                              int lda, double *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double,
+                                       const double *, int, double *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasCtrmm (char side, char uplo, char transa, char diag,
-                               int m, int n, cuComplex alpha, const cuComplex *A,
-                               int lda, cuComplex *B, int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, const cuComplex *, int, cuComplex *, int);
+void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuComplex alpha, const cuComplex *A,
+                              int lda, cuComplex *B, int ldb) {
+  using FuncPtr =
+      void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex,
+                           const cuComplex *, int, cuComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
 }
 
-void CUBLASWINAPI cublasZtrmm (char side, char uplo, char transa,
-                               char diag, int m, int n, cuDoubleComplex alpha,
-                               const cuDoubleComplex *A, int lda, cuDoubleComplex *B,
-                               int ldb) {
-  using FuncPtr = void (CUBLASWINAPI *)(char, char, char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, cuDoubleComplex *, int);
+void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag,
+                              int m, int n, cuDoubleComplex alpha,
+                              const cuDoubleComplex *A, int lda,
+                              cuDoubleComplex *B, int ldb) {
+  using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int,
+                                       cuDoubleComplex, const cuDoubleComplex *,
+                                       int, cuDoubleComplex *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmm");
   if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm");
   return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb);
diff --git a/tensorflow/stream_executor/cuda/cudnn_6_0.inc b/tensorflow/stream_executor/cuda/cudnn_6_0.inc
index 6ac7a695d9f..11288983a4a 100644
--- a/tensorflow/stream_executor/cuda/cudnn_6_0.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_6_0.inc
@@ -3,1771 +3,1823 @@
 extern "C" {
 
 size_t CUDNNWINAPI cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
 size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *  CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreate        (cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroy       (cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetStream     (cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetStream     (cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateTensorDescriptor(
-                                cudnnTensorDescriptor_t            *tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType, // image data type
-                                int                                 n,        // number of inputs (batch size)
-                                int                                 c,        // number of input feature maps
-                                int                                 h,        // height of input section
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType,  // image data type
+    int n,                     // number of inputs (batch size)
+    int c,                     // number of input feature maps
+    int h,                     // height of input section
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType, // image data type
-                                int                                 n,        // number of inputs (batch size)
-                                int                                 c,        // number of input feature maps
-                                int                                 h,        // height of input section
-                                int                                 w,        // width of input section
-                                int                                 nStride,
-                                int                                 cStride,
-                                int                                 hStride,
-                                int                                 wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType,  // image data type
+    int n,                     // number of inputs (batch size)
+    int c,                     // number of input feature maps
+    int h,                     // height of input section
+    int w,                     // width of input section
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                cudnnDataType_t                    *dataType, // image data type
-                                int                                *n,        // number of inputs (batch size)
-                                int                                *c,        // number of input feature maps
-                                int                                *h,        // height of input section
-                                int                                *w,        // width of input section
-                                int                                *nStride,
-                                int                                *cStride,
-                                int                                *hStride,
-                                int                                *wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType,  // image data type
+    int *n,                     // number of inputs (batch size)
+    int *c,                     // number of input feature maps
+    int *h,                     // height of input section
+    int *w,                     // width of input section
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType,
-                                int                                *nbDims,
-                                int                                 dimA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                size_t                              *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnAddTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t          *opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc,
-                                cudnnOpTensorOp_t                   opTensorOp,
-                                cudnnDataType_t                     opTensorCompType,
-                                cudnnNanPropagation_t               opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                cudnnOpTensorOp_t                  *opTensorOp,
-                                cudnnDataType_t                    *opTensorCompType,
-                                cudnnNanPropagation_t              *opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       bDesc,
-                                const void                         *B,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t          *reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc,
-                                cudnnReduceTensorOp_t                   reduceTensorOp,
-                                cudnnDataType_t                     reduceTensorCompType,
-                                cudnnNanPropagation_t               reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t          reduceTensorIndices,
-                                cudnnIndicesType_t                  reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
-                                const cudnnReduceTensorDescriptor_t     reduceTensorDesc,
-                                cudnnReduceTensorOp_t                  *reduceTensorOp,
-                                cudnnDataType_t                    *reduceTensorCompType,
-                                cudnnNanPropagation_t              *reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t         *reduceTensorIndices,
-                                cudnnIndicesType_t                 *reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                void                               *indices,
-                                size_t                              indicesSizeInBytes,
-                                void                               *workspace,
-                                size_t                              workspaceSizeInBytes,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *valuePtr ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *alpha ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateFilterDescriptor(
-                                cudnnFilterDescriptor_t            *filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, // image data type
-                                cudnnTensorFormat_t                 format,
-                                int                                 k,        // number of output feature maps
-                                int                                 c,        // number of input feature maps
-                                int                                 h,        // height of each input filter
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI
+cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
+                           cudnnDataType_t dataType,  // image data type
+                           cudnnTensorFormat_t format,
+                           int k,  // number of output feature maps
+                           int c,  // number of input feature maps
+                           int h,  // height of each input filter
+                           int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                cudnnDataType_t                    *dataType, // image data type
-                                cudnnTensorFormat_t                *format,
-                                int                                *k,        // number of output feature maps
-                                int                                *c,        // number of input feature maps
-                                int                                *h,        // height of each input filter
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
+                           cudnnDataType_t *dataType,  // image data type
+                           cudnnTensorFormat_t *format,
+                           int *k,  // number of output feature maps
+                           int *c,  // number of input feature maps
+                           int *h,  // height of each input filter
+                           int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, // image data type
-                                cudnnTensorFormat_t                 format,
-                                int                                 nbDims,
-                                const int                           filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,  // image data type
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType, // image data type
-                                cudnnTensorFormat_t                *format,
-                                int                                *nbDims,
-                                int                                 filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType,  // image data type
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyFilterDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t       *convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( cudnnConvolutionDescriptor_t convDesc,
-                                                             int pad_h,    // zero-padding height
-                                                             int pad_w,    // zero-padding width
-                                                             int u,   // vertical filter stride
-                                                             int v,   // horizontal filter stride
-                                                             int dilation_h, // filter dilation in the vertical dimension
-                                                             int dilation_w, // filter dilation in the horizontal dimension
-                                                             cudnnConvolutionMode_t mode,
-                                                             cudnnDataType_t computeType
-                                                           ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc,
+    int pad_h,       // zero-padding height
+    int pad_w,       // zero-padding width
+    int u,           // vertical filter stride
+    int v,           // horizontal filter stride
+    int dilation_h,  // filter dilation in the vertical dimension
+    int dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(  const cudnnConvolutionDescriptor_t convDesc,
-                                                            int* pad_h,    // zero-padding height
-                                                            int* pad_w,    // zero-padding width
-                                                            int* u,        // vertical filter stride
-                                                            int* v,        // horizontal filter stride
-                                                            int* dilation_h, // filter dilation in the vertical dimension
-                                                            int* dilation_w, // filter dilation in the horizontal dimension
-                                                            cudnnConvolutionMode_t* mode,
-                                                            cudnnDataType_t *computeType
-                                                         ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,       // zero-padding height
+    int *pad_w,       // zero-padding width
+    int *u,           // vertical filter stride
+    int *v,           // horizontal filter stride
+    int *dilation_h,  // filter dilation in the vertical dimension
+    int *dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc,
-                                int                                 arrayLength,             /* nbDims-2 size */
-                                const int                           padA[],
-                                const int                           filterStrideA[],
-                                const int                           dilationA[],
-                                cudnnConvolutionMode_t              mode,
-                                cudnnDataType_t                     computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                int                                 arrayLengthRequested,
-                                int                                *arrayLength,
-                                int                                 padA[],
-                                int                                 strideA[],
-                                int                                 dilationA[],
-                                cudnnConvolutionMode_t             *mode,
-                                cudnnDataType_t                    *computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDims,
-                                int                                 tensorOutputDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdPreference_t     preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionFwdAlgo_t          *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       zDesc,
-                                const void                         *z,
-                                const cudnnTensorDescriptor_t       biasDesc,
-                                const void                         *bias,
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dbDesc,
-                                void                               *db ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                const int                           requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
-                                cudnnHandle_t                        handle,
-                                const cudnnTensorDescriptor_t        xDesc,
-                                const void                          *x,
-                                const cudnnTensorDescriptor_t        dyDesc,
-                                const void                          *y,
-                                const cudnnConvolutionDescriptor_t   convDesc,
-                                const cudnnFilterDescriptor_t        dwDesc,
-                                void                                *dw,
-                                const int                            requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                void                                *workSpace,
-                                size_t                               workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                         handle,
-                                const cudnnTensorDescriptor_t         xDesc,
-                                const cudnnTensorDescriptor_t         dyDesc,
-                                const cudnnConvolutionDescriptor_t    convDesc,
-                                const cudnnFilterDescriptor_t         dwDesc,
-                                cudnnConvolutionBwdFilterPreference_t preference,
-                                size_t                                memoryLimitInBytes,
-                                cudnnConvolutionBwdFilterAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       gradDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                void                               *dw ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataPreference_t preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionBwdDataAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnIm2Col(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                void                               *colBuffer ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePoolingDescriptor(
-                                cudnnPoolingDescriptor_t           *poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                cudnnPoolingMode_t                  mode,
-                                cudnnNanPropagation_t               maxpoolingNanOpt,
-                                int                                 windowHeight,
-                                int                                 windowWidth,
-                                int                                 verticalPadding,
-                                int                                 horizontalPadding,
-                                int                                 verticalStride,
-                                int                                 horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *windowHeight,
-                                int                                *windowWidth,
-                                int                                *verticalPadding,
-                                int                                *horizontalPadding,
-                                int                                *verticalStride,
-                                int                                *horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                const cudnnPoolingMode_t            mode,
-                                const cudnnNanPropagation_t         maxpoolingNanOpt,
-                                int                                 nbDims,
-                                const int                           windowDimA[],
-                                const int                           paddingA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                int                                 nbDimsRequested,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *nbDims,
-                                int                                 windowDimA[],
-                                int                                 paddingA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                 nbDims,
-                                int                                 outputTensorDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPoolingDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                          *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateActivationDescriptor(
-                                cudnnActivationDescriptor_t        *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
-                                cudnnActivationDescriptor_t         activationDesc,
-                                cudnnActivationMode_t               mode,
-                                cudnnNanPropagation_t               reluNanOpt,
-                                double                              coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetActivationDescriptor(
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                cudnnActivationMode_t              *mode,
-                                cudnnNanPropagation_t              *reluNanOpt,
-                                double*                             coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
+                             cudnnActivationMode_t *mode,
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyActivationDescriptor(
-                                cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateLRNDescriptor(
-                                cudnnLRNDescriptor_t               *normDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned                            lrnN,
-                                double                              lrnAlpha,
-                                double                              lrnBeta,
-                                double                              lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned*                           lrnN,
-                                double*                             lrnAlpha,
-                                double*                             lrnBeta,
-                                double*                             lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyLRNDescriptor( cudnnLRNDescriptor_t lrnDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, // same desc for means, temp, temp2
-                                const void                         *x,
-                                const void                         *means, // if NULL, means are assumed to be zero
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc,  // same desc for means, temp, temp2
+    const void *x,
+    const void *means,  // if NULL, means are assumed to be zero
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, // same desc for x, means, dy, temp, temp2
-                                const void                         *x,
-                                const void                         *means, // if NULL, means are assumed to be zero
-                                const void                         *dy,
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dXdMeansDesc, // same desc for dx, dMeans
-                                void                               *dx, // output x differential
-                                void                               *dMeans ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc,  // same desc for x, means, dy, temp, temp2
+    const void *x,
+    const void *means,  // if NULL, means are assumed to be zero
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc,  // same desc for dx, dMeans
+    void *dx,                                    // output x differential
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
-                                cudnnTensorDescriptor_t             derivedBnDesc,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                cudnnBatchNormMode_t                mode ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
-                                const void                         *alpha, // alpha[0] = result blend factor
-                                const void                         *beta,  // beta[0] = dest layer blend factor
+    const void *alpha,  // alpha[0] = result blend factor
+    const void *beta,   // beta[0] = dest layer blend factor
 
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     // NxCxHxW
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     // NxCxHxW
+    const cudnnTensorDescriptor_t xDesc,
+    const void *x,  // NxCxHxW
+    const cudnnTensorDescriptor_t yDesc,
+    void *y,  // NxCxHxW
 
-                                /* Shared desc for the next 6 tensors in the argument list.
-                                   Data type to be set as follows:
-                                   type = (typeOf(x) == double) ? double : float
-                                   Dimensions for this descriptor depend on normalization mode
-                                   - Spatial Normalization : tensors are expected to have dims 1xCx1x1
-                                    (normalization is performed across NxHxW)
-                                   - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW 
-                                    (normalization is performed across N) */
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
+    /* Shared desc for the next 6 tensors in the argument list.
+       Data type to be set as follows:
+       type = (typeOf(x) == double) ? double : float
+       Dimensions for this descriptor depend on normalization mode
+       - Spatial Normalization : tensors are expected to have dims 1xCx1x1
+        (normalization is performed across NxHxW)
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-                                // 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
-                                const void                         *bnScale,
-                                const void                         *bnBias,
+    // 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
+    const void *bnScale, const void *bnBias,
 
-                                /* MUST use factor=1 in the very first call of a complete training cycle.
-                                   Use a factor=1/(1+n) at N-th call to the function to get
-                                   Cumulative Moving Average (CMA) behavior
-                                   CMA[n] = (x[1]+...+x[n])/n
-                                   Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
-                                   ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
-                                   CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
-                                double                              exponentialAverageFactor,
+    /* MUST use factor=1 in the very first call of a complete training cycle.
+       Use a factor=1/(1+n) at N-th call to the function to get
+       Cumulative Moving Average (CMA) behavior
+       CMA[n] = (x[1]+...+x[n])/n
+       Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
+       ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
+       CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
+    double exponentialAverageFactor,
 
-                                /* Used in Training phase only. 
-                                   runningMean = newMean*factor + runningMean*(1-factor) */
-                                void                               *resultRunningMean,
-                                /* Output in training mode, input in inference. Is the moving average
-                                   of  variance[x] (factor is applied in the same way as for runningMean) */
-                                void                               *resultRunningVariance,
+    /* Used in Training phase only.
+       runningMean = newMean*factor + runningMean*(1-factor) */
+    void *resultRunningMean,
+    /* Output in training mode, input in inference. Is the moving average
+       of  variance[x] (factor is applied in the same way as for runningMean) */
+    void *resultRunningVariance,
 
-                                /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
-                                double                              epsilon,
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
+    double epsilon,
 
-                                /* Optionally save intermediate results from the forward pass here
-                                   - can be reused to speed up backward pass. NULL if unused */
-                                void                               *resultSaveMean,
-                                void                               *resultSaveInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    /* Optionally save intermediate results from the forward pass here
+       - can be reused to speed up backward pass. NULL if unused */
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alpha, // alpha[0] = result blend factor
-                                const void                         *beta,  // beta[0] = dest layer blend factor
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     // NxCxHxW
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     // NxCxHxW
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
-                                const void                         *bnScale,
-                                const void                         *bnBias,
-                                const void                         *estimatedMean,
-                                const void                         *estimatedVariance,
-                                double                              epsilon ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha,  // alpha[0] = result blend factor
+    const void *beta,   // beta[0] = dest layer blend factor
+    const cudnnTensorDescriptor_t xDesc,
+    const void *x,  // NxCxHxW
+    const cudnnTensorDescriptor_t yDesc,
+    void *y,  // NxCxHxW
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alphaDataDiff,
-                                const void                         *betaDataDiff,
-                                const void                         *alphaParamDiff,
-                                const void                         *betaParamDiff,
-                                const cudnnTensorDescriptor_t       xDesc, // same desc for x, dx, dy
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t       dBnScaleBiasDesc,
-                                const void                         *bnScale, // bnBias doesn't affect backpropagation
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void                               *dBnScaleResult,
-                                void                               *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double                              epsilon,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc,  // same desc for x, dx, dy
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale,  // bnBias doesn't affect backpropagation
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void                         *savedMean,
-                                const void                         *savedInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( 
+cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
 
-                               cudnnSpatialTransformerDescriptor_t        *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
-                                cudnnSpatialTransformerDescriptor_t         stDesc,
-                                cudnnSamplerType_t                          samplerType, 
-                                cudnnDataType_t                             dataType,
-                                const int                                   nbDims,
-                                const int                                   dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
-                                 cudnnSpatialTransformerDescriptor_t        stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *theta,
-                                 void                                      *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *dgrid,
-                                 void                                      *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,                                    
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *grid,
-                                 const void                                *beta,
-                                 cudnnTensorDescriptor_t                    yDesc,
-                                 void                                      *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *beta,
-                                 const cudnnTensorDescriptor_t              dxDesc,
-                                 void                                      *dx,
-                                 const void                                *alphaDgrid,
-                                 const cudnnTensorDescriptor_t              dyDesc,
-                                 const void                                *dy,
-                                 const void                                *grid,
-                                 const void                                *betaDgrid,
-                                 void                                      *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t * dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                    cudnnHandle_t handle,
-                                                    float dropout, 
-                                                    void * states, 
-                                                    size_t stateSizeInBytes, 
-                                                    unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(cudnnHandle_t handle, 
-                                                      const cudnnDropoutDescriptor_t dropoutDesc,
-                                                      const cudnnTensorDescriptor_t xdesc, 
-                                                      const void * x,
-                                                      const cudnnTensorDescriptor_t ydesc,
-                                                      void * y,
-                                                      void * reserveSpace,
-                                                      size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(cudnnHandle_t handle, 
-                                               const cudnnDropoutDescriptor_t dropoutDesc,
-                                               const cudnnTensorDescriptor_t dydesc, 
-                                               const void * dy,
-                                               const cudnnTensorDescriptor_t dxdesc,
-                                               void * dx,
-                                               void * reserveSpace,
-                                               size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t * rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                                             const int minibatch,
-                                             const cudnnDataType_t dataType,
-                                             cudnnPersistentRNNPlan_t * plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                                          cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(cudnnHandle_t handle, 
-                                                cudnnRNNDescriptor_t rnnDesc,
-                                                const int hiddenSize, 
-                                                const int numLayers, 
-                                                cudnnDropoutDescriptor_t dropoutDesc, // Between layers, not between recurrent steps.
-                                                cudnnRNNInputMode_t inputMode,                                                 
-                                                cudnnDirectionMode_t direction, 
-                                                cudnnRNNMode_t mode, 
-                                                cudnnRNNAlgo_t algo, 
-                                                cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc,  // Between layers, not between recurrent steps.
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(cudnnRNNDescriptor_t rnnDesc,
-                                                int hiddenSize, 
-                                                int numLayers, 
-                                                cudnnDropoutDescriptor_t dropoutDesc, // Between layers, not between recurrent steps.
-                                                cudnnRNNInputMode_t inputMode,                                                 
-                                                cudnnDirectionMode_t direction, 
-                                                cudnnRNNMode_t mode, 
-                                                cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc,  // Between layers, not between recurrent steps.
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, dataType);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( cudnnHandle_t              handle,
-                                                    const cudnnRNNDescriptor_t rnnDesc,  
-                                                    const int seqLength, 
-                                                    const cudnnTensorDescriptor_t    *xDesc,
-                                                    size_t                     *sizeInBytes
-                                                    ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( cudnnHandle_t              handle,
-                                                          const cudnnRNNDescriptor_t rnnDesc,  
-                                                          const int seqLength, 
-                                                          const cudnnTensorDescriptor_t    *xDesc,
-                                                          size_t                     *sizeInBytes
-                                                    ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNParamsSize( cudnnHandle_t              handle,
-                                                 const cudnnRNNDescriptor_t rnnDesc,  
-                                                 const cudnnTensorDescriptor_t    xDesc,                                                    
-                                                 size_t                     *sizeInBytes,
-                                                 cudnnDataType_t dataType
-                                                    ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
+                      cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( cudnnHandle_t              handle,
-                             const cudnnRNNDescriptor_t rnnDesc,  
-                             const int layer,
-                             const cudnnTensorDescriptor_t xDesc, 
-                             const cudnnFilterDescriptor_t wDesc, 
-                             const void * w, 
-                             const int linLayerID,  
-                             cudnnFilterDescriptor_t linLayerMatDesc, 
-                             void ** linLayerMat
-                             ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer,
+    const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc,
+    const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( cudnnHandle_t              handle,
-                             const cudnnRNNDescriptor_t rnnDesc,  
-                             const int layer,
-                             const cudnnTensorDescriptor_t xDesc, 
-                             const cudnnFilterDescriptor_t wDesc, 
-                             const void * w, 
-                             const int linLayerID, 
-                             cudnnFilterDescriptor_t linLayerBiasDesc, 
-                             void ** linLayerBias                       
-                             ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer,
+    const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc,
+    const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( cudnnHandle_t handle, 
-                                                    const cudnnRNNDescriptor_t rnnDesc, 
-                                                    const int seqLength, 
-                                                    const cudnnTensorDescriptor_t * xDesc, 
-                                                    const void * x, 
-                                                    const cudnnTensorDescriptor_t hxDesc, 
-                                                    const void * hx, 
-                                                    const cudnnTensorDescriptor_t cxDesc, 
-                                                    const void * cx, 
-                                                    const cudnnFilterDescriptor_t wDesc, 
-                                                    const void * w, 
-                                                    const cudnnTensorDescriptor_t *yDesc,  
-                                                    void * y, 
-                                                    const cudnnTensorDescriptor_t hyDesc, 
-                                                    void * hy, 
-                                                    const cudnnTensorDescriptor_t cyDesc, 
-                                                    void * cy, 
-                                                    void * workspace, 
-                                                    size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( cudnnHandle_t handle, 
-                                                   const cudnnRNNDescriptor_t rnnDesc, 
-                                                   const int seqLength, 
-                                                   const cudnnTensorDescriptor_t *xDesc, 
-                                                   const void * x, 
-                                                   const cudnnTensorDescriptor_t hxDesc, 
-                                                   const void * hx, 
-                                                   const cudnnTensorDescriptor_t cxDesc, 
-                                                   const void * cx, 
-                                                   const cudnnFilterDescriptor_t wDesc, 
-                                                   const void * w, 
-                                                   const cudnnTensorDescriptor_t *yDesc,  
-                                                   void * y, 
-                                                   const cudnnTensorDescriptor_t hyDesc, 
-                                                   void * hy, 
-                                                   const cudnnTensorDescriptor_t cyDesc, 
-                                                   void * cy, 
-                                                   void * workspace, 
-                                                   size_t workSpaceSizeInBytes,
-                                                   void * reserveSpace, 
-                                                   size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardData( cudnnHandle_t handle, 
-                                                const cudnnRNNDescriptor_t rnnDesc, 
-                                                const int seqLength, 
-                                                const cudnnTensorDescriptor_t * yDesc, 
-                                                const void * y,                                                
-                                                const cudnnTensorDescriptor_t * dyDesc, 
-                                                const void * dy, 
-                                                const cudnnTensorDescriptor_t dhyDesc, 
-                                                const void * dhy, 
-                                                const cudnnTensorDescriptor_t dcyDesc, 
-                                                const void * dcy, 
-                                                const cudnnFilterDescriptor_t wDesc, 
-                                                const void * w, 
-                                                const cudnnTensorDescriptor_t hxDesc, 
-                                                const void * hx,                                                                  
-                                                const cudnnTensorDescriptor_t cxDesc, 
-                                                const void * cx,                                                 
-                                                const cudnnTensorDescriptor_t * dxDesc, 
-                                                void * dx, 
-                                                const cudnnTensorDescriptor_t dhxDesc,
-                                                void * dhx,
-                                                const cudnnTensorDescriptor_t dcxDesc,
-                                                void * dcx,
-                                                void * workspace,
-                                                size_t workSpaceSizeInBytes,
-                                                void * reserveSpace, 
-                                                size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( cudnnHandle_t handle, 
-                                                   const cudnnRNNDescriptor_t rnnDesc, 
-                                                   const int seqLength, 
-                                                   const cudnnTensorDescriptor_t * xDesc, 
-                                                   const void * x, 
-                                                   const cudnnTensorDescriptor_t hxDesc, 
-                                                   const void * hx,                                                   
-                                                   const cudnnTensorDescriptor_t * yDesc, 
-                                                   const void * y,
-                                                   const void * workspace, 
-                                                   size_t workSpaceSizeInBytes, 
-                                                   const cudnnFilterDescriptor_t dwDesc, 
-                                                   void * dw,
-                                                   const void * reserveSpace, 
-                                                   size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor_v4(
-                                cudnnConvolutionDescriptor_t        convDesc,
-                                int                                 pad_h,      // zero-padding height
-                                int                                 pad_w,      // zero-padding width
-                                int                                 u,          // vertical filter stride
-                                int                                 v,          // horizontal filter stride
-                                int                                 dilation_h, // filter dilation in the vertical dimension
-                                int                                 dilation_w, // filter dilation in the horizontal dimension
-                                cudnnConvolutionMode_t              mode ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor_v4");
+    cudnnConvolutionDescriptor_t convDesc,
+    int pad_h,       // zero-padding height
+    int pad_w,       // zero-padding width
+    int u,           // vertical filter stride
+    int v,           // horizontal filter stride
+    int dilation_h,  // filter dilation in the vertical dimension
+    int dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t mode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int,
+                                   int, int, int, cudnnConvolutionMode_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor_v4");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor_v5( cudnnConvolutionDescriptor_t convDesc,
-                                                             int pad_h,    // zero-padding height
-                                                             int pad_w,    // zero-padding width
-                                                             int u,   // vertical filter stride
-                                                             int v,   // horizontal filter stride
-                                                             int dilation_h, // filter dilation in the vertical dimension
-                                                             int dilation_w, // filter dilation in the horizontal dimension
-                                                             cudnnConvolutionMode_t mode,
-                                                             cudnnDataType_t computeType
-                                                           ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor_v5");
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor_v5(
+    cudnnConvolutionDescriptor_t convDesc,
+    int pad_h,       // zero-padding height
+    int pad_w,       // zero-padding width
+    int u,           // vertical filter stride
+    int v,           // horizontal filter stride
+    int dilation_h,  // filter dilation in the vertical dimension
+    int dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor_v4(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                int                                *pad_h,    // zero-padding height
-                                int                                *pad_w,    // zero-padding width
-                                int                                *u,        // vertical filter stride
-                                int                                *v,        // horizontal filter stride
-                                int                                *dilation_h, // filter dilation in the vertical dimension
-                                int                                *dilation_w, // filter dilation in the horizontal dimension
-                                cudnnConvolutionMode_t             *mode ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor_v4");
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,       // zero-padding height
+    int *pad_w,       // zero-padding width
+    int *u,           // vertical filter stride
+    int *v,           // horizontal filter stride
+    int *dilation_h,  // filter dilation in the vertical dimension
+    int *dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t *mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor_v4");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor_v5(  const cudnnConvolutionDescriptor_t convDesc,
-                                                            int* pad_h,    // zero-padding height
-                                                            int* pad_w,    // zero-padding width
-                                                            int* u,        // vertical filter stride
-                                                            int* v,        // horizontal filter stride
-                                                            int* dilation_h, // filter dilation in the vertical dimension
-                                                            int* dilation_w, // filter dilation in the horizontal dimension
-                                                            cudnnConvolutionMode_t* mode,
-                                                            cudnnDataType_t *computeType
-                                                         ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor_v5");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor_v5(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,       // zero-padding height
+    int *pad_w,       // zero-padding width
+    int *u,           // vertical filter stride
+    int *v,           // horizontal filter stride
+    int *dilation_h,  // filter dilation in the vertical dimension
+    int *dilation_w,  // filter dilation in the horizontal dimension
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cudnn_7_0.inc b/tensorflow/stream_executor/cuda/cudnn_7_0.inc
index d2ea31e366b..008ae9099c0 100644
--- a/tensorflow/stream_executor/cuda/cudnn_7_0.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_7_0.inc
@@ -3,1944 +3,2025 @@
 extern "C" {
 
 size_t CUDNNWINAPI cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
 size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *  CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(
-                                cudnnHandle_t                       handle,
-                                cudnnStatus_t                      *rstatus,
-                                cudnnErrQueryMode_t                 mode,
-                                cudnnRuntimeTag_t                  *tag ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
+cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle,
+                                                 cudnnStatus_t *rstatus,
+                                                 cudnnErrQueryMode_t mode,
+                                                 cudnnRuntimeTag_t *tag) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnQueryRuntimeError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rstatus, mode, tag);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreate        (cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroy       (cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetStream     (cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetStream     (cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateTensorDescriptor(
-                                cudnnTensorDescriptor_t            *tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                int                                 n,        /* number of inputs (batch size) */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of input section */
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                int                                 n,        /* number of inputs (batch size) */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of input section */
-                                int                                 w,        /* width of input section */
-                                int                                 nStride,
-                                int                                 cStride,
-                                int                                 hStride,
-                                int                                 wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w,                    /* width of input section */
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                int                                *n,        /* number of inputs (batch size) */
-                                int                                *c,        /* number of input feature maps  */
-                                int                                *h,        /* height of input section */
-                                int                                *w,        /* width of input section */
-                                int                                *nStride,
-                                int                                *cStride,
-                                int                                *hStride,
-                                int                                *wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType, /* image data type */
+    int *n,                    /* number of inputs (batch size) */
+    int *c,                    /* number of input feature maps  */
+    int *h,                    /* height of input section */
+    int *w,                    /* width of input section */
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType,
-                                int                                *nbDims,
-                                int                                 dimA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                size_t                              *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnAddTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t          *opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc,
-                                cudnnOpTensorOp_t                   opTensorOp,
-                                cudnnDataType_t                     opTensorCompType,
-                                cudnnNanPropagation_t               opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                cudnnOpTensorOp_t                  *opTensorOp,
-                                cudnnDataType_t                    *opTensorCompType,
-                                cudnnNanPropagation_t              *opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       bDesc,
-                                const void                         *B,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t          *reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc,
-                                cudnnReduceTensorOp_t                   reduceTensorOp,
-                                cudnnDataType_t                     reduceTensorCompType,
-                                cudnnNanPropagation_t               reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t          reduceTensorIndices,
-                                cudnnIndicesType_t                  reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
-                                const cudnnReduceTensorDescriptor_t     reduceTensorDesc,
-                                cudnnReduceTensorOp_t                  *reduceTensorOp,
-                                cudnnDataType_t                    *reduceTensorCompType,
-                                cudnnNanPropagation_t              *reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t         *reduceTensorIndices,
-                                cudnnIndicesType_t                 *reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                void                               *indices,
-                                size_t                              indicesSizeInBytes,
-                                void                               *workspace,
-                                size_t                              workspaceSizeInBytes,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *valuePtr ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *alpha ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateFilterDescriptor(
-                                cudnnFilterDescriptor_t            *filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                cudnnTensorFormat_t                 format,
-                                int                                 k,        /* number of output feature maps */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of each input filter */
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,          /* image data type */
+    cudnnTensorFormat_t format, int k, /* number of output feature maps */
+    int c,                             /* number of input feature maps */
+    int h,                             /* height of each input filter */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                cudnnTensorFormat_t                *format,
-                                int                                *k,        /* number of output feature maps */
-                                int                                *c,        /* number of input feature maps */
-                                int                                *h,        /* height of each input filter */
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+    const cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t *dataType,           /* image data type */
+    cudnnTensorFormat_t *format, int *k, /* number of output feature maps */
+    int *c,                              /* number of input feature maps */
+    int *h,                              /* height of each input filter */
+    int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                cudnnTensorFormat_t                 format,
-                                int                                 nbDims,
-                                const int                           filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType, /* image data type */
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                cudnnTensorFormat_t                *format,
-                                int                                *nbDims,
-                                int                                 filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, /* image data type */
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyFilterDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t       *convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( cudnnConvolutionDescriptor_t convDesc,
-                                                       cudnnMathType_t mathType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( cudnnConvolutionDescriptor_t convDesc,
-                                                       cudnnMathType_t *mathType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( cudnnConvolutionDescriptor_t convDesc,
-                                                         int groupCount ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( cudnnConvolutionDescriptor_t convDesc,
-                                                         int *groupCount ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( cudnnConvolutionDescriptor_t convDesc,
-                                                             int pad_h,    /* zero-padding height */
-                                                             int pad_w,    /* zero-padding width */
-                                                             int u,   /* vertical filter stride */
-                                                             int v,   /* horizontal filter stride */
-                                                             int dilation_h, /* filter dilation in the vertical dimension */
-                                                             int dilation_w, /* filter dilation in the horizontal dimension */
-                                                             cudnnConvolutionMode_t mode,
-                                                             cudnnDataType_t computeType
-                                                           ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */
+    int pad_w,                                        /* zero-padding width */
+    int u,          /* vertical filter stride */
+    int v,          /* horizontal filter stride */
+    int dilation_h, /* filter dilation in the vertical dimension */
+    int dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(  const cudnnConvolutionDescriptor_t convDesc,
-                                                            int* pad_h,    /* zero-padding height */
-                                                            int* pad_w,    /* zero-padding width */
-                                                            int* u,        /* vertical filter stride */
-                                                            int* v,        /* horizontal filter stride */
-                                                            int* dilation_h, /* filter dilation in the vertical dimension */
-                                                            int* dilation_w, /* filter dilation in the horizontal dimension */
-                                                            cudnnConvolutionMode_t* mode,
-                                                            cudnnDataType_t *computeType
-                                                         ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,      /* zero-padding height */
+    int *pad_w,      /* zero-padding width */
+    int *u,          /* vertical filter stride */
+    int *v,          /* horizontal filter stride */
+    int *dilation_h, /* filter dilation in the vertical dimension */
+    int *dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc,
-                                int                                 arrayLength,             /* nbDims-2 size */
-                                const int                           padA[],
-                                const int                           filterStrideA[],
-                                const int                           dilationA[],
-                                cudnnConvolutionMode_t              mode,
-                                cudnnDataType_t                     computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                int                                 arrayLengthRequested,
-                                int                                *arrayLength,
-                                int                                 padA[],
-                                int                                 strideA[],
-                                int                                 dilationA[],
-                                cudnnConvolutionMode_t             *mode,
-                                cudnnDataType_t                    *computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDims,
-                                int                                 tensorOutputDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                       int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdPreference_t     preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionFwdAlgo_t          *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7(
-                                cudnnHandle_t                      handle,
-                                const cudnnTensorDescriptor_t      srcDesc,
-                                const cudnnFilterDescriptor_t      filterDesc,
-                                const cudnnConvolutionDescriptor_t convDesc,
-                                const cudnnTensorDescriptor_t      destDesc,
-                                const int                          requestedAlgoCount,
-                                int                               *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t     *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnFilterDescriptor_t filterDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       zDesc,
-                                const void                         *z,
-                                const cudnnTensorDescriptor_t       biasDesc,
-                                const void                         *bias,
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dbDesc,
-                                void                               *db ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                              int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                const int                           requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
-                                cudnnHandle_t                        handle,
-                                const cudnnTensorDescriptor_t        xDesc,
-                                const void                          *x,
-                                const cudnnTensorDescriptor_t        dyDesc,
-                                const void                          *y,
-                                const cudnnConvolutionDescriptor_t   convDesc,
-                                const cudnnFilterDescriptor_t        dwDesc,
-                                void                                *dw,
-                                const int                            requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                void                                *workSpace,
-                                size_t                               workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                         handle,
-                                const cudnnTensorDescriptor_t         xDesc,
-                                const cudnnTensorDescriptor_t         dyDesc,
-                                const cudnnConvolutionDescriptor_t    convDesc,
-                                const cudnnFilterDescriptor_t         dwDesc,
-                                cudnnConvolutionBwdFilterPreference_t preference,
-                                size_t                                memoryLimitInBytes,
-                                cudnnConvolutionBwdFilterAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7(
-                                cudnnHandle_t                         handle,
-                                const cudnnTensorDescriptor_t         srcDesc,
-                                const cudnnTensorDescriptor_t         diffDesc,
-                                const cudnnConvolutionDescriptor_t    convDesc,
-                                const cudnnFilterDescriptor_t         gradDesc,
-                                const int                             requestedAlgoCount,
-                                int                                  *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t  *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       gradDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                void                               *dw ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                            int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataPreference_t preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionBwdDataAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                const cudnnTensorDescriptor_t       diffDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       gradDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnIm2Col(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                void                               *colBuffer ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePoolingDescriptor(
-                                cudnnPoolingDescriptor_t           *poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                cudnnPoolingMode_t                  mode,
-                                cudnnNanPropagation_t               maxpoolingNanOpt,
-                                int                                 windowHeight,
-                                int                                 windowWidth,
-                                int                                 verticalPadding,
-                                int                                 horizontalPadding,
-                                int                                 verticalStride,
-                                int                                 horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *windowHeight,
-                                int                                *windowWidth,
-                                int                                *verticalPadding,
-                                int                                *horizontalPadding,
-                                int                                *verticalStride,
-                                int                                *horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                const cudnnPoolingMode_t            mode,
-                                const cudnnNanPropagation_t         maxpoolingNanOpt,
-                                int                                 nbDims,
-                                const int                           windowDimA[],
-                                const int                           paddingA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                int                                 nbDimsRequested,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *nbDims,
-                                int                                 windowDimA[],
-                                int                                 paddingA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                 nbDims,
-                                int                                 outputTensorDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPoolingDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                          *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateActivationDescriptor(
-                                cudnnActivationDescriptor_t        *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
-                                cudnnActivationDescriptor_t         activationDesc,
-                                cudnnActivationMode_t               mode,
-                                cudnnNanPropagation_t               reluNanOpt,
-                                double                              coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetActivationDescriptor(
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                cudnnActivationMode_t              *mode,
-                                cudnnNanPropagation_t              *reluNanOpt,
-                                double*                             coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
+                             cudnnActivationMode_t *mode,
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyActivationDescriptor(
-                                cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateLRNDescriptor(
-                                cudnnLRNDescriptor_t               *normDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned                            lrnN,
-                                double                              lrnAlpha,
-                                double                              lrnBeta,
-                                double                              lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned*                           lrnN,
-                                double*                             lrnAlpha,
-                                double*                             lrnBeta,
-                                double*                             lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyLRNDescriptor( cudnnLRNDescriptor_t lrnDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for means, temp, temp2 */
-                                const void                         *x,
-                                const void                         *means, /* if NULL, means are assumed to be zero */
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for x, means, dy, temp, temp2 */
-                                const void                         *x,
-                                const void                         *means, /* if NULL, means are assumed to be zero */
-                                const void                         *dy,
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dXdMeansDesc, /* same desc for dx, dMeans */
-                                void                               *dx, /* output x differential */
-                                void                               *dMeans ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc, /* same desc for x, means, dy, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+    void *dx,                                   /* output x differential */
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
-                                cudnnTensorDescriptor_t             derivedBnDesc,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                cudnnBatchNormMode_t                mode ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
-                                const void                         *alpha, /* alpha[0] = result blend factor */
-                                const void                         *beta,  /* beta[0] = dest layer blend factor */
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
 
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     /* NxCxHxW */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
 
-                                /* Shared desc for the next 6 tensors in the argument list.
-                                   Data type to be set as follows:
-                                   type = (typeOf(x) == double) ? double : float
-                                   Dimensions for this descriptor depend on normalization mode
-                                   - Spatial Normalization : tensors are expected to have dims 1xCx1x1
-                                    (normalization is performed across NxHxW)
-                                   - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW 
-                                    (normalization is performed across N) */
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
+    /* Shared desc for the next 6 tensors in the argument list.
+       Data type to be set as follows:
+       type = (typeOf(x) == double) ? double : float
+       Dimensions for this descriptor depend on normalization mode
+       - Spatial Normalization : tensors are expected to have dims 1xCx1x1
+        (normalization is performed across NxHxW)
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-                                /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
-                                const void                         *bnScale,
-                                const void                         *bnBias,
+    /* 'Gamma' and 'Beta' respectively, in the notation of Ioffe and Szegedy's
+       paper */
+    const void *bnScale, const void *bnBias,
 
-                                /* MUST use factor=1 in the very first call of a complete training cycle.
-                                   Use a factor=1/(1+n) at N-th call to the function to get
-                                   Cumulative Moving Average (CMA) behavior
-                                   CMA[n] = (x[1]+...+x[n])/n
-                                   Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
-                                   ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
-                                   CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
-                                double                              exponentialAverageFactor,
+    /* MUST use factor=1 in the very first call of a complete training cycle.
+       Use factor=1/(1+n) at the n-th call (n = 0 for the first call) to get
+       Cumulative Moving Average (CMA) behavior
+       CMA[n] = (x[1]+...+x[n])/n
+       Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
+       ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
+       CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
+    double exponentialAverageFactor,
 
-                                /* Used in Training phase only. 
-                                   runningMean = newMean*factor + runningMean*(1-factor) */
-                                void                               *resultRunningMean,
-                                /* Output in training mode, input in inference. Is the moving average
-                                   of  variance[x] (factor is applied in the same way as for runningMean) */
-                                void                               *resultRunningVariance,
+    /* Used in Training phase only.
+       runningMean = newMean*factor + runningMean*(1-factor) */
+    void *resultRunningMean,
+    /* Output in training mode, input in inference. Is the moving average
+       of variance[x] (factor is applied in the same way as for runningMean) */
+    void *resultRunningVariance,
 
-                                /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
-                                double                              epsilon,
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
+    double epsilon,
 
-                                /* Optionally save intermediate results from the forward pass here
-                                   - can be reused to speed up backward pass. NULL if unused */
-                                void                               *resultSaveMean,
-                                void                               *resultSaveInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    /* Optionally save intermediate results from the forward pass here
+       - can be reused to speed up backward pass. NULL if unused */
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alpha, /* alpha[0] = result blend factor */
-                                const void                         *beta,  /* beta[0] = dest layer blend factor */
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
-                                const void                         *bnScale,
-                                const void                         *bnBias,
-                                const void                         *estimatedMean,
-                                const void                         *estimatedVariance,
-                                double                              epsilon ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alphaDataDiff,
-                                const void                         *betaDataDiff,
-                                const void                         *alphaParamDiff,
-                                const void                         *betaParamDiff,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for x, dx, dy */
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t       dBnScaleBiasDesc,
-                                const void                         *bnScale, /* bnBias doesn't affect backpropagation */
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void                               *dBnScaleResult,
-                                void                               *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double                              epsilon,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale, /* bnBias doesn't affect backpropagation */
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void                         *savedMean,
-                                const void                         *savedInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
-                               cudnnSpatialTransformerDescriptor_t        *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
-                                cudnnSpatialTransformerDescriptor_t         stDesc,
-                                cudnnSamplerType_t                          samplerType, 
-                                cudnnDataType_t                             dataType,
-                                const int                                   nbDims,
-                                const int                                   dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
-                                 cudnnSpatialTransformerDescriptor_t        stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *theta,
-                                 void                                      *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *dgrid,
-                                 void                                      *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,                                    
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *grid,
-                                 const void                                *beta,
-                                 cudnnTensorDescriptor_t                    yDesc,
-                                 void                                      *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *beta,
-                                 const cudnnTensorDescriptor_t              dxDesc,
-                                 void                                      *dx,
-                                 const void                                *alphaDgrid,
-                                 const cudnnTensorDescriptor_t              dyDesc,
-                                 const void                                *dy,
-                                 const void                                *grid,
-                                 const void                                *betaDgrid,
-                                 void                                      *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t * dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                    cudnnHandle_t            handle,
-                                                    float                    dropout, 
-                                                    void *                   states, 
-                                                    size_t                   stateSizeInBytes, 
-                                                    unsigned long long       seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                        cudnnHandle_t            handle,
-                                                        float                    dropout, 
-                                                        void *                   states, 
-                                                        size_t                   stateSizeInBytes, 
-                                                        unsigned long long       seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                    cudnnHandle_t            handle,
-                                                    float *                  dropout, 
-                                                    void **                  states,
-                                                    unsigned long long *     seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float *, void **, unsigned long long *);
+cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout,
+    void **states, unsigned long long *seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float *, void **, unsigned long long *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(cudnnHandle_t                  handle, 
-                                              const cudnnDropoutDescriptor_t dropoutDesc,
-                                              const cudnnTensorDescriptor_t  xdesc, 
-                                              const void *                   x,
-                                              const cudnnTensorDescriptor_t  ydesc,
-                                              void *                         y,
-                                              void *                         reserveSpace,
-                                              size_t                         reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(cudnnHandle_t                  handle, 
-                                               const cudnnDropoutDescriptor_t dropoutDesc,
-                                               const cudnnTensorDescriptor_t  dydesc, 
-                                               const void *                   dy,
-                                               const cudnnTensorDescriptor_t  dxdesc,
-                                               void *                         dx,
-                                               void *                         reserveSpace,
-                                               size_t                         reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t * rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t       rnnDesc,
-                                                       const int                  minibatch,
-                                                       const cudnnDataType_t      dataType,
-                                                       cudnnPersistentRNNPlan_t * plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                                                    cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(cudnnHandle_t            handle,
-                                                   cudnnRNNDescriptor_t     rnnDesc,
-                                                   const int                hiddenSize,
-                                                   const int                numLayers,
-                                                   cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps. */
-                                                   cudnnRNNInputMode_t      inputMode,          
-                                                   cudnnDirectionMode_t     direction,
-                                                   cudnnRNNMode_t           mode,
-                                                   cudnnRNNAlgo_t           algo,
-                                                   cudnnDataType_t          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(cudnnHandle_t              cudnnHandle,
-                                                cudnnRNNDescriptor_t       rnnDesc,
-                                                int *                      hiddenSize, 
-                                                int *                      numLayers, 
-                                                cudnnDropoutDescriptor_t * dropoutDesc,
-                                                cudnnRNNInputMode_t *      inputMode, 
-                                                cudnnDirectionMode_t *     direction, 
-                                                cudnnRNNMode_t *           mode, 
-                                                cudnnRNNAlgo_t *           algo, 
-                                                cudnnDataType_t *          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(
+    cudnnHandle_t cudnnHandle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(cudnnHandle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(cudnnHandle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNMatrixMathType (cudnnRNNDescriptor_t desc, cudnnMathType_t math) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t desc,
+                                                    cudnnMathType_t math) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(desc, math);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( cudnnHandle_t              handle,
-                                                    const cudnnRNNDescriptor_t rnnDesc,  
-                                                    const int seqLength, 
-                                                    const cudnnTensorDescriptor_t    *xDesc,
-                                                    size_t                     *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( cudnnHandle_t              handle,
-                                                          const cudnnRNNDescriptor_t rnnDesc,  
-                                                          const int                  seqLength,
-                                                          const cudnnTensorDescriptor_t    *xDesc,
-                                                          size_t                   *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNParamsSize( cudnnHandle_t                    handle,
-                                                 const cudnnRNNDescriptor_t       rnnDesc,  
-                                                 const cudnnTensorDescriptor_t    xDesc,
-                                                 size_t                          *sizeInBytes,
-                                                 cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
+                      cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( cudnnHandle_t              handle,
-                                                           const cudnnRNNDescriptor_t rnnDesc, 
-                                                           const int layer,
-                                                           const cudnnTensorDescriptor_t xDesc,
-                                                           const cudnnFilterDescriptor_t wDesc,
-                                                           const void * w, 
-                                                           const int linLayerID,  
-                                                           cudnnFilterDescriptor_t linLayerMatDesc,
-                                                           void ** linLayerMat) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer,
+    const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc,
+    const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( cudnnHandle_t              handle,
-                                                         const cudnnRNNDescriptor_t rnnDesc, 
-                                                         const int layer,
-                                                         const cudnnTensorDescriptor_t xDesc, 
-                                                         const cudnnFilterDescriptor_t wDesc,
-                                                         const void * w,
-                                                         const int linLayerID,
-                                                         cudnnFilterDescriptor_t linLayerBiasDesc,
-                                                         void ** linLayerBias) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int layer,
+    const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc,
+    const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, layer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( cudnnHandle_t handle,
-                                                    const cudnnRNNDescriptor_t rnnDesc,
-                                                    const int seqLength,
-                                                    const cudnnTensorDescriptor_t * xDesc,
-                                                    const void * x,
-                                                    const cudnnTensorDescriptor_t hxDesc,
-                                                    const void * hx,
-                                                    const cudnnTensorDescriptor_t cxDesc,
-                                                    const void * cx,
-                                                    const cudnnFilterDescriptor_t wDesc,
-                                                    const void * w,
-                                                    const cudnnTensorDescriptor_t *yDesc,
-                                                    void * y,
-                                                    const cudnnTensorDescriptor_t hyDesc,
-                                                    void * hy,
-                                                    const cudnnTensorDescriptor_t cyDesc,
-                                                    void * cy,
-                                                    void * workspace,
-                                                    size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( cudnnHandle_t handle,
-                                                   const cudnnRNNDescriptor_t rnnDesc,
-                                                   const int seqLength,
-                                                   const cudnnTensorDescriptor_t *xDesc,
-                                                   const void * x,
-                                                   const cudnnTensorDescriptor_t hxDesc,
-                                                   const void * hx,
-                                                   const cudnnTensorDescriptor_t cxDesc,
-                                                   const void * cx,
-                                                   const cudnnFilterDescriptor_t wDesc,
-                                                   const void * w,
-                                                   const cudnnTensorDescriptor_t *yDesc,
-                                                   void * y,
-                                                   const cudnnTensorDescriptor_t hyDesc,
-                                                   void * hy,
-                                                   const cudnnTensorDescriptor_t cyDesc,
-                                                   void * cy,
-                                                   void * workspace,
-                                                   size_t workSpaceSizeInBytes,
-                                                   void * reserveSpace,
-                                                   size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardData( cudnnHandle_t handle,
-                                                const cudnnRNNDescriptor_t rnnDesc,
-                                                const int seqLength,
-                                                const cudnnTensorDescriptor_t * yDesc,
-                                                const void * y,
-                                                const cudnnTensorDescriptor_t * dyDesc,
-                                                const void * dy,
-                                                const cudnnTensorDescriptor_t dhyDesc,
-                                                const void * dhy,
-                                                const cudnnTensorDescriptor_t dcyDesc,
-                                                const void * dcy,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void * w,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void * hx,
-                                                const cudnnTensorDescriptor_t cxDesc,
-                                                const void * cx,
-                                                const cudnnTensorDescriptor_t * dxDesc,
-                                                void * dx,
-                                                const cudnnTensorDescriptor_t dhxDesc,
-                                                void * dhx,
-                                                const cudnnTensorDescriptor_t dcxDesc,
-                                                void * dcx,
-                                                void * workspace,
-                                                size_t workSpaceSizeInBytes,
-                                                void * reserveSpace,
-                                                size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( cudnnHandle_t handle,
-                                                   const cudnnRNNDescriptor_t rnnDesc,
-                                                   const int seqLength,
-                                                   const cudnnTensorDescriptor_t * xDesc,
-                                                   const void * x,
-                                                   const cudnnTensorDescriptor_t hxDesc,
-                                                   const void * hx,
-                                                   const cudnnTensorDescriptor_t * yDesc, 
-                                                   const void * y,
-                                                   const void * workspace, 
-                                                   size_t workSpaceSizeInBytes, 
-                                                   const cudnnFilterDescriptor_t dwDesc, 
-                                                   void * dw,
-                                                   const void * reserveSpace, 
-                                                   size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateCTCLossDescriptor( cudnnCTCLossDescriptor_t* ctcLossDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor(
-                                cudnnCTCLossDescriptor_t         ctcLossDesc,
-                                cudnnDataType_t                  compType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor(
-                                cudnnCTCLossDescriptor_t         ctcLossDesc,
-                                cudnnDataType_t*                 compType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyCTCLossDescriptor( cudnnCTCLossDescriptor_t ctcLossDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( cudnnHandle_t handle, 
-                                        const cudnnTensorDescriptor_t probsDesc,     /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the mini batch size, A is the alphabet size)  */
-                                        const void * probs,                          /* probabilities after softmax, in GPU memory */
-                                        const int * labels,                          /* labels, in CPU memory */
-                                        const int * labelLengths,                    /* the length of each label, in CPU memory */
-                                        const int * inputLengths,                    /* the lengths of timing steps in each batch, in CPU memory */
-                                        void * costs,                                /* the returned costs of CTC, in GPU memory */
-                                        const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
-                                        const void * gradients,                      /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
-                                        cudnnCTCLossAlgo_t algo,                     /* algorithm selected, supported now 0 and 1 */
-                                        cudnnCTCLossDescriptor_t ctcLossDesc,
-                                        void * workspace,                            /* pointer to the workspace, in GPU memory */
-                                        size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, const int *, const int *, void *, const cudnnTensorDescriptor_t, const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnCTCLoss(
+    cudnnHandle_t handle,
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the mini batch size, A
+                      is the alphabet size)  */
+    const void *probs,       /* probabilities after softmax, in GPU memory */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    void *costs,             /* the returned costs of CTC, in GPU memory */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A */
+    const void *gradients,   /* the returned CTC gradients, in GPU memory, to
+                                compute costs only, set it to NULL */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc,
+    void *workspace, /* pointer to the workspace, in GPU memory */
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *,
+      const int *, const int *, void *, const cudnnTensorDescriptor_t,
+      const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCTCLoss");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths,
+                  costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       probsDesc,       /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the mini batch size, A is the alphabet size) */
-                                const cudnnTensorDescriptor_t       gradientsDesc,   /* Tensor descriptor for gradients, the dimensions are T,N,A. To compute costs only, set it to NULL */
-                                const int                          * labels,         /* labels, in CPU memory */
-                                const int                          * labelLengths,   /* the length of each label, in CPU memory */
-                                const int                          * inputLengths,   /* the lengths of timing steps in each batch, in CPU memory */
-                                cudnnCTCLossAlgo_t                  algo,            /* algorithm selected, supported now 0 and 1 */
-                                cudnnCTCLossDescriptor_t            ctcLossDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const int *, const int *, const int *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
+    cudnnHandle_t handle,
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the mini batch size, A
+                      is the alphabet size) */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A. To compute costs only, set it to NULL */
+    const int *labels, /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const int *, const int *, const int *,
+      cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc, sizeInBytes);
+  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths,
+                  inputLengths, algo, ctcLossDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(cudnnHandle_t            handle,
-                                                   cudnnRNNDescriptor_t     rnnDesc,
-                                                   const int                hiddenSize,
-                                                   const int                numLayers,
-                                                   cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps. */
-                                                   cudnnRNNInputMode_t      inputMode,          
-                                                   cudnnDirectionMode_t     direction,
-                                                   cudnnRNNMode_t           mode,
-                                                   cudnnRNNAlgo_t           algo,
-                                                   cudnnDataType_t          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(cudnnRNNDescriptor_t     rnnDesc,
-                                                int                      hiddenSize,
-                                                int                      numLayers,
-                                                cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps. */
-                                                cudnnRNNInputMode_t      inputMode,
-                                                cudnnDirectionMode_t     direction,
-                                                cudnnRNNMode_t           mode,
-                                                cudnnDataType_t          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, dataType);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, dataType);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cudnn_7_1.inc b/tensorflow/stream_executor/cuda/cudnn_7_1.inc
index 9f4b28f3fe3..5330e6d0584 100644
--- a/tensorflow/stream_executor/cuda/cudnn_7_1.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_7_1.inc
@@ -3,2279 +3,2359 @@
 extern "C" {
 
 size_t CUDNNWINAPI cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
 size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *  CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(
-                                cudnnHandle_t                       handle,
-                                cudnnStatus_t                      *rstatus,
-                                cudnnErrQueryMode_t                 mode,
-                                cudnnRuntimeTag_t                  *tag ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
+cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle,
+                                                 cudnnStatus_t *rstatus,
+                                                 cudnnErrQueryMode_t mode,
+                                                 cudnnRuntimeTag_t *tag) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnQueryRuntimeError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rstatus, mode, tag);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreate        (cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroy       (cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetStream     (cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetStream     (cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateTensorDescriptor(
-                                cudnnTensorDescriptor_t            *tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                int                                 n,        /* number of inputs (batch size) */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of input section */
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                int                                 n,        /* number of inputs (batch size) */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of input section */
-                                int                                 w,        /* width of input section */
-                                int                                 nStride,
-                                int                                 cStride,
-                                int                                 hStride,
-                                int                                 wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w,                    /* width of input section */
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                int                                *n,        /* number of inputs (batch size) */
-                                int                                *c,        /* number of input feature maps  */
-                                int                                *h,        /* height of input section */
-                                int                                *w,        /* width of input section */
-                                int                                *nStride,
-                                int                                *cStride,
-                                int                                *hStride,
-                                int                                *wStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType, /* image data type */
+    int *n,                    /* number of inputs (batch size) */
+    int *c,                    /* number of input feature maps  */
+    int *h,                    /* height of input section */
+    int *w,                    /* width of input section */
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
-                                cudnnTensorDescriptor_t             tensorDesc,
-                                cudnnTensorFormat_t                 format,
-                                cudnnDataType_t                     dataType,
-                                int                                 nbDims,
-                                const int                           dimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType,
-                                int                                *nbDims,
-                                int                                 dimA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
-                                const cudnnTensorDescriptor_t       tensorDesc,
-                                size_t                              *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorDescriptor(
-                                cudnnTensorDescriptor_t             tensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnAddTensor(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t          *opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc,
-                                cudnnOpTensorOp_t                   opTensorOp,
-                                cudnnDataType_t                     opTensorCompType,
-                                cudnnNanPropagation_t               opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                cudnnOpTensorOp_t                  *opTensorOp,
-                                cudnnDataType_t                    *opTensorCompType,
-                                cudnnNanPropagation_t              *opTensorNanOpt ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyOpTensorDescriptor(
-                                cudnnOpTensorDescriptor_t           opTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnOpTensorDescriptor_t     opTensorDesc,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       bDesc,
-                                const void                         *B,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t          *reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc,
-                                cudnnReduceTensorOp_t                   reduceTensorOp,
-                                cudnnDataType_t                     reduceTensorCompType,
-                                cudnnNanPropagation_t               reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t          reduceTensorIndices,
-                                cudnnIndicesType_t                  reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
-                                const cudnnReduceTensorDescriptor_t     reduceTensorDesc,
-                                cudnnReduceTensorOp_t                  *reduceTensorOp,
-                                cudnnDataType_t                    *reduceTensorCompType,
-                                cudnnNanPropagation_t              *reduceTensorNanOpt,
-                                cudnnReduceTensorIndices_t         *reduceTensorIndices,
-                                cudnnIndicesType_t                 *reduceTensorIndicesType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
-                                cudnnReduceTensorDescriptor_t           reduceTensorDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                                void                               *indices,
-                                size_t                              indicesSizeInBytes,
-                                void                               *workspace,
-                                size_t                              workspaceSizeInBytes,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       aDesc,
-                                const void                         *A,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       cDesc,
-                                void                               *C ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *valuePtr ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const void                         *alpha ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateFilterDescriptor(
-                                cudnnFilterDescriptor_t            *filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                cudnnTensorFormat_t                 format,
-                                int                                 k,        /* number of output feature maps */
-                                int                                 c,        /* number of input feature maps */
-                                int                                 h,        /* height of each input filter */
-                                int                                 w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,          /* image data type */
+    cudnnTensorFormat_t format, int k, /* number of output feature maps */
+    int c,                             /* number of input feature maps */
+    int h,                             /* height of each input filter */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                cudnnTensorFormat_t                *format,
-                                int                                *k,        /* number of output feature maps */
-                                int                                *c,        /* number of input feature maps */
-                                int                                *h,        /* height of each input filter */
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+    const cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t *dataType,           /* image data type */
+    cudnnTensorFormat_t *format, int *k, /* number of output feature maps */
+    int *c,                              /* number of input feature maps */
+    int *h,                              /* height of each input filter */
+    int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc,
-                                cudnnDataType_t                     dataType, /* image data type */
-                                cudnnTensorFormat_t                 format,
-                                int                                 nbDims,
-                                const int                           filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType, /* image data type */
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDimsRequested,
-                                cudnnDataType_t                    *dataType, /* image data type */
-                                cudnnTensorFormat_t                *format,
-                                int                                *nbDims,
-                                int                                 filterDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, /* image data type */
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyFilterDescriptor(
-                                cudnnFilterDescriptor_t             filterDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t       *convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( cudnnConvolutionDescriptor_t convDesc,
-                                                       cudnnMathType_t mathType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( cudnnConvolutionDescriptor_t convDesc,
-                                                       cudnnMathType_t *mathType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( cudnnConvolutionDescriptor_t convDesc,
-                                                         int groupCount ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( cudnnConvolutionDescriptor_t convDesc,
-                                                         int *groupCount ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( cudnnConvolutionDescriptor_t convDesc,
-                                                             int pad_h,    /* zero-padding height */
-                                                             int pad_w,    /* zero-padding width */
-                                                             int u,   /* vertical filter stride */
-                                                             int v,   /* horizontal filter stride */
-                                                             int dilation_h, /* filter dilation in the vertical dimension */
-                                                             int dilation_w, /* filter dilation in the horizontal dimension */
-                                                             cudnnConvolutionMode_t mode,
-                                                             cudnnDataType_t computeType
-                                                           ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */
+    int pad_w,                                        /* zero-padding width */
+    int u,          /* vertical filter stride */
+    int v,          /* horizontal filter stride */
+    int dilation_h, /* filter dilation in the vertical dimension */
+    int dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(  const cudnnConvolutionDescriptor_t convDesc,
-                                                            int* pad_h,    /* zero-padding height */
-                                                            int* pad_w,    /* zero-padding width */
-                                                            int* u,        /* vertical filter stride */
-                                                            int* v,        /* horizontal filter stride */
-                                                            int* dilation_h, /* filter dilation in the vertical dimension */
-                                                            int* dilation_w, /* filter dilation in the horizontal dimension */
-                                                            cudnnConvolutionMode_t* mode,
-                                                            cudnnDataType_t *computeType
-                                                         ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,      /* zero-padding height */
+    int *pad_w,      /* zero-padding width */
+    int *u,          /* vertical filter stride */
+    int *v,          /* horizontal filter stride */
+    int *dilation_h, /* filter dilation in the vertical dimension */
+    int *dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc,
-                                int                                 arrayLength,             /* nbDims-2 size */
-                                const int                           padA[],
-                                const int                           filterStrideA[],
-                                const int                           dilationA[],
-                                cudnnConvolutionMode_t              mode,
-                                cudnnDataType_t                     computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                int                                 arrayLengthRequested,
-                                int                                *arrayLength,
-                                int                                 padA[],
-                                int                                 strideA[],
-                                int                                 dilationA[],
-                                cudnnConvolutionMode_t             *mode,
-                                cudnnDataType_t                    *computeType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                int                                 nbDims,
-                                int                                 tensorOutputDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyConvolutionDescriptor(
-                                cudnnConvolutionDescriptor_t        convDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                       int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t      *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdPreference_t     preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionFwdAlgo_t          *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7(
-                                cudnnHandle_t                      handle,
-                                const cudnnTensorDescriptor_t      srcDesc,
-                                const cudnnFilterDescriptor_t      filterDesc,
-                                const cudnnConvolutionDescriptor_t convDesc,
-                                const cudnnTensorDescriptor_t      destDesc,
-                                const int                          requestedAlgoCount,
-                                int                               *returnedAlgoCount,
-                                cudnnConvolutionFwdAlgoPerf_t     *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnFilterDescriptor_t filterDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha1,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionFwdAlgo_t           algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *alpha2,
-                                const cudnnTensorDescriptor_t       zDesc,
-                                const void                         *z,
-                                const cudnnTensorDescriptor_t       biasDesc,
-                                const void                         *bias,
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dbDesc,
-                                void                               *db ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                              int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                const int                           requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
-                                cudnnHandle_t                        handle,
-                                const cudnnTensorDescriptor_t        xDesc,
-                                const void                          *x,
-                                const cudnnTensorDescriptor_t        dyDesc,
-                                const void                          *y,
-                                const cudnnConvolutionDescriptor_t   convDesc,
-                                const cudnnFilterDescriptor_t        dwDesc,
-                                void                                *dw,
-                                const int                            requestedAlgoCount,
-                                int                                 *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                void                                *workSpace,
-                                size_t                               workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
-                                cudnnHandle_t                         handle,
-                                const cudnnTensorDescriptor_t         xDesc,
-                                const cudnnTensorDescriptor_t         dyDesc,
-                                const cudnnConvolutionDescriptor_t    convDesc,
-                                const cudnnFilterDescriptor_t         dwDesc,
-                                cudnnConvolutionBwdFilterPreference_t preference,
-                                size_t                                memoryLimitInBytes,
-                                cudnnConvolutionBwdFilterAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7(
-                                cudnnHandle_t                         handle,
-                                const cudnnTensorDescriptor_t         srcDesc,
-                                const cudnnTensorDescriptor_t         diffDesc,
-                                const cudnnConvolutionDescriptor_t    convDesc,
-                                const cudnnFilterDescriptor_t         gradDesc,
-                                const int                             requestedAlgoCount,
-                                int                                  *returnedAlgoCount,
-                                cudnnConvolutionBwdFilterAlgoPerf_t  *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnFilterDescriptor_t       gradDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdFilterAlgo_t     algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnFilterDescriptor_t       dwDesc,
-                                void                               *dw ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( cudnnHandle_t     handle,
-                                                                            int              *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataPreference_t preference,
-                                size_t                              memoryLimitInBytes,
-                                cudnnConvolutionBwdDataAlgo_t      *algo ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       filterDesc,
-                                const cudnnTensorDescriptor_t       diffDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       gradDesc,
-                                const int                           requestedAlgoCount,
-                                int                                *returnedAlgoCount,
-                                cudnnConvolutionBwdDataAlgoPerf_t  *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
-                                cudnnHandle_t                       handle,
-                                const void                         *alpha,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const void                         *w,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                cudnnConvolutionBwdDataAlgo_t       algo,
-                                void                               *workSpace,
-                                size_t                              workSpaceSizeInBytes,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnIm2Col(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const cudnnFilterDescriptor_t       wDesc,
-                                const cudnnConvolutionDescriptor_t  convDesc,
-                                void                               *colBuffer ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnSoftmaxAlgorithm_t             algo,
-                                cudnnSoftmaxMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePoolingDescriptor(
-                                cudnnPoolingDescriptor_t           *poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                cudnnPoolingMode_t                  mode,
-                                cudnnNanPropagation_t               maxpoolingNanOpt,
-                                int                                 windowHeight,
-                                int                                 windowWidth,
-                                int                                 verticalPadding,
-                                int                                 horizontalPadding,
-                                int                                 verticalStride,
-                                int                                 horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *windowHeight,
-                                int                                *windowWidth,
-                                int                                *verticalPadding,
-                                int                                *horizontalPadding,
-                                int                                *verticalStride,
-                                int                                *horizontalStride ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc,
-                                const cudnnPoolingMode_t            mode,
-                                const cudnnNanPropagation_t         maxpoolingNanOpt,
-                                int                                 nbDims,
-                                const int                           windowDimA[],
-                                const int                           paddingA[],
-                                const int                           strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                int                                 nbDimsRequested,
-                                cudnnPoolingMode_t                 *mode,
-                                cudnnNanPropagation_t              *maxpoolingNanOpt,
-                                int                                *nbDims,
-                                int                                 windowDimA[],
-                                int                                 paddingA[],
-                                int                                 strideA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                 nbDims,
-                                int                                 outputTensorDimA[] ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dForwardOutputDim(
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const cudnnTensorDescriptor_t       inputTensorDesc,
-                                int                                *n,
-                                int                                *c,
-                                int                                *h,
-                                int                                *w ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI
+cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
+                                  const cudnnTensorDescriptor_t inputTensorDesc,
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPoolingDescriptor(
-                                cudnnPoolingDescriptor_t            poolingDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
-                                cudnnHandle_t                       handle,
-                                const cudnnPoolingDescriptor_t      poolingDesc,
-                                const void                          *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateActivationDescriptor(
-                                cudnnActivationDescriptor_t        *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
-                                cudnnActivationDescriptor_t         activationDesc,
-                                cudnnActivationMode_t               mode,
-                                cudnnNanPropagation_t               reluNanOpt,
-                                double                              coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetActivationDescriptor(
-                                const cudnnActivationDescriptor_t   activationDesc,
-                                cudnnActivationMode_t              *mode,
-                                cudnnNanPropagation_t              *reluNanOpt,
-                                double*                             coef ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
+                             cudnnActivationMode_t *mode,
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyActivationDescriptor(
-                                cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnActivationDescriptor_t         activationDesc,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateLRNDescriptor(
-                                cudnnLRNDescriptor_t               *normDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned                            lrnN,
-                                double                              lrnAlpha,
-                                double                              lrnBeta,
-                                double                              lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(
-                                cudnnLRNDescriptor_t                normDesc,
-                                unsigned*                           lrnN,
-                                double*                             lrnAlpha,
-                                double*                             lrnBeta,
-                                double*                             lrnK ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyLRNDescriptor( cudnnLRNDescriptor_t lrnDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnLRNMode_t                      lrnMode,
-                                const void*                         alpha,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                const void                         *y,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for means, temp, temp2 */
-                                const void                         *x,
-                                const void                         *means, /* if NULL, means are assumed to be zero */
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnLRNDescriptor_t                normDesc,
-                                cudnnDivNormMode_t                  mode,
-                                const void                         *alpha,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for x, means, dy, temp, temp2 */
-                                const void                         *x,
-                                const void                         *means, /* if NULL, means are assumed to be zero */
-                                const void                         *dy,
-                                void                               *temp,
-                                void                               *temp2,
-                                const void                         *beta,
-                                const cudnnTensorDescriptor_t       dXdMeansDesc, /* same desc for dx, dMeans */
-                                void                               *dx, /* output x differential */
-                                void                               *dMeans ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc, /* same desc for x, means, dy, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+    void *dx,                                   /* output x differential */
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
-                                cudnnTensorDescriptor_t             derivedBnDesc,
-                                const cudnnTensorDescriptor_t       xDesc,
-                                cudnnBatchNormMode_t                mode ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
-                                const void                         *alpha, /* alpha[0] = result blend factor */
-                                const void                         *beta,  /* beta[0] = dest layer blend factor */
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
 
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     /* NxCxHxW */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
 
-                                /* Shared desc for the next 6 tensors in the argument list.
-                                   Data type to be set as follows:
-                                   type = (typeOf(x) == double) ? double : float
-                                   Dimensions for this descriptor depend on normalization mode
-                                   - Spatial Normalization : tensors are expected to have dims 1xCx1x1
-                                    (normalization is performed across NxHxW)
-                                   - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW 
-                                    (normalization is performed across N) */
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
+    /* Shared desc for the next 6 tensors in the argument list.
+       Data type to be set as follows:
+       type = (typeOf(x) == double) ? double : float
+       Dimensions for this descriptor depend on normalization mode
+       - Spatial Normalization : tensors are expected to have dims 1xCx1x1
+        (normalization is performed across NxHxW)
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-                                /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
-                                const void                         *bnScale,
-                                const void                         *bnBias,
+    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
+     */
+    const void *bnScale, const void *bnBias,
 
-                                /* MUST use factor=1 in the very first call of a complete training cycle.
-                                   Use a factor=1/(1+n) at N-th call to the function to get
-                                   Cumulative Moving Average (CMA) behavior
-                                   CMA[n] = (x[1]+...+x[n])/n
-                                   Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
-                                   ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
-                                   CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
-                                double                              exponentialAverageFactor,
+    /* MUST use factor=1 in the very first call of a complete training cycle.
+       Use a factor=1/(1+n) at N-th call to the function to get
+       Cumulative Moving Average (CMA) behavior
+       CMA[n] = (x[1]+...+x[n])/n
+       Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
+       ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
+       CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
+    double exponentialAverageFactor,
 
-                                /* Used in Training phase only. 
-                                   runningMean = newMean*factor + runningMean*(1-factor) */
-                                void                               *resultRunningMean,
-                                /* Output in training mode, input in inference. Is the moving average
-                                   of  variance[x] (factor is applied in the same way as for runningMean) */
-                                void                               *resultRunningVariance,
+    /* Used in Training phase only.
+       runningMean = newMean*factor + runningMean*(1-factor) */
+    void *resultRunningMean,
+    /* Output in training mode, input in inference. Is the moving average
+       of  variance[x] (factor is applied in the same way as for runningMean) */
+    void *resultRunningVariance,
 
-                                /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
-                                double                              epsilon,
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
+    double epsilon,
 
-                                /* Optionally save intermediate results from the forward pass here
-                                   - can be reused to speed up backward pass. NULL if unused */
-                                void                               *resultSaveMean,
-                                void                               *resultSaveInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    /* Optionally save intermediate results from the forward pass here
+       - can be reused to speed up backward pass. NULL if unused */
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alpha, /* alpha[0] = result blend factor */
-                                const void                         *beta,  /* beta[0] = dest layer blend factor */
-                                const cudnnTensorDescriptor_t       xDesc,
-                                const void                         *x,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       yDesc,
-                                void                               *y,     /* NxCxHxW */
-                                const cudnnTensorDescriptor_t       bnScaleBiasMeanVarDesc,
-                                const void                         *bnScale,
-                                const void                         *bnBias,
-                                const void                         *estimatedMean,
-                                const void                         *estimatedVariance,
-                                double                              epsilon ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
-                                cudnnHandle_t                       handle,
-                                cudnnBatchNormMode_t                mode,
-                                const void                         *alphaDataDiff,
-                                const void                         *betaDataDiff,
-                                const void                         *alphaParamDiff,
-                                const void                         *betaParamDiff,
-                                const cudnnTensorDescriptor_t       xDesc, /* same desc for x, dx, dy */
-                                const void                         *x,
-                                const cudnnTensorDescriptor_t       dyDesc,
-                                const void                         *dy,
-                                const cudnnTensorDescriptor_t       dxDesc,
-                                void                               *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t       dBnScaleBiasDesc,
-                                const void                         *bnScale, /* bnBias doesn't affect backpropagation */
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void                               *dBnScaleResult,
-                                void                               *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double                              epsilon,
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale, /* bnBias doesn't affect backpropagation */
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void                         *savedMean,
-                                const void                         *savedInvVariance ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
-                               cudnnSpatialTransformerDescriptor_t        *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
-                                cudnnSpatialTransformerDescriptor_t         stDesc,
-                                cudnnSamplerType_t                          samplerType, 
-                                cudnnDataType_t                             dataType,
-                                const int                                   nbDims,
-                                const int                                   dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
-                                 cudnnSpatialTransformerDescriptor_t        stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *theta,
-                                 void                                      *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
-                                 cudnnHandle_t                              handle,
-                                 const cudnnSpatialTransformerDescriptor_t  stDesc,
-                                 const void                                *dgrid,
-                                 void                                      *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,                                    
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *grid,
-                                 const void                                *beta,
-                                 cudnnTensorDescriptor_t                    yDesc,
-                                 void                                      *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
-                                 cudnnHandle_t                              handle,
-                                 cudnnSpatialTransformerDescriptor_t        stDesc,
-                                 const void                                *alpha,
-                                 const cudnnTensorDescriptor_t              xDesc,
-                                 const void                                *x,
-                                 const void                                *beta,
-                                 const cudnnTensorDescriptor_t              dxDesc,
-                                 void                                      *dx,
-                                 const void                                *alphaDgrid,
-                                 const cudnnTensorDescriptor_t              dyDesc,
-                                 const void                                *dy,
-                                 const void                                *grid,
-                                 const void                                *betaDgrid,
-                                 void                                      *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t * dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t * sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                    cudnnHandle_t            handle,
-                                                    float                    dropout, 
-                                                    void *                   states, 
-                                                    size_t                   stateSizeInBytes, 
-                                                    unsigned long long       seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                        cudnnHandle_t            handle,
-                                                        float                    dropout, 
-                                                        void *                   states, 
-                                                        size_t                   stateSizeInBytes, 
-                                                        unsigned long long       seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, 
-                                                    cudnnHandle_t            handle,
-                                                    float *                  dropout, 
-                                                    void **                  states,
-                                                    unsigned long long *     seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float *, void **, unsigned long long *);
+cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout,
+    void **states, unsigned long long *seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float *, void **, unsigned long long *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(cudnnHandle_t                  handle, 
-                                              const cudnnDropoutDescriptor_t dropoutDesc,
-                                              const cudnnTensorDescriptor_t  xdesc, 
-                                              const void *                   x,
-                                              const cudnnTensorDescriptor_t  ydesc,
-                                              void *                         y,
-                                              void *                         reserveSpace,
-                                              size_t                         reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(cudnnHandle_t                  handle, 
-                                               const cudnnDropoutDescriptor_t dropoutDesc,
-                                               const cudnnTensorDescriptor_t  dydesc, 
-                                               const void *                   dy,
-                                               const cudnnTensorDescriptor_t  dxdesc,
-                                               void *                         dx,
-                                               void *                         reserveSpace,
-                                               size_t                         reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t * rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount(
-                                cudnnHandle_t              handle,
-                          const cudnnRNNDescriptor_t       rnnDesc,
-                                int                        *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx( cudnnHandle_t handle,
-                                                    const cudnnRNNDescriptor_t rnnDesc,
-                                                    const int seqLength,
-                                                    const cudnnTensorDescriptor_t * xDesc,
-                                                    const void * x,
-                                                    const cudnnTensorDescriptor_t hxDesc,
-                                                    const void * hx,
-                                                    const cudnnTensorDescriptor_t cxDesc,
-                                                    const void * cx,
-                                                    const cudnnFilterDescriptor_t wDesc,
-                                                    const void * w,
-                                                    const cudnnTensorDescriptor_t *yDesc,
-                                                    void * y,
-                                                    const cudnnTensorDescriptor_t hyDesc,
-                                                    void * hy,
-                                                    const cudnnTensorDescriptor_t cyDesc,
-                                                    void * cy,
-                                                    const float findIntensity,
-                                                    const int requestedAlgoCount,
-                                                    int *returnedAlgoCount,
-                                                    cudnnAlgorithmPerformance_t *perfResults,
-                                                    void * workspace,
-                                                    size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount(
-                                cudnnHandle_t              handle,
-                          const cudnnRNNDescriptor_t       rnnDesc,
-                                int                        *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx( cudnnHandle_t handle,
-                                                    const cudnnRNNDescriptor_t rnnDesc,
-                                                    const int seqLength,
-                                                    const cudnnTensorDescriptor_t * xDesc,
-                                                    const void * x,
-                                                    const cudnnTensorDescriptor_t hxDesc,
-                                                    const void * hx,
-                                                    const cudnnTensorDescriptor_t cxDesc,
-                                                    const void * cx,
-                                                    const cudnnFilterDescriptor_t wDesc,
-                                                    const void * w,
-                                                    const cudnnTensorDescriptor_t *yDesc,
-                                                    void * y,
-                                                    const cudnnTensorDescriptor_t hyDesc,
-                                                    void * hy,
-                                                    const cudnnTensorDescriptor_t cyDesc,
-                                                    void * cy,
-                                                    const float findIntensity,
-                                                    const int requestedAlgoCount,
-                                                    int *returnedAlgoCount,
-                                                    cudnnAlgorithmPerformance_t *perfResults,
-                                                    void * workspace,
-                                                    size_t workSpaceSizeInBytes,
-                                                    void * reserveSpace,
-                                                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount(
-                                cudnnHandle_t              handle,
-                          const cudnnRNNDescriptor_t       rnnDesc,
-                                int                        *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx( cudnnHandle_t handle,
-                                                const cudnnRNNDescriptor_t rnnDesc,
-                                                const int seqLength,
-                                                const cudnnTensorDescriptor_t * yDesc,
-                                                const void * y,
-                                                const cudnnTensorDescriptor_t * dyDesc,
-                                                const void * dy,
-                                                const cudnnTensorDescriptor_t dhyDesc,
-                                                const void * dhy,
-                                                const cudnnTensorDescriptor_t dcyDesc,
-                                                const void * dcy,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void * w,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void * hx,
-                                                const cudnnTensorDescriptor_t cxDesc,
-                                                const void * cx,
-                                                const cudnnTensorDescriptor_t * dxDesc,
-                                                void * dx,
-                                                const cudnnTensorDescriptor_t dhxDesc,
-                                                void * dhx,
-                                                const cudnnTensorDescriptor_t dcxDesc,
-                                                void * dcx,
-                                                const float findIntensity,
-                                                const int requestedAlgoCount,
-                                                int *returnedAlgoCount,
-                                                cudnnAlgorithmPerformance_t *perfResults,
-                                                void * workspace,
-                                                size_t workSpaceSizeInBytes,
-                                                void * reserveSpace,
-                                                size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const cudnnTensorDescriptor_t *dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnTensorDescriptor_t *dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount(
-                                cudnnHandle_t              handle,
-                          const cudnnRNNDescriptor_t       rnnDesc,
-                                int                        *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx( cudnnHandle_t handle,
-                                                   const cudnnRNNDescriptor_t rnnDesc,
-                                                   const int seqLength,
-                                                   const cudnnTensorDescriptor_t * xDesc,
-                                                   const void * x,
-                                                   const cudnnTensorDescriptor_t hxDesc,
-                                                   const void * hx,
-                                                   const cudnnTensorDescriptor_t * yDesc, 
-                                                   const void * y,
-                                                   const float findIntensity,
-                                                   const int requestedAlgoCount,
-                                                   int *returnedAlgoCount,
-                                                   cudnnAlgorithmPerformance_t *perfResults,
-                                                   const void * workspace, 
-                                                   size_t workSpaceSizeInBytes, 
-                                                   const cudnnFilterDescriptor_t dwDesc, 
-                                                   void * dw,
-                                                   const void * reserveSpace, 
-                                                   size_t reserveSpaceSizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const float findIntensity, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults,
+    const void *workspace, size_t workSpaceSizeInBytes,
+    const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  findIntensity, requestedAlgoCount, returnedAlgoCount,
+                  perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t       rnnDesc,
-                                                       const int                  minibatch,
-                                                       const cudnnDataType_t      dataType,
-                                                       cudnnPersistentRNNPlan_t * plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                                                    cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(cudnnHandle_t              handle,
-                                                cudnnRNNDescriptor_t       rnnDesc,
-                                                const int                  hiddenSize,
-                                                const int                  numLayers,
-                                                cudnnDropoutDescriptor_t   dropoutDesc, /* Between layers, not between recurrent steps. */
-                                                cudnnRNNInputMode_t        inputMode,          
-                                                cudnnDirectionMode_t       direction,
-                                                cudnnRNNMode_t             mode,
-                                                cudnnRNNAlgo_t             algo,
-                                                cudnnDataType_t            dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNProjectionLayers(cudnnHandle_t        handle,
-                                                cudnnRNNDescriptor_t       rnnDesc,
-                                                const int                  recProjSize,
-                                                const int                  outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
+cudnnStatus_t CUDNNWINAPI
+cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+                            const int recProjSize, const int outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers(cudnnHandle_t        handle,
-                                                const cudnnRNNDescriptor_t rnnDesc,
-                                                int                        *recProjSize,
-                                                int                        *outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize,
+    int *outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor(cudnnHandle_t     handle,
-                                                cudnnRNNDescriptor_t       rnnDesc,
-                                                cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(cudnnHandle_t              handle,
-                                                cudnnRNNDescriptor_t       rnnDesc,
-                                                int *                      hiddenSize, 
-                                                int *                      numLayers, 
-                                                cudnnDropoutDescriptor_t * dropoutDesc,
-                                                cudnnRNNInputMode_t *      inputMode, 
-                                                cudnnDirectionMode_t *     direction, 
-                                                cudnnRNNMode_t *           mode, 
-                                                cudnnRNNAlgo_t *           algo, 
-                                                cudnnDataType_t *          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t* mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(
+    cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( cudnnHandle_t             handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,  
-                                                const int                     seqLength, 
-                                                const cudnnTensorDescriptor_t *xDesc,
-                                                size_t                        *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( cudnnHandle_t       handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,  
-                                                const int                     seqLength,
-                                                const cudnnTensorDescriptor_t *xDesc,
-                                                size_t                        *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNParamsSize(cudnnHandle_t                 handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,  
-                                                const cudnnTensorDescriptor_t xDesc,
-                                                size_t                        *sizeInBytes,
-                                                cudnnDataType_t               dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
+                      cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( cudnnHandle_t      handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc, 
-                                                const int                     pseudoLayer,
-                                                const cudnnTensorDescriptor_t xDesc,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void                    *w, 
-                                                const int                     linLayerID,  
-                                                cudnnFilterDescriptor_t       linLayerMatDesc,
-                                                void                          **linLayerMat) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( cudnnHandle_t        handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc, 
-                                                const int                     pseudoLayer,
-                                                const cudnnTensorDescriptor_t xDesc, 
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void                    *w,
-                                                const int                     linLayerID,
-                                                cudnnFilterDescriptor_t       linLayerBiasDesc,
-                                                void                          **linLayerBias) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( cudnnHandle_t             handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,
-                                                const int                     seqLength,
-                                                const cudnnTensorDescriptor_t *xDesc,
-                                                const void                    *x,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void                    *hx,
-                                                const cudnnTensorDescriptor_t cxDesc,
-                                                const void                    *cx,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void                    *w,
-                                                const cudnnTensorDescriptor_t *yDesc,
-                                                void                          *y,
-                                                const cudnnTensorDescriptor_t hyDesc,
-                                                void                          *hy,
-                                                const cudnnTensorDescriptor_t cyDesc,
-                                                void                          *cy,
-                                                void                          *workspace,
-                                                size_t                        workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( cudnnHandle_t              handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,
-                                                const int                     seqLength,
-                                                const cudnnTensorDescriptor_t *xDesc,
-                                                const void                    *x,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void                    *hx,
-                                                const cudnnTensorDescriptor_t cxDesc,
-                                                const void                    *cx,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void                    *w,
-                                                const cudnnTensorDescriptor_t *yDesc,
-                                                void                          *y,
-                                                const cudnnTensorDescriptor_t hyDesc,
-                                                void                          *hy,
-                                                const cudnnTensorDescriptor_t cyDesc,
-                                                void                          *cy,
-                                                void                          *workspace,
-                                                size_t                        workSpaceSizeInBytes,
-                                                void *                        reserveSpace,
-                                                size_t                        reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardData( cudnnHandle_t                 handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,
-                                                const int                     seqLength,
-                                                const cudnnTensorDescriptor_t *yDesc,
-                                                const void                    *y,
-                                                const cudnnTensorDescriptor_t *dyDesc,
-                                                const void                    *dy,
-                                                const cudnnTensorDescriptor_t dhyDesc,
-                                                const void                    *dhy,
-                                                const cudnnTensorDescriptor_t dcyDesc,
-                                                const void                    *dcy,
-                                                const cudnnFilterDescriptor_t wDesc,
-                                                const void                    *w,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void                    *hx,
-                                                const cudnnTensorDescriptor_t cxDesc,
-                                                const void                    *cx,
-                                                const cudnnTensorDescriptor_t *dxDesc,
-                                                void                          *dx,
-                                                const cudnnTensorDescriptor_t dhxDesc,
-                                                void                          *dhx,
-                                                const cudnnTensorDescriptor_t dcxDesc,
-                                                void                          *dcx,
-                                                void                          *workspace,
-                                                size_t                        workSpaceSizeInBytes,
-                                                void *                        reserveSpace,
-                                                size_t                        reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( cudnnHandle_t              handle,
-                                                const cudnnRNNDescriptor_t    rnnDesc,
-                                                const int                     seqLength,
-                                                const cudnnTensorDescriptor_t *xDesc,
-                                                const void                    *x,
-                                                const cudnnTensorDescriptor_t hxDesc,
-                                                const void                    *hx,
-                                                const cudnnTensorDescriptor_t *yDesc, 
-                                                const void                    *y,
-                                                const void                    *workspace, 
-                                                size_t                        workSpaceSizeInBytes, 
-                                                const cudnnFilterDescriptor_t dwDesc, 
-                                                void                          *dw,
-                                                const void                    *reserveSpace, 
-                                                size_t                        reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateCTCLossDescriptor( cudnnCTCLossDescriptor_t* ctcLossDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor(
-                                cudnnCTCLossDescriptor_t         ctcLossDesc,
-                                cudnnDataType_t                  compType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor(
-                                cudnnCTCLossDescriptor_t         ctcLossDesc,
-                                cudnnDataType_t*                 compType ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyCTCLossDescriptor( cudnnCTCLossDescriptor_t ctcLossDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( cudnnHandle_t handle, 
-                                        const cudnnTensorDescriptor_t probsDesc,     /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the mini batch size, A is the alphabet size)  */
-                                        const void * probs,                          /* probabilities after softmax, in GPU memory */
-                                        const int * labels,                          /* labels, in CPU memory */
-                                        const int * labelLengths,                    /* the length of each label, in CPU memory */
-                                        const int * inputLengths,                    /* the lengths of timing steps in each batch, in CPU memory */
-                                        void * costs,                                /* the returned costs of CTC, in GPU memory */
-                                        const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
-                                        const void * gradients,                      /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
-                                        cudnnCTCLossAlgo_t algo,                     /* algorithm selected, supported now 0 and 1 */
-                                        cudnnCTCLossDescriptor_t ctcLossDesc,
-                                        void * workspace,                            /* pointer to the workspace, in GPU memory */
-                                        size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, const int *, const int *, void *, const cudnnTensorDescriptor_t, const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnCTCLoss(
+    cudnnHandle_t handle,
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the mini batch size, A
+                      is the alphabet size)  */
+    const void *probs,       /* probabilities after softmax, in GPU memory */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    void *costs,             /* the returned costs of CTC, in GPU memory */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A */
+    const void *gradients,   /* the returned CTC gradients, in GPU memory, to
+                                compute costs only, set it to NULL */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc,
+    void *workspace, /* pointer to the workspace, in GPU memory */
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *,
+      const int *, const int *, void *, const cudnnTensorDescriptor_t,
+      const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCTCLoss");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths,
+                  costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace,
+                  workSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize(
-                                cudnnHandle_t                       handle,
-                                const cudnnTensorDescriptor_t       probsDesc,       /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the mini batch size, A is the alphabet size) */
-                                const cudnnTensorDescriptor_t       gradientsDesc,   /* Tensor descriptor for gradients, the dimensions are T,N,A. To compute costs only, set it to NULL */
-                                const int                          * labels,         /* labels, in CPU memory */
-                                const int                          * labelLengths,   /* the length of each label, in CPU memory */
-                                const int                          * inputLengths,   /* the lengths of timing steps in each batch, in CPU memory */
-                                cudnnCTCLossAlgo_t                  algo,            /* algorithm selected, supported now 0 and 1 */
-                                cudnnCTCLossDescriptor_t            ctcLossDesc,
-                                size_t                             *sizeInBytes ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const int *, const int *, const int *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
+    cudnnHandle_t handle,
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the mini batch size, A
+                      is the alphabet size) */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A. To compute costs only, set it to NULL */
+    const int *labels, /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const int *, const int *, const int *,
+      cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc, sizeInBytes);
+  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths,
+                  inputLengths, algo, ctcLossDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmDescriptor(
-                                cudnnAlgorithmDescriptor_t *algoDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor(
-                                cudnnAlgorithmDescriptor_t algoDesc,
-                                cudnnAlgorithm_t algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t);
+    cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor(
-                                const cudnnAlgorithmDescriptor_t algoDesc,
-                                cudnnAlgorithm_t* algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t *);
+    const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor(
-                                const cudnnAlgorithmDescriptor_t src,
-                                cudnnAlgorithmDescriptor_t dest) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithmDescriptor_t);
+    const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCopyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(src, dest);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmDescriptor(
-                                cudnnAlgorithmDescriptor_t algoDesc ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance(
-                                cudnnAlgorithmPerformance_t* algoPerf,
-                                int numberToCreate ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToCreate);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance(
-                                cudnnAlgorithmPerformance_t algoPerf,
-                                cudnnAlgorithmDescriptor_t algoDesc,
-                                cudnnStatus_t status,
-                                float time,
-                                size_t memory ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t, cudnnStatus_t, float, size_t);
+    cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc,
+    cudnnStatus_t status, float time, size_t memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t,
+                                               cudnnAlgorithmDescriptor_t,
+                                               cudnnStatus_t, float, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance(
-                                const cudnnAlgorithmPerformance_t algoPerf,
-                                cudnnAlgorithmDescriptor_t* algoDesc,
-                                cudnnStatus_t* status,
-                                float* time,
-                                size_t* memory ) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, cudnnStatus_t *, float *, size_t *);
+    const cudnnAlgorithmPerformance_t algoPerf,
+    cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time,
+    size_t *memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *,
+      cudnnStatus_t *, float *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance(
-                                cudnnAlgorithmPerformance_t* algoPerf,
-                                int numberToDestroy) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToDestroy);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize(
-                                cudnnHandle_t              handle,
-                                cudnnAlgorithmDescriptor_t algoDesc,
-                                size_t*                    algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
+    cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+    size_t *algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSaveAlgorithm(
-                                cudnnHandle_t              handle,
-                                cudnnAlgorithmDescriptor_t algoDesc,
-                                void*                      algoSpace,
-                                size_t                     algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI
+cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+                   void *algoSpace, size_t algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSaveAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm(
-                                cudnnHandle_t              handle,
-                                void*                      algoSpace,
-                                size_t                     algoSpaceSizeInBytes,
-                                cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, cudnnAlgorithmDescriptor_t);
+    cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetCallback(
-                                unsigned            mask,
-                                void                *udata,
-                                cudnnCallback_t     fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata,
+                                           cudnnCallback_t fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnGetCallback(
-                                unsigned            *mask,
-                                void                **udata,
-                                cudnnCallback_t     *fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata,
+                                           cudnnCallback_t *fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(cudnnHandle_t         handle,
-                                                cudnnRNNDescriptor_t     rnnDesc,
-                                                const int                hiddenSize,
-                                                const int                numLayers,
-                                                cudnnDropoutDescriptor_t dropoutDesc,
-                                                cudnnRNNInputMode_t      inputMode,          
-                                                cudnnDirectionMode_t     direction,
-                                                cudnnRNNMode_t           mode,
-                                                cudnnRNNAlgo_t           algo,
-                                                cudnnDataType_t          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(cudnnRNNDescriptor_t  rnnDesc,
-                                                int                      hiddenSize,
-                                                int                      numLayers,
-                                                cudnnDropoutDescriptor_t dropoutDesc,
-                                                cudnnRNNInputMode_t      inputMode,
-                                                cudnnDirectionMode_t     direction,
-                                                cudnnRNNMode_t           mode,
-                                                cudnnDataType_t          dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode,
+    cudnnDirectionMode_t direction, cudnnRNNMode_t mode,
+    cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, dataType);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, dataType);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cudnn_7_3.inc b/tensorflow/stream_executor/cuda/cudnn_7_3.inc
index 0ee8e1492d5..f1c25c74d0c 100644
--- a/tensorflow/stream_executor/cuda/cudnn_7_3.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_7_3.inc
@@ -2,73 +2,71 @@
 
 extern "C" {
 
-size_t CUDNNWINAPI
-cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-size_t CUDNNWINAPI
-cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *CUDNNWINAPI
-cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
+cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle,
+                                                 cudnnStatus_t *rstatus,
+                                                 cudnnErrQueryMode_t mode,
+                                                 cudnnRuntimeTag_t *tag) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnQueryRuntimeError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rstatus, mode, tag);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreate(cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroy(cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
@@ -76,100 +74,97 @@ cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnTensorFormat_t format,
-                           cudnnDataType_t dataType, /* image data type */
-                           int n,                    /* number of inputs (batch size) */
-                           int c,                    /* number of input feature maps */
-                           int h,                    /* height of input section */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnDataType_t dataType, /* image data type */
-                             int n,                    /* number of inputs (batch size) */
-                             int c,                    /* number of input feature maps */
-                             int h,                    /* height of input section */
-                             int w,                    /* width of input section */
-                             int nStride,
-                             int cStride,
-                             int hStride,
-                             int wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w,                    /* width of input section */
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           int *n,                    /* number of inputs (batch size) */
-                           int *c,                    /* number of input feature maps  */
-                           int *h,                    /* height of input section */
-                           int *w,                    /* width of input section */
-                           int *nStride,
-                           int *cStride,
-                           int *hStride,
-                           int *wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType, /* image data type */
+    int *n,                    /* number of inputs (batch size) */
+    int *c,                    /* number of input feature maps  */
+    int *h,                    /* height of input section */
+    int *w,                    /* width of input section */
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t dataType,
-                           int nbDims,
-                           const int dimA[],
-                           const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnTensorFormat_t format,
-                             cudnnDataType_t dataType,
-                             int nbDims,
-                             const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType,
-                           int *nbDims,
-                           int dimA[],
-                           int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
@@ -177,35 +172,33 @@ cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnTransformTensor(cudnnHandle_t handle,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t yDesc,
-                     void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnAddTensor(cudnnHandle_t handle,
-               const void *alpha,
-               const cudnnTensorDescriptor_t aDesc,
-               const void *A,
-               const void *beta,
-               const cudnnTensorDescriptor_t cDesc,
-               void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
@@ -213,29 +206,29 @@ cudnnAddTensor(cudnnHandle_t handle,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t opTensorOp,
-                           cudnnDataType_t opTensorCompType,
-                           cudnnNanPropagation_t opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t *opTensorOp,
-                           cudnnDataType_t *opTensorCompType,
-                           cudnnNanPropagation_t *opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
@@ -243,126 +236,136 @@ cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnOpTensor(cudnnHandle_t handle,
-              const cudnnOpTensorDescriptor_t opTensorDesc,
-              const void *alpha1,
-              const cudnnTensorDescriptor_t aDesc,
-              const void *A,
-              const void *alpha2,
-              const cudnnTensorDescriptor_t bDesc,
-              const void *B,
-              const void *beta,
-              const cudnnTensorDescriptor_t cDesc,
-              void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t reduceTensorOp,
-                               cudnnDataType_t reduceTensorCompType,
-                               cudnnNanPropagation_t reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t reduceTensorIndices,
-                               cudnnIndicesType_t reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t *reduceTensorOp,
-                               cudnnDataType_t *reduceTensorCompType,
-                               cudnnNanPropagation_t *reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t *reduceTensorIndices,
-                               cudnnIndicesType_t *reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionIndicesSize(cudnnHandle_t handle,
-                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                             const cudnnTensorDescriptor_t aDesc,
-                             const cudnnTensorDescriptor_t cDesc,
-                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
-                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               const cudnnTensorDescriptor_t aDesc,
-                               const cudnnTensorDescriptor_t cDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnReduceTensor(cudnnHandle_t handle,
-                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                  void *indices,
-                  size_t indicesSizeInBytes,
-                  void *workspace,
-                  size_t workspaceSizeInBytes,
-                  const void *alpha,
-                  const cudnnTensorDescriptor_t aDesc,
-                  const void *A,
-                  const void *beta,
-                  const cudnnTensorDescriptor_t cDesc,
-                  void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
@@ -370,68 +373,70 @@ cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int k,  /* number of output feature maps */
-                           int c,  /* number of input feature maps */
-                           int h,  /* height of each input filter */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,          /* image data type */
+    cudnnTensorFormat_t format, int k, /* number of output feature maps */
+    int c,                             /* number of input feature maps */
+    int h,                             /* height of each input filter */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *k,  /* number of output feature maps */
-                           int *c,  /* number of input feature maps */
-                           int *h,  /* height of each input filter */
-                           int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
+    const cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t *dataType,           /* image data type */
+    cudnnTensorFormat_t *format, int *k, /* number of output feature maps */
+    int *c,                              /* number of input feature maps */
+    int *h,                              /* height of each input filter */
+    int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int nbDims,
-                           const int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType, /* image data type */
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *nbDims,
-                           int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, /* image data type */
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
@@ -439,622 +444,657 @@ cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int pad_h,      /* zero-padding height */
-                                int pad_w,      /* zero-padding width */
-                                int u,          /* vertical filter stride */
-                                int v,          /* horizontal filter stride */
-                                int dilation_h, /* filter dilation in the vertical dimension */
-                                int dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */
+    int pad_w,                                        /* zero-padding width */
+    int u,          /* vertical filter stride */
+    int v,          /* horizontal filter stride */
+    int dilation_h, /* filter dilation in the vertical dimension */
+    int dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int *pad_h,      /* zero-padding height */
-                                int *pad_w,      /* zero-padding width */
-                                int *u,          /* vertical filter stride */
-                                int *v,          /* horizontal filter stride */
-                                int *dilation_h, /* filter dilation in the vertical dimension */
-                                int *dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,      /* zero-padding height */
+    int *pad_w,      /* zero-padding width */
+    int *u,          /* vertical filter stride */
+    int *v,          /* horizontal filter stride */
+    int *dilation_h, /* filter dilation in the vertical dimension */
+    int *dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int *n,
-                                      int *c,
-                                      int *h,
-                                      int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLength, /* nbDims-2 size */
-                                const int padA[],
-                                const int filterStrideA[],
-                                const int dilationA[],
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLengthRequested,
-                                int *arrayLength,
-                                int padA[],
-                                int strideA[],
-                                int dilationA[],
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int nbDims,
-                                      int tensorOutputDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                     const cudnnTensorDescriptor_t xDesc,
-                                     const cudnnFilterDescriptor_t wDesc,
-                                     const cudnnConvolutionDescriptor_t convDesc,
-                                     const cudnnTensorDescriptor_t yDesc,
-                                     const int requestedAlgoCount,
-                                     int *returnedAlgoCount,
-                                     cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t xDesc,
-                                       const void *x,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t yDesc,
-                                       void *y,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
-                                       void *workSpace,
-                                       size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                    const cudnnTensorDescriptor_t xDesc,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const cudnnConvolutionDescriptor_t convDesc,
-                                    const cudnnTensorDescriptor_t yDesc,
-                                    cudnnConvolutionFwdPreference_t preference,
-                                    size_t memoryLimitInBytes,
-                                    cudnnConvolutionFwdAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t srcDesc,
-                                       const cudnnFilterDescriptor_t filterDesc,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t destDesc,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnFilterDescriptor_t filterDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const cudnnConvolutionDescriptor_t convDesc,
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        cudnnConvolutionFwdAlgo_t algo,
-                                        size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionForward(cudnnHandle_t handle,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnConvolutionDescriptor_t convDesc,
-                        cudnnConvolutionFwdAlgo_t algo,
-                        void *workSpace,
-                        size_t workSpaceSizeInBytes,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t yDesc,
-                        void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
-                                      const void *alpha1,
-                                      const cudnnTensorDescriptor_t xDesc,
-                                      const void *x,
-                                      const cudnnFilterDescriptor_t wDesc,
-                                      const void *w,
-                                      const cudnnConvolutionDescriptor_t convDesc,
-                                      cudnnConvolutionFwdAlgo_t algo,
-                                      void *workSpace,
-                                      size_t workSpaceSizeInBytes,
-                                      const void *alpha2,
-                                      const cudnnTensorDescriptor_t zDesc,
-                                      const void *z,
-                                      const cudnnTensorDescriptor_t biasDesc,
-                                      const void *bias,
-                                      const cudnnActivationDescriptor_t activationDesc,
-                                      const cudnnTensorDescriptor_t yDesc,
-                                      void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardBias(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dbDesc,
-                             void *db) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                            const cudnnTensorDescriptor_t xDesc,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnFilterDescriptor_t dwDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t xDesc,
-                                              const void *x,
-                                              const cudnnTensorDescriptor_t dyDesc,
-                                              const void *y,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t dwDesc,
-                                              void *dw,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                              void *workSpace,
-                                              size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                           const cudnnTensorDescriptor_t xDesc,
-                                           const cudnnTensorDescriptor_t dyDesc,
-                                           const cudnnConvolutionDescriptor_t convDesc,
-                                           const cudnnFilterDescriptor_t dwDesc,
-                                           cudnnConvolutionBwdFilterPreference_t preference,
-                                           size_t memoryLimitInBytes,
-                                           cudnnConvolutionBwdFilterAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t srcDesc,
-                                              const cudnnTensorDescriptor_t diffDesc,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t gradDesc,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
-                                               const cudnnTensorDescriptor_t xDesc,
-                                               const cudnnTensorDescriptor_t dyDesc,
-                                               const cudnnConvolutionDescriptor_t convDesc,
-                                               const cudnnFilterDescriptor_t gradDesc,
-                                               cudnnConvolutionBwdFilterAlgo_t algo,
-                                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
-                               const void *alpha,
-                               const cudnnTensorDescriptor_t xDesc,
-                               const void *x,
-                               const cudnnTensorDescriptor_t dyDesc,
-                               const void *dy,
-                               const cudnnConvolutionDescriptor_t convDesc,
-                               cudnnConvolutionBwdFilterAlgo_t algo,
-                               void *workSpace,
-                               size_t workSpaceSizeInBytes,
-                               const void *beta,
-                               const cudnnFilterDescriptor_t dwDesc,
-                               void *dw) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                          const cudnnFilterDescriptor_t wDesc,
-                                          const cudnnTensorDescriptor_t dyDesc,
-                                          const cudnnConvolutionDescriptor_t convDesc,
-                                          const cudnnTensorDescriptor_t dxDesc,
-                                          const int requestedAlgoCount,
-                                          int *returnedAlgoCount,
-                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t wDesc,
-                                            const void *w,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const void *dy,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t dxDesc,
-                                            void *dx,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
-                                            void *workSpace,
-                                            size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                         const cudnnFilterDescriptor_t wDesc,
-                                         const cudnnTensorDescriptor_t dyDesc,
-                                         const cudnnConvolutionDescriptor_t convDesc,
-                                         const cudnnTensorDescriptor_t dxDesc,
-                                         cudnnConvolutionBwdDataPreference_t preference,
-                                         size_t memoryLimitInBytes,
-                                         cudnnConvolutionBwdDataAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t filterDesc,
-                                            const cudnnTensorDescriptor_t diffDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t gradDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
-                                             const cudnnFilterDescriptor_t wDesc,
-                                             const cudnnTensorDescriptor_t dyDesc,
-                                             const cudnnConvolutionDescriptor_t convDesc,
-                                             const cudnnTensorDescriptor_t dxDesc,
-                                             cudnnConvolutionBwdDataAlgo_t algo,
-                                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardData(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnFilterDescriptor_t wDesc,
-                             const void *w,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnConvolutionDescriptor_t convDesc,
-                             cudnnConvolutionBwdDataAlgo_t algo,
-                             void *workSpace,
-                             size_t workSpaceSizeInBytes,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnIm2Col(cudnnHandle_t handle,
-            const cudnnTensorDescriptor_t xDesc,
-            const void *x,
-            const cudnnFilterDescriptor_t wDesc,
-            const cudnnConvolutionDescriptor_t convDesc,
-            void *colBuffer) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxForward(cudnnHandle_t handle,
-                    cudnnSoftmaxAlgorithm_t algo,
-                    cudnnSoftmaxMode_t mode,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxBackward(cudnnHandle_t handle,
-                     cudnnSoftmaxAlgorithm_t algo,
-                     cudnnSoftmaxMode_t mode,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t mode,
-                            cudnnNanPropagation_t maxpoolingNanOpt,
-                            int windowHeight,
-                            int windowWidth,
-                            int verticalPadding,
-                            int horizontalPadding,
-                            int verticalStride,
-                            int horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *windowHeight,
-                            int *windowWidth,
-                            int *verticalPadding,
-                            int *horizontalPadding,
-                            int *verticalStride,
-                            int *horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            const cudnnPoolingMode_t mode,
-                            const cudnnNanPropagation_t maxpoolingNanOpt,
-                            int nbDims,
-                            const int windowDimA[],
-                            const int paddingA[],
-                            const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            int nbDimsRequested,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *nbDims,
-                            int windowDimA[],
-                            int paddingA[],
-                            int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int nbDims,
-                                  int outputTensorDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
@@ -1062,72 +1102,69 @@ cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int *n,
-                                  int *c,
-                                  int *h,
-                                  int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingForward(cudnnHandle_t handle,
-                    const cudnnPoolingDescriptor_t poolingDesc,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingBackward(cudnnHandle_t handle,
-                     const cudnnPoolingDescriptor_t poolingDesc,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
-                             cudnnActivationMode_t mode,
-                             cudnnNanPropagation_t reluNanOpt,
-                             double coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1136,9 +1173,10 @@ cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                              cudnnActivationMode_t *mode,
-                             cudnnNanPropagation_t *reluNanOpt,
-                             double *coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1146,65 +1184,68 @@ cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationForward(cudnnHandle_t handle,
-                       cudnnActivationDescriptor_t activationDesc,
-                       const void *alpha,
-                       const cudnnTensorDescriptor_t xDesc,
-                       const void *x,
-                       const void *beta,
-                       const cudnnTensorDescriptor_t yDesc,
-                       void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationBackward(cudnnHandle_t handle,
-                        cudnnActivationDescriptor_t activationDesc,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t yDesc,
-                        const void *y,
-                        const cudnnTensorDescriptor_t dyDesc,
-                        const void *dy,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t dxDesc,
-                        void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
@@ -1212,110 +1253,104 @@ cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrn
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelForward(cudnnHandle_t handle,
-                            cudnnLRNDescriptor_t normDesc,
-                            cudnnLRNMode_t lrnMode,
-                            const void *alpha,
-                            const cudnnTensorDescriptor_t xDesc,
-                            const void *x,
-                            const void *beta,
-                            const cudnnTensorDescriptor_t yDesc,
-                            void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
-                             cudnnLRNDescriptor_t normDesc,
-                             cudnnLRNMode_t lrnMode,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t yDesc,
-                             const void *y,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
-                                  cudnnLRNDescriptor_t normDesc,
-                                  cudnnDivNormMode_t mode,
-                                  const void *alpha,
-                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
-                                  const void *x,
-                                  const void *means, /* if NULL, means are assumed to be zero */
-                                  void *temp,
-                                  void *temp2,
-                                  const void *beta,
-                                  const cudnnTensorDescriptor_t yDesc,
-                                  void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
-                                   cudnnLRNDescriptor_t normDesc,
-                                   cudnnDivNormMode_t mode,
-                                   const void *alpha,
-                                   const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
-                                   const void *x,
-                                   const void *means, /* if NULL, means are assumed to be zero */
-                                   const void *dy,
-                                   void *temp,
-                                   void *temp2,
-                                   const void *beta,
-                                   const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
-                                   void *dx,                                   /* output x differential */
-                                   void *dMeans) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc, /* same desc for x, means, dy, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+    void *dx,                                   /* output x differential */
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
-                              const cudnnTensorDescriptor_t xDesc,
-                              cudnnBatchNormMode_t mode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardTraining(
-    cudnnHandle_t handle,
-    cudnnBatchNormMode_t mode,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
     const void *alpha, /* alpha[0] = result blend factor */
     const void *beta,  /* beta[0] = dest layer blend factor */
 
-    const cudnnTensorDescriptor_t xDesc,
-    const void *x, /* NxCxHxW */
-    const cudnnTensorDescriptor_t yDesc,
-    void *y, /* NxCxHxW */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
 
     /* Shared desc for the next 6 tensors in the argument list.
        Data type to be set as follows:
@@ -1323,13 +1358,13 @@ cudnnBatchNormalizationForwardTraining(
        Dimensions for this descriptor depend on normalization mode
        - Spatial Normalization : tensors are expected to have dims 1xCx1x1
         (normalization is performed across NxHxW)
-       - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
-        (normalization is performed across N) */
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
     const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
-    const void *bnScale,
-    const void *bnBias,
+    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
+     */
+    const void *bnScale, const void *bnBias,
 
     /* MUST use factor=1 in the very first call of a complete training cycle.
        Use a factor=1/(1+n) at N-th call to the function to get
@@ -1347,162 +1382,173 @@ cudnnBatchNormalizationForwardTraining(
        of  variance[x] (factor is applied in the same way as for runningMean) */
     void *resultRunningVariance,
 
-    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
     double epsilon,
 
     /* Optionally save intermediate results from the forward pass here
        - can be reused to speed up backward pass. NULL if unused */
-    void *resultSaveMean,
-    void *resultSaveInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
-                                        cudnnBatchNormMode_t mode,
-                                        const void *alpha, /* alpha[0] = result blend factor */
-                                        const void *beta,  /* beta[0] = dest layer blend factor */
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const void *x, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        void *y, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-                                        const void *bnScale,
-                                        const void *bnBias,
-                                        const void *estimatedMean,
-                                        const void *estimatedVariance,
-                                        double epsilon) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationBackward(cudnnHandle_t handle,
-                                cudnnBatchNormMode_t mode,
-                                const void *alphaDataDiff,
-                                const void *betaDataDiff,
-                                const void *alphaParamDiff,
-                                const void *betaParamDiff,
-                                const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
-                                const void *x,
-                                const cudnnTensorDescriptor_t dyDesc,
-                                const void *dy,
-                                const cudnnTensorDescriptor_t dxDesc,
-                                void *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                const void *bnScale, /* bnBias doesn't affect backpropagation */
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void *dBnScaleResult,
-                                void *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double epsilon,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale, /* bnBias doesn't affect backpropagation */
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void *savedMean,
-                                const void *savedInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
-                                       cudnnSamplerType_t samplerType,
-                                       cudnnDataType_t dataType,
-                                       const int nbDims,
-                                       const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
-                                   const cudnnSpatialTransformerDescriptor_t stDesc,
-                                   const void *theta,
-                                   void *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
-                                    const cudnnSpatialTransformerDescriptor_t stDesc,
-                                    const void *dgrid,
-                                    void *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
-                             cudnnSpatialTransformerDescriptor_t stDesc,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *grid,
-                             const void *beta,
-                             cudnnTensorDescriptor_t yDesc,
-                             void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
-                              cudnnSpatialTransformerDescriptor_t stDesc,
-                              const void *alpha,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const void *x,
-                              const void *beta,
-                              const cudnnTensorDescriptor_t dxDesc,
-                              void *dx,
-                              const void *alphaDgrid,
-                              const cudnnTensorDescriptor_t dyDesc,
-                              const void *dy,
-                              const void *grid,
-                              const void *betaDgrid,
-                              void *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
@@ -1510,99 +1556,95 @@ cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float dropout,
-                          void *states,
-                          size_t stateSizeInBytes,
-                          unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                              cudnnHandle_t handle,
-                              float dropout,
-                              void *states,
-                              size_t stateSizeInBytes,
-                              unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float *dropout,
-                          void **states,
-                          unsigned long long *seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float *, void **, unsigned long long *);
+cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout,
+    void **states, unsigned long long *seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float *, void **, unsigned long long *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutForward(cudnnHandle_t handle,
-                    const cudnnDropoutDescriptor_t dropoutDesc,
-                    const cudnnTensorDescriptor_t xdesc,
-                    const void *x,
-                    const cudnnTensorDescriptor_t ydesc,
-                    void *y,
-                    void *reserveSpace,
-                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutBackward(cudnnHandle_t handle,
-                     const cudnnDropoutDescriptor_t dropoutDesc,
-                     const cudnnTensorDescriptor_t dydesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dxdesc,
-                     void *dx,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
@@ -1610,184 +1652,192 @@ cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle,
-                                        const cudnnRNNDescriptor_t rnnDesc,
-                                        const int seqLength,
-                                        const cudnnTensorDescriptor_t *xDesc,
-                                        const void *x,
-                                        const cudnnTensorDescriptor_t hxDesc,
-                                        const void *hx,
-                                        const cudnnTensorDescriptor_t cxDesc,
-                                        const void *cx,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const void *w,
-                                        const cudnnTensorDescriptor_t *yDesc,
-                                        void *y,
-                                        const cudnnTensorDescriptor_t hyDesc,
-                                        void *hy,
-                                        const cudnnTensorDescriptor_t cyDesc,
-                                        void *cy,
-                                        const float findIntensity,
-                                        const int requestedAlgoCount,
-                                        int *returnedAlgoCount,
-                                        cudnnAlgorithmPerformance_t *perfResults,
-                                        void *workspace,
-                                        size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t cxDesc,
-                                       const void *cx,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       void *y,
-                                       const cudnnTensorDescriptor_t hyDesc,
-                                       void *hy,
-                                       const cudnnTensorDescriptor_t cyDesc,
-                                       void *cy,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                    const cudnnRNNDescriptor_t rnnDesc,
-                                    const int seqLength,
-                                    const cudnnTensorDescriptor_t *yDesc,
-                                    const void *y,
-                                    const cudnnTensorDescriptor_t *dyDesc,
-                                    const void *dy,
-                                    const cudnnTensorDescriptor_t dhyDesc,
-                                    const void *dhy,
-                                    const cudnnTensorDescriptor_t dcyDesc,
-                                    const void *dcy,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const void *w,
-                                    const cudnnTensorDescriptor_t hxDesc,
-                                    const void *hx,
-                                    const cudnnTensorDescriptor_t cxDesc,
-                                    const void *cx,
-                                    const cudnnTensorDescriptor_t *dxDesc,
-                                    void *dx,
-                                    const cudnnTensorDescriptor_t dhxDesc,
-                                    void *dhx,
-                                    const cudnnTensorDescriptor_t dcxDesc,
-                                    void *dcx,
-                                    const float findIntensity,
-                                    const int requestedAlgoCount,
-                                    int *returnedAlgoCount,
-                                    cudnnAlgorithmPerformance_t *perfResults,
-                                    void *workspace,
-                                    size_t workSpaceSizeInBytes,
-                                    void *reserveSpace,
-                                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const cudnnTensorDescriptor_t *dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnTensorDescriptor_t *dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       const void *y,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       const void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       const cudnnFilterDescriptor_t dwDesc,
-                                       void *dw,
-                                       const void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const float findIntensity, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults,
+    const void *workspace, size_t workSpaceSizeInBytes,
+    const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  findIntensity, requestedAlgoCount, returnedAlgoCount,
+                  perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                             const int minibatch,
-                             const cudnnDataType_t dataType,
-                             cudnnPersistentRNNPlan_t *plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
@@ -1795,289 +1845,285 @@ cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      const int hiddenSize,
-                      const int numLayers,
-                      cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps. */
-                      cudnnRNNInputMode_t inputMode,
-                      cudnnDirectionMode_t direction,
-                      cudnnRNNMode_t mode,
-                      cudnnRNNAlgo_t algo,
-                      cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNProjectionLayers(cudnnHandle_t handle,
-                            cudnnRNNDescriptor_t rnnDesc,
-                            const int recProjSize,
-                            const int outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
+cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+                            const int recProjSize, const int outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNProjectionLayers(cudnnHandle_t handle,
-                            const cudnnRNNDescriptor_t rnnDesc,
-                            int *recProjSize,
-                            int *outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize,
+    int *outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      int *hiddenSize,
-                      int *numLayers,
-                      cudnnDropoutDescriptor_t *dropoutDesc,
-                      cudnnRNNInputMode_t *inputMode,
-                      cudnnDirectionMode_t *direction,
-                      cudnnRNNMode_t *mode,
-                      cudnnRNNAlgo_t *algo,
-                      cudnnDataType_t *dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(
+    cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNWorkspaceSize(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNTrainingReserveSize(cudnnHandle_t handle,
-                               const cudnnRNNDescriptor_t rnnDesc,
-                               const int seqLength,
-                               const cudnnTensorDescriptor_t *xDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNParamsSize(cudnnHandle_t handle,
-                      const cudnnRNNDescriptor_t rnnDesc,
-                      const cudnnTensorDescriptor_t xDesc,
-                      size_t *sizeInBytes,
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
                       cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle,
-                                const cudnnRNNDescriptor_t rnnDesc,
-                                const int pseudoLayer,
-                                const cudnnTensorDescriptor_t xDesc,
-                                const cudnnFilterDescriptor_t wDesc,
-                                const void *w,
-                                const int linLayerID,
-                                cudnnFilterDescriptor_t linLayerMatDesc,
-                                void **linLayerMat) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle,
-                              const cudnnRNNDescriptor_t rnnDesc,
-                              const int pseudoLayer,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const cudnnFilterDescriptor_t wDesc,
-                              const void *w,
-                              const int linLayerID,
-                              cudnnFilterDescriptor_t linLayerBiasDesc,
-                              void **linLayerBias) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInference(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         const void *x,
-                         const cudnnTensorDescriptor_t hxDesc,
-                         const void *hx,
-                         const cudnnTensorDescriptor_t cxDesc,
-                         const void *cx,
-                         const cudnnFilterDescriptor_t wDesc,
-                         const void *w,
-                         const cudnnTensorDescriptor_t *yDesc,
-                         void *y,
-                         const cudnnTensorDescriptor_t hyDesc,
-                         void *hy,
-                         const cudnnTensorDescriptor_t cyDesc,
-                         void *cy,
-                         void *workspace,
-                         size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTraining(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t cxDesc,
-                        const void *cx,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        void *y,
-                        const cudnnTensorDescriptor_t hyDesc,
-                        void *hy,
-                        const cudnnTensorDescriptor_t cyDesc,
-                        void *cy,
-                        void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardData(cudnnHandle_t handle,
-                     const cudnnRNNDescriptor_t rnnDesc,
-                     const int seqLength,
-                     const cudnnTensorDescriptor_t *yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t *dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dhyDesc,
-                     const void *dhy,
-                     const cudnnTensorDescriptor_t dcyDesc,
-                     const void *dcy,
-                     const cudnnFilterDescriptor_t wDesc,
-                     const void *w,
-                     const cudnnTensorDescriptor_t hxDesc,
-                     const void *hx,
-                     const cudnnTensorDescriptor_t cxDesc,
-                     const void *cx,
-                     const cudnnTensorDescriptor_t *dxDesc,
-                     void *dx,
-                     const cudnnTensorDescriptor_t dhxDesc,
-                     void *dhx,
-                     const cudnnTensorDescriptor_t dcxDesc,
-                     void *dcx,
-                     void *workspace,
-                     size_t workSpaceSizeInBytes,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeights(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        const void *y,
-                        const void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        const cudnnFilterDescriptor_t dwDesc,
-                        void *dw,
-                        const void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
@@ -2085,82 +2131,102 @@ cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCTCLoss(
+cudnnStatus_t CUDNNWINAPI cudnnCTCLoss(
     cudnnHandle_t handle,
     const cudnnTensorDescriptor_t
-        probsDesc,     /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
-                          mini batch size, A is the alphabet size)  */
-    const void *probs, /* probabilities after softmax, in GPU memory */
-    const int *labels, /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    void *costs,                                 /* the returned costs of CTC, in GPU memory */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
-    const void *gradients,   /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the
+                      mini batch size, A is the alphabet size)  */
+    const void *probs,       /* probabilities after softmax, in GPU memory */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    void *costs,             /* the returned costs of CTC, in GPU memory */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A */
+    const void *gradients,   /* the returned CTC gradients, in GPU memory, to
+                                compute costs only, set it to NULL */
     cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
     cudnnCTCLossDescriptor_t ctcLossDesc,
-    void *workspace,              /* pointer to the workspace, in GPU memory */
+    void *workspace, /* pointer to the workspace, in GPU memory */
     size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, const int *, const int *, void *, const cudnnTensorDescriptor_t, const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *,
+      const int *, const int *, void *, const cudnnTensorDescriptor_t,
+      const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCTCLoss");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths,
+                  costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossWorkspaceSize(
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize(
     cudnnHandle_t handle,
-    const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
-                                                timing steps, N is the mini batch size, A is the alphabet size) */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
-                                                    dimensions are T,N,A. To compute costs
-                                                    only, set it to NULL */
-    const int *labels,                           /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    cudnnCTCLossAlgo_t algo,                     /* algorithm selected, supported now 0 and 1 */
-    cudnnCTCLossDescriptor_t ctcLossDesc,
-    size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const int *, const int *, const int *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the
+                      timing steps, N is the mini batch size, A is the alphabet
+                      size) */
+    const cudnnTensorDescriptor_t
+        gradientsDesc,       /* Tensor descriptor for gradients, the
+                                dimensions are T,N,A. To compute costs
+                                only, set it to NULL */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const int *, const int *, const int *,
+      cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc, sizeInBytes);
+  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths,
+                  inputLengths, algo, ctcLossDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor(
+    cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCopyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(src, dest);
@@ -2168,135 +2234,141 @@ cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorith
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToCreate);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t algoDesc,
-                             cudnnStatus_t status,
-                             float time,
-                             size_t memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t, cudnnStatus_t, float, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc,
+    cudnnStatus_t status, float time, size_t memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t,
+                                               cudnnAlgorithmDescriptor_t,
+                                               cudnnStatus_t, float, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t *algoDesc,
-                             cudnnStatus_t *status,
-                             float *time,
-                             size_t *memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, cudnnStatus_t *, float *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance(
+    const cudnnAlgorithmPerformance_t algoPerf,
+    cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time,
+    size_t *memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *,
+      cudnnStatus_t *, float *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToDestroy);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize(
+    cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+    size_t *algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSaveAlgorithm(cudnnHandle_t handle,
-                   cudnnAlgorithmDescriptor_t algoDesc,
-                   void *algoSpace,
-                   size_t algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
+cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+                   void *algoSpace, size_t algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSaveAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreAlgorithm(cudnnHandle_t handle,
-                      void *algoSpace,
-                      size_t algoSpaceSizeInBytes,
-                      cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm(
+    cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNSetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t clipMode,
-                cudnnNanPropagation_t clipNanOpt,
-                double lclip,
-                double rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, cudnnNanPropagation_t, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t clipMode,
+                                          cudnnNanPropagation_t clipNanOpt,
+                                          double lclip, double rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t,
+      cudnnNanPropagation_t, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNSetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNGetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t *clipMode,
-                cudnnNanPropagation_t *clipNanOpt,
-                double *lclip,
-                double *rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, cudnnNanPropagation_t *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t *clipMode,
+                                          cudnnNanPropagation_t *clipNanOpt,
+                                          double *lclip, double *rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *,
+      cudnnNanPropagation_t *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNGetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata,
+                                           cudnnCallback_t fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata,
+                                           cudnnCallback_t *fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnRNNPaddingMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
@@ -2304,7 +2376,7 @@ cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *padd
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(RNNDataDesc);
@@ -2312,199 +2384,202 @@ cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(RNNDataDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc,
-                          cudnnDataType_t dataType,
-                          cudnnRNNDataLayout_t layout,
-                          int maxSeqLength,
-                          int batchSize,
-                          int vectorSize,
-                          const int seqLengthArray[], /* length of each sequence in the batch */
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, int, const int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t dataType,
+    cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize,
+    int vectorSize,
+    const int seqLengthArray[], /* length of each sequence in the batch */
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int,
+      int, const int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, paddingFill);
+  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, seqLengthArray, paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc,
-                          cudnnDataType_t *dataType,
-                          cudnnRNNDataLayout_t *layout,
-                          int *maxSeqLength,
-                          int *batchSize,
-                          int *vectorSize,
-                          int arrayLengthRequested,
-                          int seqLengthArray[],
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, int *, int *, int *, int, int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t *dataType,
+    cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize,
+    int *vectorSize, int arrayLengthRequested, int seqLengthArray[],
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *,
+      int *, int *, int *, int, int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, arrayLengthRequested, seqLengthArray, paddingFill);
+  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, arrayLengthRequested, seqLengthArray,
+                  paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTrainingEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnTensorDescriptor_t cxDesc,
-                          const void *cx,
-                          const cudnnFilterDescriptor_t wDesc,
-                          const void *w,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          void *y,
-                          const cudnnTensorDescriptor_t hyDesc,
-                          void *hy,
-                          const cudnnTensorDescriptor_t cyDesc,
-                          void *cy,
-                          const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                          const void *keys,                     /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                          void *cAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                          void *iAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                          void *queries,                        /* reserved, should pass NULL */
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTrainingEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInferenceEx(cudnnHandle_t handle,
-                           const cudnnRNNDescriptor_t rnnDesc,
-                           const cudnnRNNDataDescriptor_t xDesc,
-                           const void *x,
-                           const cudnnTensorDescriptor_t hxDesc,
-                           const void *hx,
-                           const cudnnTensorDescriptor_t cxDesc,
-                           const void *cx,
-                           const cudnnFilterDescriptor_t wDesc,
-                           const void *w,
-                           const cudnnRNNDataDescriptor_t yDesc,
-                           void *y,
-                           const cudnnTensorDescriptor_t hyDesc,
-                           void *hy,
-                           const cudnnTensorDescriptor_t cyDesc,
-                           void *cy,
-                           const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                           const void *keys,                     /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                           void *cAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                           void *iAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                           void *queries,                        /* reserved, should pass NULL */
-                           void *workSpace,
-                           size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInferenceEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardDataEx(cudnnHandle_t handle,
-                       const cudnnRNNDescriptor_t rnnDesc,
-                       const cudnnRNNDataDescriptor_t yDesc,
-                       const void *y,
-                       const cudnnRNNDataDescriptor_t dyDesc,
-                       const void *dy,
-                       const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
-                       const void *dcAttn,                    /* reserved, should pass NULL */
-                       const cudnnTensorDescriptor_t dhyDesc,
-                       const void *dhy,
-                       const cudnnTensorDescriptor_t dcyDesc,
-                       const void *dcy,
-                       const cudnnFilterDescriptor_t wDesc,
-                       const void *w,
-                       const cudnnTensorDescriptor_t hxDesc,
-                       const void *hx,
-                       const cudnnTensorDescriptor_t cxDesc,
-                       const void *cx,
-                       const cudnnRNNDataDescriptor_t dxDesc,
-                       void *dx,
-                       const cudnnTensorDescriptor_t dhxDesc,
-                       void *dhx,
-                       const cudnnTensorDescriptor_t dcxDesc,
-                       void *dcx,
-                       const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
-                       void *dkeys,                           /* reserved, should pass NULL */
-                       void *workSpace,
-                       size_t workSpaceSizeInBytes,
-                       void *reserveSpace,
-                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y,
+    const cudnnRNNDataDescriptor_t dyDesc, const void *dy,
+    const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
+    const void *dcAttn,                    /* reserved, should pass NULL */
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnRNNDataDescriptor_t dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+    const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
+    void *dkeys,                           /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardDataEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn,
+                  dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx,
+                  dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys,
+                  workSpace, workSpaceSizeInBytes, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeightsEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          const void *y,
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          const cudnnFilterDescriptor_t dwDesc,
-                          void *dw,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, void *, size_t, const cudnnFilterDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *, void *, size_t,
+      const cudnnFilterDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeightsEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace,
+                  workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v6(cudnnHandle_t handle,
-                         cudnnRNNDescriptor_t rnnDesc,
-                         const int hiddenSize,
-                         const int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnRNNAlgo_t algo,
-                         cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v5(cudnnRNNDescriptor_t rnnDesc,
-                         int hiddenSize,
-                         int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode,
+    cudnnDirectionMode_t direction, cudnnRNNMode_t mode,
+    cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, dataType);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, dataType);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cudnn_7_4.inc b/tensorflow/stream_executor/cuda/cudnn_7_4.inc
index bd9f49f9780..883c8ba8812 100644
--- a/tensorflow/stream_executor/cuda/cudnn_7_4.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_7_4.inc
@@ -2,73 +2,71 @@
 
 extern "C" {
 
-size_t CUDNNWINAPI
-cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-size_t CUDNNWINAPI
-cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *CUDNNWINAPI
-cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
+cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle,
+                                                 cudnnStatus_t *rstatus,
+                                                 cudnnErrQueryMode_t mode,
+                                                 cudnnRuntimeTag_t *tag) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnQueryRuntimeError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rstatus, mode, tag);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreate(cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroy(cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
@@ -76,100 +74,97 @@ cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnTensorFormat_t format,
-                           cudnnDataType_t dataType, /* image data type */
-                           int n,                    /* number of inputs (batch size) */
-                           int c,                    /* number of input feature maps */
-                           int h,                    /* height of input section */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnDataType_t dataType, /* image data type */
-                             int n,                    /* number of inputs (batch size) */
-                             int c,                    /* number of input feature maps */
-                             int h,                    /* height of input section */
-                             int w,                    /* width of input section */
-                             int nStride,
-                             int cStride,
-                             int hStride,
-                             int wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w,                    /* width of input section */
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           int *n,                    /* number of inputs (batch size) */
-                           int *c,                    /* number of input feature maps  */
-                           int *h,                    /* height of input section */
-                           int *w,                    /* width of input section */
-                           int *nStride,
-                           int *cStride,
-                           int *hStride,
-                           int *wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType, /* image data type */
+    int *n,                    /* number of inputs (batch size) */
+    int *c,                    /* number of input feature maps  */
+    int *h,                    /* height of input section */
+    int *w,                    /* width of input section */
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t dataType,
-                           int nbDims,
-                           const int dimA[],
-                           const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnTensorFormat_t format,
-                             cudnnDataType_t dataType,
-                             int nbDims,
-                             const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType,
-                           int *nbDims,
-                           int dimA[],
-                           int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
@@ -177,35 +172,33 @@ cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnTransformTensor(cudnnHandle_t handle,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t yDesc,
-                     void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnAddTensor(cudnnHandle_t handle,
-               const void *alpha,
-               const cudnnTensorDescriptor_t aDesc,
-               const void *A,
-               const void *beta,
-               const cudnnTensorDescriptor_t cDesc,
-               void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
@@ -213,29 +206,29 @@ cudnnAddTensor(cudnnHandle_t handle,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t opTensorOp,
-                           cudnnDataType_t opTensorCompType,
-                           cudnnNanPropagation_t opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t *opTensorOp,
-                           cudnnDataType_t *opTensorCompType,
-                           cudnnNanPropagation_t *opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
@@ -243,126 +236,136 @@ cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnOpTensor(cudnnHandle_t handle,
-              const cudnnOpTensorDescriptor_t opTensorDesc,
-              const void *alpha1,
-              const cudnnTensorDescriptor_t aDesc,
-              const void *A,
-              const void *alpha2,
-              const cudnnTensorDescriptor_t bDesc,
-              const void *B,
-              const void *beta,
-              const cudnnTensorDescriptor_t cDesc,
-              void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t reduceTensorOp,
-                               cudnnDataType_t reduceTensorCompType,
-                               cudnnNanPropagation_t reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t reduceTensorIndices,
-                               cudnnIndicesType_t reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t *reduceTensorOp,
-                               cudnnDataType_t *reduceTensorCompType,
-                               cudnnNanPropagation_t *reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t *reduceTensorIndices,
-                               cudnnIndicesType_t *reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionIndicesSize(cudnnHandle_t handle,
-                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                             const cudnnTensorDescriptor_t aDesc,
-                             const cudnnTensorDescriptor_t cDesc,
-                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
-                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               const cudnnTensorDescriptor_t aDesc,
-                               const cudnnTensorDescriptor_t cDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnReduceTensor(cudnnHandle_t handle,
-                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                  void *indices,
-                  size_t indicesSizeInBytes,
-                  void *workspace,
-                  size_t workspaceSizeInBytes,
-                  const void *alpha,
-                  const cudnnTensorDescriptor_t aDesc,
-                  const void *A,
-                  const void *beta,
-                  const cudnnTensorDescriptor_t cDesc,
-                  void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
@@ -370,68 +373,70 @@ cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int k,  /* number of output feature maps */
-                           int c,  /* number of input feature maps */
-                           int h,  /* height of each input filter */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,          /* image data type */
+    cudnnTensorFormat_t format, int k, /* number of output feature maps */
+    int c,                             /* number of input feature maps */
+    int h,                             /* height of each input filter */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *k,  /* number of output feature maps */
-                           int *c,  /* number of input feature maps */
-                           int *h,  /* height of each input filter */
-                           int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
+    const cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t *dataType,           /* image data type */
+    cudnnTensorFormat_t *format, int *k, /* number of output feature maps */
+    int *c,                              /* number of input feature maps */
+    int *h,                              /* height of each input filter */
+    int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int nbDims,
-                           const int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType, /* image data type */
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *nbDims,
-                           int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, /* image data type */
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
@@ -439,622 +444,657 @@ cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int pad_h,      /* zero-padding height */
-                                int pad_w,      /* zero-padding width */
-                                int u,          /* vertical filter stride */
-                                int v,          /* horizontal filter stride */
-                                int dilation_h, /* filter dilation in the vertical dimension */
-                                int dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */
+    int pad_w,                                        /* zero-padding width */
+    int u,          /* vertical filter stride */
+    int v,          /* horizontal filter stride */
+    int dilation_h, /* filter dilation in the vertical dimension */
+    int dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int *pad_h,      /* zero-padding height */
-                                int *pad_w,      /* zero-padding width */
-                                int *u,          /* vertical filter stride */
-                                int *v,          /* horizontal filter stride */
-                                int *dilation_h, /* filter dilation in the vertical dimension */
-                                int *dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,      /* zero-padding height */
+    int *pad_w,      /* zero-padding width */
+    int *u,          /* vertical filter stride */
+    int *v,          /* horizontal filter stride */
+    int *dilation_h, /* filter dilation in the vertical dimension */
+    int *dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int *n,
-                                      int *c,
-                                      int *h,
-                                      int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLength, /* nbDims-2 size */
-                                const int padA[],
-                                const int filterStrideA[],
-                                const int dilationA[],
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLengthRequested,
-                                int *arrayLength,
-                                int padA[],
-                                int strideA[],
-                                int dilationA[],
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int nbDims,
-                                      int tensorOutputDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                     const cudnnTensorDescriptor_t xDesc,
-                                     const cudnnFilterDescriptor_t wDesc,
-                                     const cudnnConvolutionDescriptor_t convDesc,
-                                     const cudnnTensorDescriptor_t yDesc,
-                                     const int requestedAlgoCount,
-                                     int *returnedAlgoCount,
-                                     cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t xDesc,
-                                       const void *x,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t yDesc,
-                                       void *y,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
-                                       void *workSpace,
-                                       size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                    const cudnnTensorDescriptor_t xDesc,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const cudnnConvolutionDescriptor_t convDesc,
-                                    const cudnnTensorDescriptor_t yDesc,
-                                    cudnnConvolutionFwdPreference_t preference,
-                                    size_t memoryLimitInBytes,
-                                    cudnnConvolutionFwdAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t srcDesc,
-                                       const cudnnFilterDescriptor_t filterDesc,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t destDesc,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnFilterDescriptor_t filterDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const cudnnConvolutionDescriptor_t convDesc,
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        cudnnConvolutionFwdAlgo_t algo,
-                                        size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionForward(cudnnHandle_t handle,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnConvolutionDescriptor_t convDesc,
-                        cudnnConvolutionFwdAlgo_t algo,
-                        void *workSpace,
-                        size_t workSpaceSizeInBytes,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t yDesc,
-                        void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
-                                      const void *alpha1,
-                                      const cudnnTensorDescriptor_t xDesc,
-                                      const void *x,
-                                      const cudnnFilterDescriptor_t wDesc,
-                                      const void *w,
-                                      const cudnnConvolutionDescriptor_t convDesc,
-                                      cudnnConvolutionFwdAlgo_t algo,
-                                      void *workSpace,
-                                      size_t workSpaceSizeInBytes,
-                                      const void *alpha2,
-                                      const cudnnTensorDescriptor_t zDesc,
-                                      const void *z,
-                                      const cudnnTensorDescriptor_t biasDesc,
-                                      const void *bias,
-                                      const cudnnActivationDescriptor_t activationDesc,
-                                      const cudnnTensorDescriptor_t yDesc,
-                                      void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardBias(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dbDesc,
-                             void *db) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                            const cudnnTensorDescriptor_t xDesc,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnFilterDescriptor_t dwDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t xDesc,
-                                              const void *x,
-                                              const cudnnTensorDescriptor_t dyDesc,
-                                              const void *y,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t dwDesc,
-                                              void *dw,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                              void *workSpace,
-                                              size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                           const cudnnTensorDescriptor_t xDesc,
-                                           const cudnnTensorDescriptor_t dyDesc,
-                                           const cudnnConvolutionDescriptor_t convDesc,
-                                           const cudnnFilterDescriptor_t dwDesc,
-                                           cudnnConvolutionBwdFilterPreference_t preference,
-                                           size_t memoryLimitInBytes,
-                                           cudnnConvolutionBwdFilterAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t srcDesc,
-                                              const cudnnTensorDescriptor_t diffDesc,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t gradDesc,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
-                                               const cudnnTensorDescriptor_t xDesc,
-                                               const cudnnTensorDescriptor_t dyDesc,
-                                               const cudnnConvolutionDescriptor_t convDesc,
-                                               const cudnnFilterDescriptor_t gradDesc,
-                                               cudnnConvolutionBwdFilterAlgo_t algo,
-                                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
-                               const void *alpha,
-                               const cudnnTensorDescriptor_t xDesc,
-                               const void *x,
-                               const cudnnTensorDescriptor_t dyDesc,
-                               const void *dy,
-                               const cudnnConvolutionDescriptor_t convDesc,
-                               cudnnConvolutionBwdFilterAlgo_t algo,
-                               void *workSpace,
-                               size_t workSpaceSizeInBytes,
-                               const void *beta,
-                               const cudnnFilterDescriptor_t dwDesc,
-                               void *dw) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                          const cudnnFilterDescriptor_t wDesc,
-                                          const cudnnTensorDescriptor_t dyDesc,
-                                          const cudnnConvolutionDescriptor_t convDesc,
-                                          const cudnnTensorDescriptor_t dxDesc,
-                                          const int requestedAlgoCount,
-                                          int *returnedAlgoCount,
-                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t wDesc,
-                                            const void *w,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const void *dy,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t dxDesc,
-                                            void *dx,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
-                                            void *workSpace,
-                                            size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                         const cudnnFilterDescriptor_t wDesc,
-                                         const cudnnTensorDescriptor_t dyDesc,
-                                         const cudnnConvolutionDescriptor_t convDesc,
-                                         const cudnnTensorDescriptor_t dxDesc,
-                                         cudnnConvolutionBwdDataPreference_t preference,
-                                         size_t memoryLimitInBytes,
-                                         cudnnConvolutionBwdDataAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t filterDesc,
-                                            const cudnnTensorDescriptor_t diffDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t gradDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
-                                             const cudnnFilterDescriptor_t wDesc,
-                                             const cudnnTensorDescriptor_t dyDesc,
-                                             const cudnnConvolutionDescriptor_t convDesc,
-                                             const cudnnTensorDescriptor_t dxDesc,
-                                             cudnnConvolutionBwdDataAlgo_t algo,
-                                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardData(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnFilterDescriptor_t wDesc,
-                             const void *w,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnConvolutionDescriptor_t convDesc,
-                             cudnnConvolutionBwdDataAlgo_t algo,
-                             void *workSpace,
-                             size_t workSpaceSizeInBytes,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnIm2Col(cudnnHandle_t handle,
-            const cudnnTensorDescriptor_t xDesc,
-            const void *x,
-            const cudnnFilterDescriptor_t wDesc,
-            const cudnnConvolutionDescriptor_t convDesc,
-            void *colBuffer) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxForward(cudnnHandle_t handle,
-                    cudnnSoftmaxAlgorithm_t algo,
-                    cudnnSoftmaxMode_t mode,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxBackward(cudnnHandle_t handle,
-                     cudnnSoftmaxAlgorithm_t algo,
-                     cudnnSoftmaxMode_t mode,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t mode,
-                            cudnnNanPropagation_t maxpoolingNanOpt,
-                            int windowHeight,
-                            int windowWidth,
-                            int verticalPadding,
-                            int horizontalPadding,
-                            int verticalStride,
-                            int horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *windowHeight,
-                            int *windowWidth,
-                            int *verticalPadding,
-                            int *horizontalPadding,
-                            int *verticalStride,
-                            int *horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            const cudnnPoolingMode_t mode,
-                            const cudnnNanPropagation_t maxpoolingNanOpt,
-                            int nbDims,
-                            const int windowDimA[],
-                            const int paddingA[],
-                            const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            int nbDimsRequested,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *nbDims,
-                            int windowDimA[],
-                            int paddingA[],
-                            int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int nbDims,
-                                  int outputTensorDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
@@ -1062,72 +1102,69 @@ cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int *n,
-                                  int *c,
-                                  int *h,
-                                  int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingForward(cudnnHandle_t handle,
-                    const cudnnPoolingDescriptor_t poolingDesc,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingBackward(cudnnHandle_t handle,
-                     const cudnnPoolingDescriptor_t poolingDesc,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
-                             cudnnActivationMode_t mode,
-                             cudnnNanPropagation_t reluNanOpt,
-                             double coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1136,9 +1173,10 @@ cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                              cudnnActivationMode_t *mode,
-                             cudnnNanPropagation_t *reluNanOpt,
-                             double *coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1146,65 +1184,68 @@ cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationForward(cudnnHandle_t handle,
-                       cudnnActivationDescriptor_t activationDesc,
-                       const void *alpha,
-                       const cudnnTensorDescriptor_t xDesc,
-                       const void *x,
-                       const void *beta,
-                       const cudnnTensorDescriptor_t yDesc,
-                       void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationBackward(cudnnHandle_t handle,
-                        cudnnActivationDescriptor_t activationDesc,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t yDesc,
-                        const void *y,
-                        const cudnnTensorDescriptor_t dyDesc,
-                        const void *dy,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t dxDesc,
-                        void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
@@ -1212,157 +1253,157 @@ cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrn
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelForward(cudnnHandle_t handle,
-                            cudnnLRNDescriptor_t normDesc,
-                            cudnnLRNMode_t lrnMode,
-                            const void *alpha,
-                            const cudnnTensorDescriptor_t xDesc,
-                            const void *x,
-                            const void *beta,
-                            const cudnnTensorDescriptor_t yDesc,
-                            void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
-                             cudnnLRNDescriptor_t normDesc,
-                             cudnnLRNMode_t lrnMode,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t yDesc,
-                             const void *y,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
-                                  cudnnLRNDescriptor_t normDesc,
-                                  cudnnDivNormMode_t mode,
-                                  const void *alpha,
-                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
-                                  const void *x,
-                                  const void *means, /* if NULL, means are assumed to be zero */
-                                  void *temp,
-                                  void *temp2,
-                                  const void *beta,
-                                  const cudnnTensorDescriptor_t yDesc,
-                                  void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
-                                   cudnnLRNDescriptor_t normDesc,
-                                   cudnnDivNormMode_t mode,
-                                   const void *alpha,
-                                   const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
-                                   const void *x,
-                                   const void *means, /* if NULL, means are assumed to be zero */
-                                   const void *dy,
-                                   void *temp,
-                                   void *temp2,
-                                   const void *beta,
-                                   const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
-                                   void *dx,                                   /* output x differential */
-                                   void *dMeans) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc, /* same desc for x, means, dy, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+    void *dx,                                   /* output x differential */
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
-                              const cudnnTensorDescriptor_t xDesc,
-                              cudnnBatchNormMode_t mode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
-                                                         cudnnBatchNormMode_t mode,
-                                                         cudnnBatchNormOps_t bnOps,
-                                                         const cudnnTensorDescriptor_t xDesc,
-                                                         const cudnnTensorDescriptor_t zDesc,
-                                                         const cudnnTensorDescriptor_t yDesc,
-                                                         const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-                                                         const cudnnActivationDescriptor_t activationDesc,
-                                                         size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnActivationDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize");
+cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
+    const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnActivationDescriptor_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>(
+      "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes);
+  return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc,
+                  bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
-                                                  cudnnBatchNormMode_t mode,
-                                                  cudnnBatchNormOps_t bnOps,
-                                                  const cudnnTensorDescriptor_t xDesc,
-                                                  const cudnnTensorDescriptor_t yDesc,
-                                                  const cudnnTensorDescriptor_t dyDesc,
-                                                  const cudnnTensorDescriptor_t dzDesc,
-                                                  const cudnnTensorDescriptor_t dxDesc,
-                                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                                  const cudnnActivationDescriptor_t activationDesc,
-                                                  size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnActivationDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationBackwardExWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc,
+    const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnActivationDescriptor_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationBackwardExWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, dBnScaleBiasDesc, activationDesc, sizeInBytes);
+  return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc,
+                  dBnScaleBiasDesc, activationDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
-                                                     cudnnBatchNormMode_t mode,
-                                                     cudnnBatchNormOps_t bnOps,
-                                                     const cudnnActivationDescriptor_t activationDesc,
-                                                     const cudnnTensorDescriptor_t xDesc,
-                                                     size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationTrainingExReserveSpaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>(
+      "cudnnGetBatchNormalizationTrainingExReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardTraining(
-    cudnnHandle_t handle,
-    cudnnBatchNormMode_t mode,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
     const void *alpha, /* alpha[0] = result blend factor */
     const void *beta,  /* beta[0] = dest layer blend factor */
 
-    const cudnnTensorDescriptor_t xDesc,
-    const void *x, /* NxCxHxW */
-    const cudnnTensorDescriptor_t yDesc,
-    void *y, /* NxCxHxW */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
 
     /* Shared desc for the next 6 tensors in the argument list.
        Data type to be set as follows:
@@ -1370,13 +1411,13 @@ cudnnBatchNormalizationForwardTraining(
        Dimensions for this descriptor depend on normalization mode
        - Spatial Normalization : tensors are expected to have dims 1xCx1x1
         (normalization is performed across NxHxW)
-       - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
-        (normalization is performed across N) */
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
     const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
-    const void *bnScale,
-    const void *bnBias,
+    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
+     */
+    const void *bnScale, const void *bnBias,
 
     /* MUST use factor=1 in the very first call of a complete training cycle.
        Use a factor=1/(1+n) at N-th call to the function to get
@@ -1394,248 +1435,261 @@ cudnnBatchNormalizationForwardTraining(
        of  variance[x] (factor is applied in the same way as for runningMean) */
     void *resultRunningVariance,
 
-    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
     double epsilon,
 
     /* Optionally save intermediate results from the forward pass here
        - can be reused to speed up backward pass. NULL if unused */
-    void *resultSaveMean,
-    void *resultSaveInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardTrainingEx(
-    cudnnHandle_t handle,
-    cudnnBatchNormMode_t mode,
-    cudnnBatchNormOps_t bnOps,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
 
     const void *alpha, /* alpha[0] = result blend factor */
     const void *beta,  /* beta[0] = dest layer blend factor */
 
-    const cudnnTensorDescriptor_t xDesc,
-    const void *xData,
-    const cudnnTensorDescriptor_t zDesc,
-    const void *zData,
-    const cudnnTensorDescriptor_t yDesc,
-    void *yData,
+    const cudnnTensorDescriptor_t xDesc, const void *xData,
+    const cudnnTensorDescriptor_t zDesc, const void *zData,
+    const cudnnTensorDescriptor_t yDesc, void *yData,
 
-    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-    const void *bnScale,
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
     const void *bnBias,
 
-    double exponentialAverageFactor,
-    void *resultRunningMean,
+    double exponentialAverageFactor, void *resultRunningMean,
     void *resultRunningVariance,
 
-    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
     double epsilon,
 
     /* Optionally save intermediate results from the forward pass here
        - can be reused to speed up backward pass. NULL if unused */
-    void *resultSaveMean,
-    void *resultSaveInvVariance,
+    void *resultSaveMean, void *resultSaveInvVariance,
 
-    cudnnActivationDescriptor_t activationDesc,
-    void *workspace,
-    size_t workSpaceSizeInBytes,
-    void *reserveSpace,
+    cudnnActivationDescriptor_t activationDesc, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTrainingEx");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTrainingEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance, activationDesc, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData,
+                  yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias,
+                  exponentialAverageFactor, resultRunningMean,
+                  resultRunningVariance, epsilon, resultSaveMean,
+                  resultSaveInvVariance, activationDesc, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
-                                        cudnnBatchNormMode_t mode,
-                                        const void *alpha, /* alpha[0] = result blend factor */
-                                        const void *beta,  /* beta[0] = dest layer blend factor */
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const void *x, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        void *y, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-                                        const void *bnScale,
-                                        const void *bnBias,
-                                        const void *estimatedMean,
-                                        const void *estimatedVariance,
-                                        double epsilon) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationBackward(cudnnHandle_t handle,
-                                cudnnBatchNormMode_t mode,
-                                const void *alphaDataDiff,
-                                const void *betaDataDiff,
-                                const void *alphaParamDiff,
-                                const void *betaParamDiff,
-                                const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
-                                const void *x,
-                                const cudnnTensorDescriptor_t dyDesc,
-                                const void *dy,
-                                const cudnnTensorDescriptor_t dxDesc,
-                                void *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                const void *bnScale, /* bnBias doesn't affect backpropagation */
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void *dBnScaleResult,
-                                void *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double epsilon,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale, /* bnBias doesn't affect backpropagation */
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void *savedMean,
-                                const void *savedInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
-                                  cudnnBatchNormMode_t mode,
-                                  cudnnBatchNormOps_t bnOps,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
 
-                                  const void *alphaDataDiff,
-                                  const void *betaDataDiff,
-                                  const void *alphaParamDiff,
-                                  const void *betaParamDiff,
-                                  const cudnnTensorDescriptor_t xDesc,
-                                  const void *xData,
-                                  const cudnnTensorDescriptor_t yDesc,
-                                  const void *yData,
-                                  const cudnnTensorDescriptor_t dyDesc,
-                                  const void *dyData,
-                                  const cudnnTensorDescriptor_t dzDesc,
-                                  void *dzData,
-                                  const cudnnTensorDescriptor_t dxDesc,
-                                  void *dxData,
+    const void *alphaDataDiff, const void *betaDataDiff,
+    const void *alphaParamDiff, const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, const void *xData,
+    const cudnnTensorDescriptor_t yDesc, const void *yData,
+    const cudnnTensorDescriptor_t dyDesc, const void *dyData,
+    const cudnnTensorDescriptor_t dzDesc, void *dzData,
+    const cudnnTensorDescriptor_t dxDesc, void *dxData,
 
-                                  /* Shared tensor desc for the 4 tensors below */
-                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                  const void *bnScaleData,
-                                  const void *bnBiasData, /* needed if there is activation */
-                                  void *dBnScaleData,
-                                  void *dBnBiasData,
-                                  double epsilon, /* Same epsilon as forward pass */
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData,
+    const void *bnBiasData, /* needed if there is activation */
+    void *dBnScaleData, void *dBnBiasData,
+    double epsilon, /* Same epsilon as forward pass */
 
-                                  /* Optionally cached intermediate results from
-                                     forward pass */
-                                  const void *savedMean,
-                                  const void *savedInvVariance,
-                                  cudnnActivationDescriptor_t activationDesc,
-                                  void *workSpace,
-                                  size_t workSpaceSizeInBytes,
-                                  void *reserveSpace,
-                                  size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, double, const void *, const void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackwardEx");
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance,
+    cudnnActivationDescriptor_t activationDesc, void *workSpace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *,
+      const void *, const void *, const void *, const cudnnTensorDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, const void *, const void *, void *,
+      void *, double, const void *, const void *, cudnnActivationDescriptor_t,
+      void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackwardEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(
+      handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff,
+      betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData,
+      dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData,
+      dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc,
+      workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
-                                       cudnnSamplerType_t samplerType,
-                                       cudnnDataType_t dataType,
-                                       const int nbDims,
-                                       const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
-                                   const cudnnSpatialTransformerDescriptor_t stDesc,
-                                   const void *theta,
-                                   void *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
-                                    const cudnnSpatialTransformerDescriptor_t stDesc,
-                                    const void *dgrid,
-                                    void *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
-                             cudnnSpatialTransformerDescriptor_t stDesc,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *grid,
-                             const void *beta,
-                             cudnnTensorDescriptor_t yDesc,
-                             void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
-                              cudnnSpatialTransformerDescriptor_t stDesc,
-                              const void *alpha,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const void *x,
-                              const void *beta,
-                              const cudnnTensorDescriptor_t dxDesc,
-                              void *dx,
-                              const void *alphaDgrid,
-                              const cudnnTensorDescriptor_t dyDesc,
-                              const void *dy,
-                              const void *grid,
-                              const void *betaDgrid,
-                              void *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
@@ -1643,99 +1697,95 @@ cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float dropout,
-                          void *states,
-                          size_t stateSizeInBytes,
-                          unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                              cudnnHandle_t handle,
-                              float dropout,
-                              void *states,
-                              size_t stateSizeInBytes,
-                              unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float *dropout,
-                          void **states,
-                          unsigned long long *seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float *, void **, unsigned long long *);
+cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout,
+    void **states, unsigned long long *seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float *, void **, unsigned long long *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutForward(cudnnHandle_t handle,
-                    const cudnnDropoutDescriptor_t dropoutDesc,
-                    const cudnnTensorDescriptor_t xdesc,
-                    const void *x,
-                    const cudnnTensorDescriptor_t ydesc,
-                    void *y,
-                    void *reserveSpace,
-                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutBackward(cudnnHandle_t handle,
-                     const cudnnDropoutDescriptor_t dropoutDesc,
-                     const cudnnTensorDescriptor_t dydesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dxdesc,
-                     void *dx,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
@@ -1743,184 +1793,192 @@ cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle,
-                                        const cudnnRNNDescriptor_t rnnDesc,
-                                        const int seqLength,
-                                        const cudnnTensorDescriptor_t *xDesc,
-                                        const void *x,
-                                        const cudnnTensorDescriptor_t hxDesc,
-                                        const void *hx,
-                                        const cudnnTensorDescriptor_t cxDesc,
-                                        const void *cx,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const void *w,
-                                        const cudnnTensorDescriptor_t *yDesc,
-                                        void *y,
-                                        const cudnnTensorDescriptor_t hyDesc,
-                                        void *hy,
-                                        const cudnnTensorDescriptor_t cyDesc,
-                                        void *cy,
-                                        const float findIntensity,
-                                        const int requestedAlgoCount,
-                                        int *returnedAlgoCount,
-                                        cudnnAlgorithmPerformance_t *perfResults,
-                                        void *workspace,
-                                        size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t cxDesc,
-                                       const void *cx,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       void *y,
-                                       const cudnnTensorDescriptor_t hyDesc,
-                                       void *hy,
-                                       const cudnnTensorDescriptor_t cyDesc,
-                                       void *cy,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                    const cudnnRNNDescriptor_t rnnDesc,
-                                    const int seqLength,
-                                    const cudnnTensorDescriptor_t *yDesc,
-                                    const void *y,
-                                    const cudnnTensorDescriptor_t *dyDesc,
-                                    const void *dy,
-                                    const cudnnTensorDescriptor_t dhyDesc,
-                                    const void *dhy,
-                                    const cudnnTensorDescriptor_t dcyDesc,
-                                    const void *dcy,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const void *w,
-                                    const cudnnTensorDescriptor_t hxDesc,
-                                    const void *hx,
-                                    const cudnnTensorDescriptor_t cxDesc,
-                                    const void *cx,
-                                    const cudnnTensorDescriptor_t *dxDesc,
-                                    void *dx,
-                                    const cudnnTensorDescriptor_t dhxDesc,
-                                    void *dhx,
-                                    const cudnnTensorDescriptor_t dcxDesc,
-                                    void *dcx,
-                                    const float findIntensity,
-                                    const int requestedAlgoCount,
-                                    int *returnedAlgoCount,
-                                    cudnnAlgorithmPerformance_t *perfResults,
-                                    void *workspace,
-                                    size_t workSpaceSizeInBytes,
-                                    void *reserveSpace,
-                                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const cudnnTensorDescriptor_t *dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnTensorDescriptor_t *dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       const void *y,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       const void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       const cudnnFilterDescriptor_t dwDesc,
-                                       void *dw,
-                                       const void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const float findIntensity, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults,
+    const void *workspace, size_t workSpaceSizeInBytes,
+    const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  findIntensity, requestedAlgoCount, returnedAlgoCount,
+                  perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                             const int minibatch,
-                             const cudnnDataType_t dataType,
-                             cudnnPersistentRNNPlan_t *plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
@@ -1928,289 +1986,285 @@ cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      const int hiddenSize,
-                      const int numLayers,
-                      cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps. */
-                      cudnnRNNInputMode_t inputMode,
-                      cudnnDirectionMode_t direction,
-                      cudnnRNNMode_t mode,
-                      cudnnRNNAlgo_t algo,
-                      cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers,
+    cudnnDropoutDescriptor_t
+        dropoutDesc, /* Between layers, not between recurrent steps. */
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNProjectionLayers(cudnnHandle_t handle,
-                            cudnnRNNDescriptor_t rnnDesc,
-                            const int recProjSize,
-                            const int outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
+cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+                            const int recProjSize, const int outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNProjectionLayers(cudnnHandle_t handle,
-                            const cudnnRNNDescriptor_t rnnDesc,
-                            int *recProjSize,
-                            int *outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize,
+    int *outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      int *hiddenSize,
-                      int *numLayers,
-                      cudnnDropoutDescriptor_t *dropoutDesc,
-                      cudnnRNNInputMode_t *inputMode,
-                      cudnnDirectionMode_t *direction,
-                      cudnnRNNMode_t *mode,
-                      cudnnRNNAlgo_t *algo,
-                      cudnnDataType_t *dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(
+    cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNWorkspaceSize(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNTrainingReserveSize(cudnnHandle_t handle,
-                               const cudnnRNNDescriptor_t rnnDesc,
-                               const int seqLength,
-                               const cudnnTensorDescriptor_t *xDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNParamsSize(cudnnHandle_t handle,
-                      const cudnnRNNDescriptor_t rnnDesc,
-                      const cudnnTensorDescriptor_t xDesc,
-                      size_t *sizeInBytes,
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
                       cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle,
-                                const cudnnRNNDescriptor_t rnnDesc,
-                                const int pseudoLayer,
-                                const cudnnTensorDescriptor_t xDesc,
-                                const cudnnFilterDescriptor_t wDesc,
-                                const void *w,
-                                const int linLayerID,
-                                cudnnFilterDescriptor_t linLayerMatDesc,
-                                void **linLayerMat) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle,
-                              const cudnnRNNDescriptor_t rnnDesc,
-                              const int pseudoLayer,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const cudnnFilterDescriptor_t wDesc,
-                              const void *w,
-                              const int linLayerID,
-                              cudnnFilterDescriptor_t linLayerBiasDesc,
-                              void **linLayerBias) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInference(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         const void *x,
-                         const cudnnTensorDescriptor_t hxDesc,
-                         const void *hx,
-                         const cudnnTensorDescriptor_t cxDesc,
-                         const void *cx,
-                         const cudnnFilterDescriptor_t wDesc,
-                         const void *w,
-                         const cudnnTensorDescriptor_t *yDesc,
-                         void *y,
-                         const cudnnTensorDescriptor_t hyDesc,
-                         void *hy,
-                         const cudnnTensorDescriptor_t cyDesc,
-                         void *cy,
-                         void *workspace,
-                         size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTraining(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t cxDesc,
-                        const void *cx,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        void *y,
-                        const cudnnTensorDescriptor_t hyDesc,
-                        void *hy,
-                        const cudnnTensorDescriptor_t cyDesc,
-                        void *cy,
-                        void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardData(cudnnHandle_t handle,
-                     const cudnnRNNDescriptor_t rnnDesc,
-                     const int seqLength,
-                     const cudnnTensorDescriptor_t *yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t *dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dhyDesc,
-                     const void *dhy,
-                     const cudnnTensorDescriptor_t dcyDesc,
-                     const void *dcy,
-                     const cudnnFilterDescriptor_t wDesc,
-                     const void *w,
-                     const cudnnTensorDescriptor_t hxDesc,
-                     const void *hx,
-                     const cudnnTensorDescriptor_t cxDesc,
-                     const void *cx,
-                     const cudnnTensorDescriptor_t *dxDesc,
-                     void *dx,
-                     const cudnnTensorDescriptor_t dhxDesc,
-                     void *dhx,
-                     const cudnnTensorDescriptor_t dcxDesc,
-                     void *dcx,
-                     void *workspace,
-                     size_t workSpaceSizeInBytes,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeights(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        const void *y,
-                        const void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        const cudnnFilterDescriptor_t dwDesc,
-                        void *dw,
-                        const void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
@@ -2218,82 +2272,102 @@ cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCTCLoss(
+cudnnStatus_t CUDNNWINAPI cudnnCTCLoss(
     cudnnHandle_t handle,
     const cudnnTensorDescriptor_t
-        probsDesc,     /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
-                          mini batch size, A is the alphabet size)  */
-    const void *probs, /* probabilities after softmax, in GPU memory */
-    const int *labels, /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    void *costs,                                 /* the returned costs of CTC, in GPU memory */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
-    const void *gradients,   /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the
+                      mini batch size, A is the alphabet size)  */
+    const void *probs,       /* probabilities after softmax, in GPU memory */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    void *costs,             /* the returned costs of CTC, in GPU memory */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A */
+    const void *gradients,   /* the returned CTC gradients, in GPU memory, to
+                                compute costs only, set it to NULL */
     cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
     cudnnCTCLossDescriptor_t ctcLossDesc,
-    void *workspace,              /* pointer to the workspace, in GPU memory */
+    void *workspace, /* pointer to the workspace, in GPU memory */
     size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, const int *, const int *, void *, const cudnnTensorDescriptor_t, const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *,
+      const int *, const int *, void *, const cudnnTensorDescriptor_t,
+      const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCTCLoss");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths,
+                  costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossWorkspaceSize(
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize(
     cudnnHandle_t handle,
-    const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
-                                                timing steps, N is the mini batch size, A is the alphabet size) */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
-                                                    dimensions are T,N,A. To compute costs
-                                                    only, set it to NULL */
-    const int *labels,                           /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    cudnnCTCLossAlgo_t algo,                     /* algorithm selected, supported now 0 and 1 */
-    cudnnCTCLossDescriptor_t ctcLossDesc,
-    size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const int *, const int *, const int *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the
+                      timing steps, N is the mini batch size, A is the alphabet
+                      size) */
+    const cudnnTensorDescriptor_t
+        gradientsDesc,       /* Tensor descriptor for gradients, the
+                                dimensions are T,N,A. To compute costs
+                                only, set it to NULL */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const int *, const int *, const int *,
+      cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc, sizeInBytes);
+  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths,
+                  inputLengths, algo, ctcLossDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor(
+    cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCopyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(src, dest);
@@ -2301,135 +2375,141 @@ cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorith
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToCreate);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t algoDesc,
-                             cudnnStatus_t status,
-                             float time,
-                             size_t memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t, cudnnStatus_t, float, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc,
+    cudnnStatus_t status, float time, size_t memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t,
+                                               cudnnAlgorithmDescriptor_t,
+                                               cudnnStatus_t, float, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t *algoDesc,
-                             cudnnStatus_t *status,
-                             float *time,
-                             size_t *memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, cudnnStatus_t *, float *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance(
+    const cudnnAlgorithmPerformance_t algoPerf,
+    cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time,
+    size_t *memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *,
+      cudnnStatus_t *, float *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToDestroy);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize(
+    cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+    size_t *algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSaveAlgorithm(cudnnHandle_t handle,
-                   cudnnAlgorithmDescriptor_t algoDesc,
-                   void *algoSpace,
-                   size_t algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
+cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+                   void *algoSpace, size_t algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSaveAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreAlgorithm(cudnnHandle_t handle,
-                      void *algoSpace,
-                      size_t algoSpaceSizeInBytes,
-                      cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm(
+    cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNSetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t clipMode,
-                cudnnNanPropagation_t clipNanOpt,
-                double lclip,
-                double rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, cudnnNanPropagation_t, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t clipMode,
+                                          cudnnNanPropagation_t clipNanOpt,
+                                          double lclip, double rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t,
+      cudnnNanPropagation_t, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNSetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNGetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t *clipMode,
-                cudnnNanPropagation_t *clipNanOpt,
-                double *lclip,
-                double *rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, cudnnNanPropagation_t *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t *clipMode,
+                                          cudnnNanPropagation_t *clipNanOpt,
+                                          double *lclip, double *rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *,
+      cudnnNanPropagation_t *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNGetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata,
+                                           cudnnCallback_t fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata,
+                                           cudnnCallback_t *fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnRNNPaddingMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
@@ -2437,7 +2517,7 @@ cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *padd
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(RNNDataDesc);
@@ -2445,199 +2525,202 @@ cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *RNNDataDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(RNNDataDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc,
-                          cudnnDataType_t dataType,
-                          cudnnRNNDataLayout_t layout,
-                          int maxSeqLength,
-                          int batchSize,
-                          int vectorSize,
-                          const int seqLengthArray[], /* length of each sequence in the batch */
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, int, const int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t dataType,
+    cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize,
+    int vectorSize,
+    const int seqLengthArray[], /* length of each sequence in the batch */
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int,
+      int, const int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, paddingFill);
+  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, seqLengthArray, paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t RNNDataDesc,
-                          cudnnDataType_t *dataType,
-                          cudnnRNNDataLayout_t *layout,
-                          int *maxSeqLength,
-                          int *batchSize,
-                          int *vectorSize,
-                          int arrayLengthRequested,
-                          int seqLengthArray[],
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, int *, int *, int *, int, int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t RNNDataDesc, cudnnDataType_t *dataType,
+    cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize,
+    int *vectorSize, int arrayLengthRequested, int seqLengthArray[],
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *,
+      int *, int *, int *, int, int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, arrayLengthRequested, seqLengthArray, paddingFill);
+  return func_ptr(RNNDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, arrayLengthRequested, seqLengthArray,
+                  paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTrainingEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnTensorDescriptor_t cxDesc,
-                          const void *cx,
-                          const cudnnFilterDescriptor_t wDesc,
-                          const void *w,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          void *y,
-                          const cudnnTensorDescriptor_t hyDesc,
-                          void *hy,
-                          const cudnnTensorDescriptor_t cyDesc,
-                          void *cy,
-                          const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                          const void *keys,                     /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                          void *cAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                          void *iAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                          void *queries,                        /* reserved, should pass NULL */
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTrainingEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInferenceEx(cudnnHandle_t handle,
-                           const cudnnRNNDescriptor_t rnnDesc,
-                           const cudnnRNNDataDescriptor_t xDesc,
-                           const void *x,
-                           const cudnnTensorDescriptor_t hxDesc,
-                           const void *hx,
-                           const cudnnTensorDescriptor_t cxDesc,
-                           const void *cx,
-                           const cudnnFilterDescriptor_t wDesc,
-                           const void *w,
-                           const cudnnRNNDataDescriptor_t yDesc,
-                           void *y,
-                           const cudnnTensorDescriptor_t hyDesc,
-                           void *hy,
-                           const cudnnTensorDescriptor_t cyDesc,
-                           void *cy,
-                           const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                           const void *keys,                     /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                           void *cAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                           void *iAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                           void *queries,                        /* reserved, should pass NULL */
-                           void *workSpace,
-                           size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInferenceEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardDataEx(cudnnHandle_t handle,
-                       const cudnnRNNDescriptor_t rnnDesc,
-                       const cudnnRNNDataDescriptor_t yDesc,
-                       const void *y,
-                       const cudnnRNNDataDescriptor_t dyDesc,
-                       const void *dy,
-                       const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
-                       const void *dcAttn,                    /* reserved, should pass NULL */
-                       const cudnnTensorDescriptor_t dhyDesc,
-                       const void *dhy,
-                       const cudnnTensorDescriptor_t dcyDesc,
-                       const void *dcy,
-                       const cudnnFilterDescriptor_t wDesc,
-                       const void *w,
-                       const cudnnTensorDescriptor_t hxDesc,
-                       const void *hx,
-                       const cudnnTensorDescriptor_t cxDesc,
-                       const void *cx,
-                       const cudnnRNNDataDescriptor_t dxDesc,
-                       void *dx,
-                       const cudnnTensorDescriptor_t dhxDesc,
-                       void *dhx,
-                       const cudnnTensorDescriptor_t dcxDesc,
-                       void *dcx,
-                       const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
-                       void *dkeys,                           /* reserved, should pass NULL */
-                       void *workSpace,
-                       size_t workSpaceSizeInBytes,
-                       void *reserveSpace,
-                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y,
+    const cudnnRNNDataDescriptor_t dyDesc, const void *dy,
+    const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
+    const void *dcAttn,                    /* reserved, should pass NULL */
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnRNNDataDescriptor_t dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+    const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
+    void *dkeys,                           /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardDataEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn,
+                  dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx,
+                  dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys,
+                  workSpace, workSpaceSizeInBytes, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeightsEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          const void *y,
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          const cudnnFilterDescriptor_t dwDesc,
-                          void *dw,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, void *, size_t, const cudnnFilterDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *, void *, size_t,
+      const cudnnFilterDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeightsEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace,
+                  workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v6(cudnnHandle_t handle,
-                         cudnnRNNDescriptor_t rnnDesc,
-                         const int hiddenSize,
-                         const int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnRNNAlgo_t algo,
-                         cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, dataType);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v5(cudnnRNNDescriptor_t rnnDesc,
-                         int hiddenSize,
-                         int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode,
+    cudnnDirectionMode_t direction, cudnnRNNMode_t mode,
+    cudnnDataType_t dataType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, dataType);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, dataType);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cudnn_7_6.inc b/tensorflow/stream_executor/cuda/cudnn_7_6.inc
index 7a5f1c9751d..9dd420a9022 100644
--- a/tensorflow/stream_executor/cuda/cudnn_7_6.inc
+++ b/tensorflow/stream_executor/cuda/cudnn_7_6.inc
@@ -2,73 +2,71 @@
 
 extern "C" {
 
-size_t CUDNNWINAPI
-cudnnGetVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-size_t CUDNNWINAPI
-cudnnGetCudartVersion(void) {
-  using FuncPtr = size_t (CUDNNWINAPI *)();
+size_t CUDNNWINAPI cudnnGetCudartVersion(void) {
+  using FuncPtr = size_t(CUDNNWINAPI *)();
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCudartVersion");
   if (!func_ptr) return 0;
   return func_ptr();
 }
 
-const char *CUDNNWINAPI
-cudnnGetErrorString(cudnnStatus_t status) {
-  using FuncPtr = const char * (CUDNNWINAPI *)(cudnnStatus_t);
+const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) {
+  using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetErrorString");
   if (!func_ptr) return "cudnnGetErrorString symbol not found.";
   return func_ptr(status);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
+cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle,
+                                                 cudnnStatus_t *rstatus,
+                                                 cudnnErrQueryMode_t mode,
+                                                 cudnnRuntimeTag_t *tag) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnQueryRuntimeError");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rstatus, mode, tag);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetProperty(libraryPropertyType type, int *value) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(libraryPropertyType, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type,
+                                           int *value) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetProperty");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(type, value);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreate(cudnnHandle_t *handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreate");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroy(cudnnHandle_t handle) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroy");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle,
+                                         cudaStream_t streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle,
+                                         cudaStream_t *streamId) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetStream");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, streamId);
@@ -76,100 +74,97 @@ cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnTensorFormat_t format,
-                           cudnnDataType_t dataType, /* image data type */
-                           int n,                    /* number of inputs (batch size) */
-                           int c,                    /* number of input feature maps */
-                           int h,                    /* height of input section */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnDataType_t dataType, /* image data type */
-                             int n,                    /* number of inputs (batch size) */
-                             int c,                    /* number of input feature maps */
-                             int h,                    /* height of input section */
-                             int w,                    /* width of input section */
-                             int nStride,
-                             int cStride,
-                             int hStride,
-                             int wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, int, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t dataType, /* image data type */
+    int n,                    /* number of inputs (batch size) */
+    int c,                    /* number of input feature maps */
+    int h,                    /* height of input section */
+    int w,                    /* width of input section */
+    int nStride, int cStride, int hStride, int wStride) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t,
+                                   int, int, int, int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor4dDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           int *n,                    /* number of inputs (batch size) */
-                           int *c,                    /* number of input feature maps  */
-                           int *h,                    /* height of input section */
-                           int *w,                    /* width of input section */
-                           int *nStride,
-                           int *cStride,
-                           int *hStride,
-                           int *wStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc,
+    cudnnDataType_t *dataType, /* image data type */
+    int *n,                    /* number of inputs (batch size) */
+    int *c,                    /* number of input feature maps  */
+    int *h,                    /* height of input section */
+    int *w,                    /* width of input section */
+    int *nStride, int *cStride, int *hStride, int *wStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *,
+      int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensor4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, wStride);
+  return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride,
+                  wStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
-                           cudnnDataType_t dataType,
-                           int nbDims,
-                           const int dimA[],
-                           const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, int, const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor(
+    cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
-                             cudnnTensorFormat_t format,
-                             cudnnDataType_t dataType,
-                             int nbDims,
-                             const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, cudnnDataType_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx(
+    cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format,
+    cudnnDataType_t dataType, int nbDims, const int dimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t,
+                                   cudnnDataType_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorNdDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, format, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType,
-                           int *nbDims,
-                           int dimA[],
-                           int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, cudnnDataType_t *, int *, int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor(
+    const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int,
+                                   cudnnDataType_t *, int *, int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes(
+    const cudnnTensorDescriptor_t tensorDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc, size);
@@ -177,126 +172,141 @@ cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(tensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
-                       const cudnnTensorDescriptor_t srcDesc,
-                       cudnnTensorDescriptor_t destDesc,
-                       size_t *destSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t, cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest(
+    const cudnnTensorTransformDescriptor_t transformDesc,
+    const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc,
+    size_t *destSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t,
+      cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnInitTransformDest");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(transformDesc, srcDesc, destDesc, destSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateTensorTransformDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor(
+    cudnnTensorTransformDescriptor_t *transformDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateTensorTransformDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(transformDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
-                                  const uint32_t nbDims,
-                                  const cudnnTensorFormat_t destFormat,
-                                  const int32_t padBeforeA[],
-                                  const int32_t padAfterA[],
-                                  const uint32_t foldA[],
-                                  const cudnnFoldingDirection_t direction) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t, const uint32_t, const cudnnTensorFormat_t, const int32_t [], const int32_t [], const uint32_t [], const cudnnFoldingDirection_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensorTransformDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor(
+    cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims,
+    const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[],
+    const int32_t padAfterA[], const uint32_t foldA[],
+    const cudnnFoldingDirection_t direction) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorTransformDescriptor_t, const uint32_t,
+      const cudnnTensorFormat_t, const int32_t[], const int32_t[],
+      const uint32_t[], const cudnnFoldingDirection_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetTensorTransformDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA, foldA, direction);
+  return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA,
+                  foldA, direction);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
-                                  uint32_t nbDimsRequested,
-                                  cudnnTensorFormat_t *destFormat,
-                                  int32_t padBeforeA[],
-                                  int32_t padAfterA[],
-                                  uint32_t foldA[],
-                                  cudnnFoldingDirection_t *direction) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *, int32_t [], int32_t [], uint32_t [], cudnnFoldingDirection_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetTensorTransformDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor(
+    cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested,
+    cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[],
+    uint32_t foldA[], cudnnFoldingDirection_t *direction) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *,
+      int32_t[], int32_t[], uint32_t[], cudnnFoldingDirection_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetTensorTransformDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA, padAfterA, foldA, direction);
+  return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA,
+                  padAfterA, foldA, direction);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyTensorTransformDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor(
+    cudnnTensorTransformDescriptor_t transformDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyTensorTransformDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(transformDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnTransformTensor(cudnnHandle_t handle,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t yDesc,
-                     void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnTransformTensor(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnTransformTensorEx(cudnnHandle_t handle,
-                       const cudnnTensorTransformDescriptor_t transDesc,
-                       const void *alpha,
-                       const cudnnTensorDescriptor_t srcDesc,
-                       const void *srcData,
-                       const void *beta,
-                       const cudnnTensorDescriptor_t destDesc,
-                       void *destData) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx(
+    cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc,
+    const void *alpha, const cudnnTensorDescriptor_t srcDesc,
+    const void *srcData, const void *beta,
+    const cudnnTensorDescriptor_t destDesc, void *destData) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformTensorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, destData);
+  return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc,
+                  destData);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
-                                          const cudnnFilterDescriptor_t filterDesc,
-                                          const cudnnTensorDescriptor_t diffDesc,
-                                          const cudnnConvolutionDescriptor_t convDesc,
-                                          const cudnnTensorDescriptor_t gradDesc,
-                                          const cudnnTensorFormat_t transformFormat,
-                                          cudnnFilterDescriptor_t foldedFilterDesc,
-                                          cudnnTensorDescriptor_t paddedDiffDesc,
-                                          cudnnConvolutionDescriptor_t foldedConvDesc,
-                                          cudnnTensorDescriptor_t foldedGradDesc,
-                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
-                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
-                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
-                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorFormat_t, cudnnFilterDescriptor_t, cudnnTensorDescriptor_t, cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t, cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFoldedConvBackwardDataDescriptors");
+cudnnStatus_t CUDNNWINAPI cudnnGetFoldedConvBackwardDataDescriptors(
+    const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc,
+    const cudnnTensorFormat_t transformFormat,
+    cudnnFilterDescriptor_t foldedFilterDesc,
+    cudnnTensorDescriptor_t paddedDiffDesc,
+    cudnnConvolutionDescriptor_t foldedConvDesc,
+    cudnnTensorDescriptor_t foldedGradDesc,
+    cudnnTensorTransformDescriptor_t filterFoldTransDesc,
+    cudnnTensorTransformDescriptor_t diffPadTransDesc,
+    cudnnTensorTransformDescriptor_t gradFoldTransDesc,
+    cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorFormat_t,
+      cudnnFilterDescriptor_t, cudnnTensorDescriptor_t,
+      cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t,
+      cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t,
+      cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetFoldedConvBackwardDataDescriptors");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, transformFormat, foldedFilterDesc, paddedDiffDesc, foldedConvDesc, foldedGradDesc, filterFoldTransDesc, diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  transformFormat, foldedFilterDesc, paddedDiffDesc,
+                  foldedConvDesc, foldedGradDesc, filterFoldTransDesc,
+                  diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnAddTensor(cudnnHandle_t handle,
-               const void *alpha,
-               const cudnnTensorDescriptor_t aDesc,
-               const void *A,
-               const void *beta,
-               const cudnnTensorDescriptor_t cDesc,
-               void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle,
+                                         const void *alpha,
+                                         const cudnnTensorDescriptor_t aDesc,
+                                         const void *A, const void *beta,
+                                         const cudnnTensorDescriptor_t cDesc,
+                                         void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnAddTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C);
@@ -304,29 +314,29 @@ cudnnAddTensor(cudnnHandle_t handle,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t opTensorOp,
-                           cudnnDataType_t opTensorCompType,
-                           cudnnNanPropagation_t opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor(
+    cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp,
+    cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t,
+                                   cudnnDataType_t, cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
-                           cudnnOpTensorOp_t *opTensorOp,
-                           cudnnDataType_t *opTensorCompType,
-                           cudnnNanPropagation_t *opTensorNanOpt) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor(
+    const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp,
+    cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *,
+      cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt);
@@ -334,126 +344,136 @@ cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyOpTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(opTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnOpTensor(cudnnHandle_t handle,
-              const cudnnOpTensorDescriptor_t opTensorDesc,
-              const void *alpha1,
-              const cudnnTensorDescriptor_t aDesc,
-              const void *A,
-              const void *alpha2,
-              const cudnnTensorDescriptor_t bDesc,
-              const void *B,
-              const void *beta,
-              const cudnnTensorDescriptor_t cDesc,
-              void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnOpTensor(
+    cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc,
+    const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
+    const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
+    const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnOpTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, beta, cDesc, C);
+  return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B,
+                  beta, cDesc, C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t *reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t reduceTensorOp,
-                               cudnnDataType_t reduceTensorCompType,
-                               cudnnNanPropagation_t reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t reduceTensorIndices,
-                               cudnnIndicesType_t reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType,
+    cudnnNanPropagation_t reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t reduceTensorIndices,
+    cudnnIndicesType_t reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t,
+      cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               cudnnReduceTensorOp_t *reduceTensorOp,
-                               cudnnDataType_t *reduceTensorCompType,
-                               cudnnNanPropagation_t *reduceTensorNanOpt,
-                               cudnnReduceTensorIndices_t *reduceTensorIndices,
-                               cudnnIndicesType_t *reduceTensorIndicesType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, cudnnIndicesType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor(
+    const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    cudnnReduceTensorOp_t *reduceTensorOp,
+    cudnnDataType_t *reduceTensorCompType,
+    cudnnNanPropagation_t *reduceTensorNanOpt,
+    cudnnReduceTensorIndices_t *reduceTensorIndices,
+    cudnnIndicesType_t *reduceTensorIndicesType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *,
+      cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *,
+      cudnnIndicesType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, reduceTensorNanOpt, reduceTensorIndices, reduceTensorIndicesType);
+  return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType,
+                  reduceTensorNanOpt, reduceTensorIndices,
+                  reduceTensorIndicesType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor(
+    cudnnReduceTensorDescriptor_t reduceTensorDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyReduceTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(reduceTensorDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionIndicesSize(cudnnHandle_t handle,
-                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                             const cudnnTensorDescriptor_t aDesc,
-                             const cudnnTensorDescriptor_t cDesc,
-                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionIndicesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
-                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                               const cudnnTensorDescriptor_t aDesc,
-                               const cudnnTensorDescriptor_t cDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetReductionWorkspaceSize(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetReductionWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnReduceTensor(cudnnHandle_t handle,
-                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
-                  void *indices,
-                  size_t indicesSizeInBytes,
-                  void *workspace,
-                  size_t workspaceSizeInBytes,
-                  const void *alpha,
-                  const cudnnTensorDescriptor_t aDesc,
-                  const void *A,
-                  const void *beta,
-                  const cudnnTensorDescriptor_t cDesc,
-                  void *C) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnReduceTensor(
+    cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc,
+    void *indices, size_t indicesSizeInBytes, void *workspace,
+    size_t workspaceSizeInBytes, const void *alpha,
+    const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta,
+    const cudnnTensorDescriptor_t cDesc, void *C) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t,
+      void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReduceTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, C);
+  return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes,
+                  workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc,
+                  C);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle,
+                                         const cudnnTensorDescriptor_t yDesc,
+                                         void *y, const void *valuePtr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, valuePtr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
+cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           void *y, const void *alpha) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnScaleTensor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, yDesc, y, alpha);
@@ -461,745 +481,785 @@ cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int k,  /* number of output feature maps */
-                           int c,  /* number of input feature maps */
-                           int h,  /* height of each input filter */
-                           int w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType,          /* image data type */
+    cudnnTensorFormat_t format, int k, /* number of output feature maps */
+    int c,                             /* number of input feature maps */
+    int h,                             /* height of each input filter */
+    int w) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *k,  /* number of output feature maps */
-                           int *c,  /* number of input feature maps */
-                           int *h,  /* height of each input filter */
-                           int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor(
+    const cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t *dataType,           /* image data type */
+    cudnnTensorFormat_t *format, int *k, /* number of output feature maps */
+    int *c,                              /* number of input feature maps */
+    int *h,                              /* height of each input filter */
+    int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *,
+      int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilter4dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, k, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
-                           cudnnDataType_t dataType, /* image data type */
-                           cudnnTensorFormat_t format,
-                           int nbDims,
-                           const int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, cudnnTensorFormat_t, int, const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor(
+    cudnnFilterDescriptor_t filterDesc,
+    cudnnDataType_t dataType, /* image data type */
+    cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t,
+                                   cudnnTensorFormat_t, int, const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, dataType, format, nbDims, filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
-                           int nbDimsRequested,
-                           cudnnDataType_t *dataType, /* image data type */
-                           cudnnTensorFormat_t *format,
-                           int *nbDims,
-                           int filterDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, int, cudnnDataType_t *, cudnnTensorFormat_t *, int *, int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor(
+    const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested,
+    cudnnDataType_t *dataType, /* image data type */
+    cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFilterDescriptor_t, int, cudnnDataType_t *,
+      cudnnTensorFormat_t *, int *, int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, filterDimA);
+  return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims,
+                  filterDimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes(
+    const cudnnFilterDescriptor_t filterDesc, size_t *size) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFilterSizeInBytes");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc, size);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnTransformFilter(cudnnHandle_t handle,
-                     const cudnnTensorTransformDescriptor_t transDesc,
-                     const void *alpha,
-                     const cudnnFilterDescriptor_t srcDesc,
-                     const void *srcData,
-                     const void *beta,
-                     const cudnnFilterDescriptor_t destDesc,
-                     void *destData) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const void *, const cudnnFilterDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnTransformFilter(
+    cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc,
+    const void *alpha, const cudnnFilterDescriptor_t srcDesc,
+    const void *srcData, const void *beta,
+    const cudnnFilterDescriptor_t destDesc, void *destData) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *, const void *,
+      const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnTransformFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, destData);
+  return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc,
+                  destData);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFilterDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFilterDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(filterDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnReorderFilterAndBias(cudnnHandle_t handle,
-                          const cudnnFilterDescriptor_t filterDesc,
-                          cudnnReorderType_t reorderType,
-                          const void *filterData,
-                          void *reorderedFilterData,
-                          int reorderBias,
-                          const void *biasData,
-                          void *reorderedBiasData) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t, const void *, void *, int, const void *, void *);
+cudnnStatus_t CUDNNWINAPI cudnnReorderFilterAndBias(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    cudnnReorderType_t reorderType, const void *filterData,
+    void *reorderedFilterData, int reorderBias, const void *biasData,
+    void *reorderedBiasData) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t,
+      const void *, void *, int, const void *, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnReorderFilterAndBias");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, reorderType, filterData, reorderedFilterData, reorderBias, biasData, reorderedBiasData);
+  return func_ptr(handle, filterDesc, reorderType, filterData,
+                  reorderedFilterData, reorderBias, biasData,
+                  reorderedBiasData);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, mathType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount(
+    cudnnConvolutionDescriptor_t convDesc, int *groupCount) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionGroupCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, groupCount);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnReorderType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionReorderType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnReorderType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionReorderType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, reorderType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, cudnnReorderType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionReorderType(
+    cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t,
+                                               cudnnReorderType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionReorderType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, reorderType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int pad_h,      /* zero-padding height */
-                                int pad_w,      /* zero-padding width */
-                                int u,          /* vertical filter stride */
-                                int v,          /* horizontal filter stride */
-                                int dilation_h, /* filter dilation in the vertical dimension */
-                                int dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, int, int, int, int, int, cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */
+    int pad_w,                                        /* zero-padding width */
+    int u,          /* vertical filter stride */
+    int v,          /* horizontal filter stride */
+    int dilation_h, /* filter dilation in the vertical dimension */
+    int dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, int, int, int, int, int,
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int *pad_h,      /* zero-padding height */
-                                int *pad_w,      /* zero-padding width */
-                                int *u,          /* vertical filter stride */
-                                int *v,          /* horizontal filter stride */
-                                int *dilation_h, /* filter dilation in the vertical dimension */
-                                int *dilation_w, /* filter dilation in the horizontal dimension */
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc,
+    int *pad_h,      /* zero-padding height */
+    int *pad_w,      /* zero-padding width */
+    int *u,          /* vertical filter stride */
+    int *v,          /* horizontal filter stride */
+    int *dilation_h, /* filter dilation in the vertical dimension */
+    int *dilation_w, /* filter dilation in the horizontal dimension */
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *,
+      int *, cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, computeType);
+  return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int *n,
-                                      int *c,
-                                      int *h,
-                                      int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolution2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLength, /* nbDims-2 size */
-                                const int padA[],
-                                const int filterStrideA[],
-                                const int dilationA[],
-                                cudnnConvolutionMode_t mode,
-                                cudnnDataType_t computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int, const int [], const int [], const int [], cudnnConvolutionMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionNdDescriptor(
+    cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */
+    const int padA[], const int filterStrideA[], const int dilationA[],
+    cudnnConvolutionMode_t mode, cudnnDataType_t computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[],
+      cudnnConvolutionMode_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode,
+                  computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
-                                int arrayLengthRequested,
-                                int *arrayLength,
-                                int padA[],
-                                int strideA[],
-                                int dilationA[],
-                                cudnnConvolutionMode_t *mode,
-                                cudnnDataType_t *computeType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, int, int *, int [], int [], int [], cudnnConvolutionMode_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor(
+    const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested,
+    int *arrayLength, int padA[], int strideA[], int dilationA[],
+    cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[],
+      cudnnConvolutionMode_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, dilationA, mode, computeType);
+  return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA,
+                  dilationA, mode, computeType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
-                                      const cudnnTensorDescriptor_t inputTensorDesc,
-                                      const cudnnFilterDescriptor_t filterDesc,
-                                      int nbDims,
-                                      int tensorOutputDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim(
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t inputTensorDesc,
+    const cudnnFilterDescriptor_t filterDesc, int nbDims,
+    int tensorOutputDimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, tensorOutputDimA);
+  return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims,
+                  tensorOutputDimA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyConvolutionDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(convDesc);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                     const cudnnTensorDescriptor_t xDesc,
-                                     const cudnnFilterDescriptor_t wDesc,
-                                     const cudnnConvolutionDescriptor_t convDesc,
-                                     const cudnnTensorDescriptor_t yDesc,
-                                     const int requestedAlgoCount,
-                                     int *returnedAlgoCount,
-                                     cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t xDesc,
-                                       const void *x,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t yDesc,
-                                       void *y,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
-                                       void *workSpace,
-                                       size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults,
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionForwardAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm(cudnnHandle_t handle,
-                                    const cudnnTensorDescriptor_t xDesc,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const cudnnConvolutionDescriptor_t convDesc,
-                                    const cudnnTensorDescriptor_t yDesc,
-                                    cudnnConvolutionFwdPreference_t preference,
-                                    size_t memoryLimitInBytes,
-                                    cudnnConvolutionFwdAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t, cudnnConvolutionFwdAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    cudnnConvolutionFwdPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionFwdAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdPreference_t, size_t,
+      cudnnConvolutionFwdAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
-                                       const cudnnTensorDescriptor_t srcDesc,
-                                       const cudnnFilterDescriptor_t filterDesc,
-                                       const cudnnConvolutionDescriptor_t convDesc,
-                                       const cudnnTensorDescriptor_t destDesc,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnConvolutionFwdAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionFwdAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnFilterDescriptor_t filterDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionFwdAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const cudnnConvolutionDescriptor_t convDesc,
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        cudnnConvolutionFwdAlgo_t algo,
-                                        size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionForwardWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionForward(cudnnHandle_t handle,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnConvolutionDescriptor_t convDesc,
-                        cudnnConvolutionFwdAlgo_t algo,
-                        void *workSpace,
-                        size_t workSpaceSizeInBytes,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t yDesc,
-                        void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, yDesc, y);
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
-                                      const void *alpha1,
-                                      const cudnnTensorDescriptor_t xDesc,
-                                      const void *x,
-                                      const cudnnFilterDescriptor_t wDesc,
-                                      const void *w,
-                                      const cudnnConvolutionDescriptor_t convDesc,
-                                      cudnnConvolutionFwdAlgo_t algo,
-                                      void *workSpace,
-                                      size_t workSpaceSizeInBytes,
-                                      const void *alpha2,
-                                      const cudnnTensorDescriptor_t zDesc,
-                                      const void *z,
-                                      const cudnnTensorDescriptor_t biasDesc,
-                                      const void *bias,
-                                      const cudnnActivationDescriptor_t activationDesc,
-                                      const cudnnTensorDescriptor_t yDesc,
-                                      void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace, workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias, activationDesc, yDesc, y);
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardBias(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dbDesc,
-                             void *db) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dbDesc, void *db) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardBias");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                            const cudnnTensorDescriptor_t xDesc,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnFilterDescriptor_t dwDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t xDesc,
-                                              const void *x,
-                                              const cudnnTensorDescriptor_t dyDesc,
-                                              const void *y,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t dwDesc,
-                                              void *dw,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
-                                              void *workSpace,
-                                              size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *y,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardFilterAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
-                                           const cudnnTensorDescriptor_t xDesc,
-                                           const cudnnTensorDescriptor_t dyDesc,
-                                           const cudnnConvolutionDescriptor_t convDesc,
-                                           const cudnnFilterDescriptor_t dwDesc,
-                                           cudnnConvolutionBwdFilterPreference_t preference,
-                                           size_t memoryLimitInBytes,
-                                           cudnnConvolutionBwdFilterAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t, size_t, cudnnConvolutionBwdFilterAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t dwDesc,
+    cudnnConvolutionBwdFilterPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdFilterAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterPreference_t,
+      size_t, cudnnConvolutionBwdFilterAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
-                                              const cudnnTensorDescriptor_t srcDesc,
-                                              const cudnnTensorDescriptor_t diffDesc,
-                                              const cudnnConvolutionDescriptor_t convDesc,
-                                              const cudnnFilterDescriptor_t gradDesc,
-                                              const int requestedAlgoCount,
-                                              int *returnedAlgoCount,
-                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, const int, int *,
+      cudnnConvolutionBwdFilterAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
-                                               const cudnnTensorDescriptor_t xDesc,
-                                               const cudnnTensorDescriptor_t dyDesc,
-                                               const cudnnConvolutionDescriptor_t convDesc,
-                                               const cudnnFilterDescriptor_t gradDesc,
-                                               cudnnConvolutionBwdFilterAlgo_t algo,
-                                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize(
+    cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnFilterDescriptor_t gradDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardFilterWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
-                               const void *alpha,
-                               const cudnnTensorDescriptor_t xDesc,
-                               const void *x,
-                               const cudnnTensorDescriptor_t dyDesc,
-                               const void *dy,
-                               const cudnnConvolutionDescriptor_t convDesc,
-                               cudnnConvolutionBwdFilterAlgo_t algo,
-                               void *workSpace,
-                               size_t workSpaceSizeInBytes,
-                               const void *beta,
-                               const cudnnFilterDescriptor_t dwDesc,
-                               void *dw) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnFilterDescriptor_t dwDesc, void *dw) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t,
+      void *, size_t, const void *, const cudnnFilterDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardFilter");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
+  return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dwDesc, dw);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                          const cudnnFilterDescriptor_t wDesc,
-                                          const cudnnTensorDescriptor_t dyDesc,
-                                          const cudnnConvolutionDescriptor_t convDesc,
-                                          const cudnnTensorDescriptor_t dxDesc,
-                                          const int requestedAlgoCount,
-                                          int *returnedAlgoCount,
-                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount,
+                  returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t wDesc,
-                                            const void *w,
-                                            const cudnnTensorDescriptor_t dyDesc,
-                                            const void *dy,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t dxDesc,
-                                            void *dx,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
-                                            void *workSpace,
-                                            size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
-                                         const cudnnFilterDescriptor_t wDesc,
-                                         const cudnnTensorDescriptor_t dyDesc,
-                                         const cudnnConvolutionDescriptor_t convDesc,
-                                         const cudnnTensorDescriptor_t dxDesc,
-                                         cudnnConvolutionBwdDataPreference_t preference,
-                                         size_t memoryLimitInBytes,
-                                         cudnnConvolutionBwdDataAlgo_t *algo) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t, size_t, cudnnConvolutionBwdDataAlgo_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    cudnnConvolutionBwdDataPreference_t preference, size_t memoryLimitInBytes,
+    cudnnConvolutionBwdDataAlgo_t *algo) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataPreference_t,
+      size_t, cudnnConvolutionBwdDataAlgo_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference, memoryLimitInBytes, algo);
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, preference,
+                  memoryLimitInBytes, algo);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
-                                            const cudnnFilterDescriptor_t filterDesc,
-                                            const cudnnTensorDescriptor_t diffDesc,
-                                            const cudnnConvolutionDescriptor_t convDesc,
-                                            const cudnnTensorDescriptor_t gradDesc,
-                                            const int requestedAlgoCount,
-                                            int *returnedAlgoCount,
-                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc, requestedAlgoCount, returnedAlgoCount, perfResults);
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
-                                             const cudnnFilterDescriptor_t wDesc,
-                                             const cudnnTensorDescriptor_t dyDesc,
-                                             const cudnnConvolutionDescriptor_t convDesc,
-                                             const cudnnTensorDescriptor_t dxDesc,
-                                             cudnnConvolutionBwdDataAlgo_t algo,
-                                             size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFilterDescriptor_t, const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnConvolutionBackwardData(cudnnHandle_t handle,
-                             const void *alpha,
-                             const cudnnFilterDescriptor_t wDesc,
-                             const void *w,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnConvolutionDescriptor_t convDesc,
-                             cudnnConvolutionBwdDataAlgo_t algo,
-                             void *workSpace,
-                             size_t workSpaceSizeInBytes,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *, size_t, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo, workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnIm2Col(cudnnHandle_t handle,
-            const cudnnTensorDescriptor_t xDesc,
-            const void *x,
-            const cudnnFilterDescriptor_t wDesc,
-            const cudnnConvolutionDescriptor_t convDesc,
-            void *colBuffer) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, void *);
+cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc,
+            const void *x, const cudnnFilterDescriptor_t wDesc,
+            const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t,
+                                   const void *, const cudnnFilterDescriptor_t,
+                                   const cudnnConvolutionDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnIm2Col");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxForward(cudnnHandle_t handle,
-                    cudnnSoftmaxAlgorithm_t algo,
-                    cudnnSoftmaxMode_t mode,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSoftmaxBackward(cudnnHandle_t handle,
-                     cudnnSoftmaxAlgorithm_t algo,
-                     cudnnSoftmaxMode_t mode,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward(
+    cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSoftmaxBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, dx);
+  return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc,
+                  dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t mode,
-                            cudnnNanPropagation_t maxpoolingNanOpt,
-                            int windowHeight,
-                            int windowWidth,
-                            int verticalPadding,
-                            int horizontalPadding,
-                            int verticalStride,
-                            int horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode,
+    cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth,
+    int verticalPadding, int horizontalPadding, int verticalStride,
+    int horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int,
+      int, int, int, int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *windowHeight,
-                            int *windowWidth,
-                            int *verticalPadding,
-                            int *horizontalPadding,
-                            int *verticalStride,
-                            int *horizontalStride) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode,
+    cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight,
+    int *windowWidth, int *verticalPadding, int *horizontalPadding,
+    int *verticalStride, int *horizontalStride) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, windowWidth, verticalPadding, horizontalPadding, verticalStride, horizontalStride);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight,
+                  windowWidth, verticalPadding, horizontalPadding,
+                  verticalStride, horizontalStride);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
-                            const cudnnPoolingMode_t mode,
-                            const cudnnNanPropagation_t maxpoolingNanOpt,
-                            int nbDims,
-                            const int windowDimA[],
-                            const int paddingA[],
-                            const int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, const cudnnNanPropagation_t, int, const int [], const int [], const int []);
+cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor(
+    cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode,
+    const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims,
+    const int windowDimA[], const int paddingA[], const int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnPoolingDescriptor_t, const cudnnPoolingMode_t,
+      const cudnnNanPropagation_t, int, const int[], const int[], const int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA,
+                  paddingA, strideA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
-                            int nbDimsRequested,
-                            cudnnPoolingMode_t *mode,
-                            cudnnNanPropagation_t *maxpoolingNanOpt,
-                            int *nbDims,
-                            int windowDimA[],
-                            int paddingA[],
-                            int strideA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, cudnnNanPropagation_t *, int *, int [], int [], int []);
+cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor(
+    const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested,
+    cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt,
+    int *nbDims, int windowDimA[], int paddingA[], int strideA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *,
+      cudnnNanPropagation_t *, int *, int[], int[], int[]);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, windowDimA, paddingA, strideA);
+  return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims,
+                  windowDimA, paddingA, strideA);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int nbDims,
-                                  int outputTensorDimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int, int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
+                                  int nbDims, int outputTensorDimA[]) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                   const cudnnTensorDescriptor_t, int, int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPoolingNdForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA);
 }
@@ -1207,72 +1267,69 @@ cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
-                                  int *n,
-                                  int *c,
-                                  int *h,
-                                  int *w) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, const cudnnTensorDescriptor_t, int *, int *, int *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
+                                  int *n, int *c, int *h, int *w) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               int *, int *, int *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetPooling2dForwardOutputDim");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPoolingDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(poolingDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingForward(cudnnHandle_t handle,
-                    const cudnnPoolingDescriptor_t poolingDesc,
-                    const void *alpha,
-                    const cudnnTensorDescriptor_t xDesc,
-                    const void *x,
-                    const void *beta,
-                    const cudnnTensorDescriptor_t yDesc,
-                    void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingForward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnPoolingBackward(cudnnHandle_t handle,
-                     const cudnnPoolingDescriptor_t poolingDesc,
-                     const void *alpha,
-                     const cudnnTensorDescriptor_t yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t xDesc,
-                     const void *x,
-                     const void *beta,
-                     const cudnnTensorDescriptor_t dxDesc,
-                     void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward(
+    cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnPoolingBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
-                             cudnnActivationMode_t mode,
-                             cudnnNanPropagation_t reluNanOpt,
-                             double coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t, cudnnActivationMode_t, cudnnNanPropagation_t, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor(
+    cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode,
+    cudnnNanPropagation_t reluNanOpt, double coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t,
+                                               cudnnActivationMode_t,
+                                               cudnnNanPropagation_t, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1281,9 +1338,10 @@ cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
 cudnnStatus_t CUDNNWINAPI
 cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                              cudnnActivationMode_t *mode,
-                             cudnnNanPropagation_t *reluNanOpt,
-                             double *coef) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnActivationDescriptor_t, cudnnActivationMode_t *, cudnnNanPropagation_t *, double *);
+                             cudnnNanPropagation_t *reluNanOpt, double *coef) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnActivationDescriptor_t, cudnnActivationMode_t *,
+      cudnnNanPropagation_t *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc, mode, reluNanOpt, coef);
@@ -1291,65 +1349,68 @@ cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnActivationDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyActivationDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(activationDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationForward(cudnnHandle_t handle,
-                       cudnnActivationDescriptor_t activationDesc,
-                       const void *alpha,
-                       const cudnnTensorDescriptor_t xDesc,
-                       const void *x,
-                       const void *beta,
-                       const cudnnTensorDescriptor_t yDesc,
-                       void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationForward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnActivationBackward(cudnnHandle_t handle,
-                        cudnnActivationDescriptor_t activationDesc,
-                        const void *alpha,
-                        const cudnnTensorDescriptor_t yDesc,
-                        const void *y,
-                        const cudnnTensorDescriptor_t dyDesc,
-                        const void *dy,
-                        const cudnnTensorDescriptor_t xDesc,
-                        const void *x,
-                        const void *beta,
-                        const cudnnTensorDescriptor_t dxDesc,
-                        void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnActivationDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnActivationBackward(
+    cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnActivationDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnActivationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x,
+                  beta, dxDesc, dx);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int, double, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned lrnN, double lrnAlpha,
+                                                double lrnBeta, double lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int, double, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc,
+                                                unsigned *lrnN,
+                                                double *lrnAlpha,
+                                                double *lrnBeta, double *lrnK) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK);
@@ -1357,157 +1418,157 @@ cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrn
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnLRNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyLRNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(lrnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelForward(cudnnHandle_t handle,
-                            cudnnLRNDescriptor_t normDesc,
-                            cudnnLRNMode_t lrnMode,
-                            const void *alpha,
-                            const cudnnTensorDescriptor_t xDesc,
-                            const void *x,
-                            const void *beta,
-                            const cudnnTensorDescriptor_t yDesc,
-                            void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
-                             cudnnLRNDescriptor_t normDesc,
-                             cudnnLRNMode_t lrnMode,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t yDesc,
-                             const void *y,
-                             const cudnnTensorDescriptor_t dyDesc,
-                             const void *dy,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *beta,
-                             const cudnnTensorDescriptor_t dxDesc,
-                             void *dx) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode,
+    const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnLRNCrossChannelBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx);
+  return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc,
+                  x, beta, dxDesc, dx);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
-                                  cudnnLRNDescriptor_t normDesc,
-                                  cudnnDivNormMode_t mode,
-                                  const void *alpha,
-                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
-                                  const void *x,
-                                  const void *means, /* if NULL, means are assumed to be zero */
-                                  void *temp,
-                                  void *temp2,
-                                  const void *beta,
-                                  const cudnnTensorDescriptor_t yDesc,
-                                  void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, void *, void *,
+      const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, beta, yDesc, y);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2,
+                  beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
-                                   cudnnLRNDescriptor_t normDesc,
-                                   cudnnDivNormMode_t mode,
-                                   const void *alpha,
-                                   const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
-                                   const void *x,
-                                   const void *means, /* if NULL, means are assumed to be zero */
-                                   const void *dy,
-                                   void *temp,
-                                   void *temp2,
-                                   const void *beta,
-                                   const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
-                                   void *dx,                                   /* output x differential */
-                                   void *dMeans) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *, void *, const void *, const cudnnTensorDescriptor_t, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
+cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward(
+    cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc,
+    cudnnDivNormMode_t mode, const void *alpha,
+    const cudnnTensorDescriptor_t
+        xDesc, /* same desc for x, means, dy, temp, temp2 */
+    const void *x,
+    const void *means, /* if NULL, means are assumed to be zero */
+    const void *dy, void *temp, void *temp2, const void *beta,
+    const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+    void *dx,                                   /* output x differential */
+    void *dMeans) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *, void *, const void *, const cudnnTensorDescriptor_t, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDivisiveNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, temp2, beta, dXdMeansDesc, dx, dMeans);
+  return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp,
+                  temp2, beta, dXdMeansDesc, dx, dMeans);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
-                              const cudnnTensorDescriptor_t xDesc,
-                              cudnnBatchNormMode_t mode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, cudnnBatchNormMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor(
+    cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc,
+    cudnnBatchNormMode_t mode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t,
+                                               const cudnnTensorDescriptor_t,
+                                               cudnnBatchNormMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDeriveBNTensorDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(derivedBnDesc, xDesc, mode);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
-                                                         cudnnBatchNormMode_t mode,
-                                                         cudnnBatchNormOps_t bnOps,
-                                                         const cudnnTensorDescriptor_t xDesc,
-                                                         const cudnnTensorDescriptor_t zDesc,
-                                                         const cudnnTensorDescriptor_t yDesc,
-                                                         const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-                                                         const cudnnActivationDescriptor_t activationDesc,
-                                                         size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnActivationDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize");
+cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc,
+    const cudnnTensorDescriptor_t yDesc,
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
+    const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnActivationDescriptor_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>(
+      "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes);
+  return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc,
+                  bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
-                                                  cudnnBatchNormMode_t mode,
-                                                  cudnnBatchNormOps_t bnOps,
-                                                  const cudnnTensorDescriptor_t xDesc,
-                                                  const cudnnTensorDescriptor_t yDesc,
-                                                  const cudnnTensorDescriptor_t dyDesc,
-                                                  const cudnnTensorDescriptor_t dzDesc,
-                                                  const cudnnTensorDescriptor_t dxDesc,
-                                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                                  const cudnnActivationDescriptor_t activationDesc,
-                                                  size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const cudnnActivationDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationBackwardExWorkspaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc,
+    const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc,
+    const cudnnTensorDescriptor_t dxDesc,
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t,
+      const cudnnActivationDescriptor_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationBackwardExWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, dBnScaleBiasDesc, activationDesc, sizeInBytes);
+  return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc,
+                  dBnScaleBiasDesc, activationDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
-                                                     cudnnBatchNormMode_t mode,
-                                                     cudnnBatchNormOps_t bnOps,
-                                                     const cudnnActivationDescriptor_t activationDesc,
-                                                     const cudnnTensorDescriptor_t xDesc,
-                                                     size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, size_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetBatchNormalizationTrainingExReserveSpaceSize");
+cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>(
+      "cudnnGetBatchNormalizationTrainingExReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardTraining(
-    cudnnHandle_t handle,
-    cudnnBatchNormMode_t mode,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
 
     const void *alpha, /* alpha[0] = result blend factor */
     const void *beta,  /* beta[0] = dest layer blend factor */
 
-    const cudnnTensorDescriptor_t xDesc,
-    const void *x, /* NxCxHxW */
-    const cudnnTensorDescriptor_t yDesc,
-    void *y, /* NxCxHxW */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
 
     /* Shared desc for the next 6 tensors in the argument list.
        Data type to be set as follows:
@@ -1515,13 +1576,13 @@ cudnnBatchNormalizationForwardTraining(
        Dimensions for this descriptor depend on normalization mode
        - Spatial Normalization : tensors are expected to have dims 1xCx1x1
         (normalization is performed across NxHxW)
-       - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
-        (normalization is performed across N) */
+       - Per-Activation Normalization : tensors are expected to have dims of
+       1xCxHxW (normalization is performed across N) */
     const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
 
-    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
-    const void *bnScale,
-    const void *bnBias,
+    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation
+     */
+    const void *bnScale, const void *bnBias,
 
     /* MUST use factor=1 in the very first call of a complete training cycle.
        Use a factor=1/(1+n) at N-th call to the function to get
@@ -1539,248 +1600,261 @@ cudnnBatchNormalizationForwardTraining(
        of  variance[x] (factor is applied in the same way as for runningMean) */
     void *resultRunningVariance,
 
-    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
     double epsilon,
 
     /* Optionally save intermediate results from the forward pass here
        - can be reused to speed up backward pass. NULL if unused */
-    void *resultSaveMean,
-    void *resultSaveInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
+    void *resultSaveMean, void *resultSaveInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
+  return func_ptr(
+      handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc,
+      bnScale, bnBias, exponentialAverageFactor, resultRunningMean,
+      resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardTrainingEx(
-    cudnnHandle_t handle,
-    cudnnBatchNormMode_t mode,
-    cudnnBatchNormOps_t bnOps,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
 
     const void *alpha, /* alpha[0] = result blend factor */
     const void *beta,  /* beta[0] = dest layer blend factor */
 
-    const cudnnTensorDescriptor_t xDesc,
-    const void *xData,
-    const cudnnTensorDescriptor_t zDesc,
-    const void *zData,
-    const cudnnTensorDescriptor_t yDesc,
-    void *yData,
+    const cudnnTensorDescriptor_t xDesc, const void *xData,
+    const cudnnTensorDescriptor_t zDesc, const void *zData,
+    const cudnnTensorDescriptor_t yDesc, void *yData,
 
-    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-    const void *bnScale,
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
     const void *bnBias,
 
-    double exponentialAverageFactor,
-    void *resultRunningMean,
+    double exponentialAverageFactor, void *resultRunningMean,
     void *resultRunningVariance,
 
-    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and
+       backward functions. */
     double epsilon,
 
     /* Optionally save intermediate results from the forward pass here
        - can be reused to speed up backward pass. NULL if unused */
-    void *resultSaveMean,
-    void *resultSaveInvVariance,
+    void *resultSaveMean, void *resultSaveInvVariance,
 
-    cudnnActivationDescriptor_t activationDesc,
-    void *workspace,
-    size_t workSpaceSizeInBytes,
-    void *reserveSpace,
+    cudnnActivationDescriptor_t activationDesc, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, double, void *, void *, double, void *, void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTrainingEx");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, double, void *, void *, double, void *,
+      void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardTrainingEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, exponentialAverageFactor, resultRunningMean, resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance, activationDesc, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData,
+                  yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias,
+                  exponentialAverageFactor, resultRunningMean,
+                  resultRunningVariance, epsilon, resultSaveMean,
+                  resultSaveInvVariance, activationDesc, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
-                                        cudnnBatchNormMode_t mode,
-                                        const void *alpha, /* alpha[0] = result blend factor */
-                                        const void *beta,  /* beta[0] = dest layer blend factor */
-                                        const cudnnTensorDescriptor_t xDesc,
-                                        const void *x, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t yDesc,
-                                        void *y, /* NxCxHxW */
-                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
-                                        const void *bnScale,
-                                        const void *bnBias,
-                                        const void *estimatedMean,
-                                        const void *estimatedVariance,
-                                        double epsilon) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, const void *, double);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+    const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc, void *y,       /* NxCxHxW */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
+    const void *bnBias, const void *estimatedMean,
+    const void *estimatedVariance, double epsilon) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, const void *, const void *, const void *, double);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, estimatedVariance, epsilon);
+  return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y,
+                  bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean,
+                  estimatedVariance, epsilon);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationBackward(cudnnHandle_t handle,
-                                cudnnBatchNormMode_t mode,
-                                const void *alphaDataDiff,
-                                const void *betaDataDiff,
-                                const void *alphaParamDiff,
-                                const void *betaParamDiff,
-                                const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
-                                const void *x,
-                                const cudnnTensorDescriptor_t dyDesc,
-                                const void *dy,
-                                const cudnnTensorDescriptor_t dxDesc,
-                                void *dx,
-                                /* Shared tensor desc for the 4 tensors below */
-                                const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                const void *bnScale, /* bnBias doesn't affect backpropagation */
-                                /* scale and bias diff are not backpropagated below this layer */
-                                void *dBnScaleResult,
-                                void *dBnBiasResult,
-                                /* Same epsilon as forward pass */
-                                double epsilon,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff,
+    const void *betaDataDiff, const void *alphaParamDiff,
+    const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+    const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+    const void *bnScale, /* bnBias doesn't affect backpropagation */
+    /* scale and bias diff are not backpropagated below this layer */
+    void *dBnScaleResult, void *dBnBiasResult,
+    /* Same epsilon as forward pass */
+    double epsilon,
 
-                                /* Optionally cached intermediate results from
-                                   forward pass */
-                                const void *savedMean,
-                                const void *savedInvVariance) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, void *, void *, double, const void *, const void *);
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *,
+      const void *, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      const void *, void *, void *, double, const void *, const void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, epsilon, savedMean, savedInvVariance);
+  return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff,
+                  betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx,
+                  dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult,
+                  epsilon, savedMean, savedInvVariance);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
-                                  cudnnBatchNormMode_t mode,
-                                  cudnnBatchNormOps_t bnOps,
+cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx(
+    cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps,
 
-                                  const void *alphaDataDiff,
-                                  const void *betaDataDiff,
-                                  const void *alphaParamDiff,
-                                  const void *betaParamDiff,
-                                  const cudnnTensorDescriptor_t xDesc,
-                                  const void *xData,
-                                  const cudnnTensorDescriptor_t yDesc,
-                                  const void *yData,
-                                  const cudnnTensorDescriptor_t dyDesc,
-                                  const void *dyData,
-                                  const cudnnTensorDescriptor_t dzDesc,
-                                  void *dzData,
-                                  const cudnnTensorDescriptor_t dxDesc,
-                                  void *dxData,
+    const void *alphaDataDiff, const void *betaDataDiff,
+    const void *alphaParamDiff, const void *betaParamDiff,
+    const cudnnTensorDescriptor_t xDesc, const void *xData,
+    const cudnnTensorDescriptor_t yDesc, const void *yData,
+    const cudnnTensorDescriptor_t dyDesc, const void *dyData,
+    const cudnnTensorDescriptor_t dzDesc, void *dzData,
+    const cudnnTensorDescriptor_t dxDesc, void *dxData,
 
-                                  /* Shared tensor desc for the 4 tensors below */
-                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
-                                  const void *bnScaleData,
-                                  const void *bnBiasData, /* needed if there is activation */
-                                  void *dBnScaleData,
-                                  void *dBnBiasData,
-                                  double epsilon, /* Same epsilon as forward pass */
+    /* Shared tensor desc for the 4 tensors below */
+    const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData,
+    const void *bnBiasData, /* needed if there is activation */
+    void *dBnScaleData, void *dBnBiasData,
+    double epsilon, /* Same epsilon as forward pass */
 
-                                  /* Optionally cached intermediate results from
-                                     forward pass */
-                                  const void *savedMean,
-                                  const void *savedInvVariance,
-                                  cudnnActivationDescriptor_t activationDesc,
-                                  void *workSpace,
-                                  size_t workSpaceSizeInBytes,
-                                  void *reserveSpace,
-                                  size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, const void *, const void *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, double, const void *, const void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackwardEx");
+    /* Optionally cached intermediate results from
+       forward pass */
+    const void *savedMean, const void *savedInvVariance,
+    cudnnActivationDescriptor_t activationDesc, void *workSpace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *,
+      const void *, const void *, const void *, const cudnnTensorDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, const void *, const void *, void *,
+      void *, double, const void *, const void *, cudnnActivationDescriptor_t,
+      void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnBatchNormalizationBackwardEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(
+      handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff,
+      betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData,
+      dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData,
+      dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc,
+      workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t *stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateSpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
-                                       cudnnSamplerType_t samplerType,
-                                       cudnnDataType_t dataType,
-                                       const int nbDims,
-                                       const int dimA[]) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, const int, const int []);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType,
+    cudnnDataType_t dataType, const int nbDims, const int dimA[]) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t,
+      const int, const int[]);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetSpatialTransformerNdDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc, samplerType, dataType, nbDims, dimA);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
+cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor(
+    cudnnSpatialTransformerDescriptor_t stDesc) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroySpatialTransformerDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(stDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
-                                   const cudnnSpatialTransformerDescriptor_t stDesc,
-                                   const void *theta,
-                                   void *grid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *theta, void *grid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, theta, grid);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
-                                    const cudnnSpatialTransformerDescriptor_t stDesc,
-                                    const void *dgrid,
-                                    void *dtheta) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward(
+    cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *dgrid, void *dtheta) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *,
+      void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSpatialTfGridGeneratorBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, dgrid, dtheta);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
-                             cudnnSpatialTransformerDescriptor_t stDesc,
-                             const void *alpha,
-                             const cudnnTensorDescriptor_t xDesc,
-                             const void *x,
-                             const void *grid,
-                             const void *beta,
-                             cudnnTensorDescriptor_t yDesc,
-                             void *y) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, cudnnTensorDescriptor_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc,
+    void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      cudnnTensorDescriptor_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerForward");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
-                              cudnnSpatialTransformerDescriptor_t stDesc,
-                              const void *alpha,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const void *x,
-                              const void *beta,
-                              const cudnnTensorDescriptor_t dxDesc,
-                              void *dx,
-                              const void *alphaDgrid,
-                              const cudnnTensorDescriptor_t dyDesc,
-                              const void *dy,
-                              const void *grid,
-                              const void *betaDgrid,
-                              void *dgrid) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const cudnnTensorDescriptor_t, void *, const void *, const cudnnTensorDescriptor_t, const void *, const void *, const void *, void *);
+cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward(
+    cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc,
+    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
+    const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc,
+    const void *dy, const void *grid, const void *betaDgrid, void *dgrid) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *,
+      const cudnnTensorDescriptor_t, void *, const void *,
+      const cudnnTensorDescriptor_t, const void *, const void *, const void *,
+      void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSpatialTfSamplerBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, dyDesc, dy, grid, betaDgrid, dgrid);
+  return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid,
+                  dyDesc, dy, grid, betaDgrid, dgrid);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
@@ -1788,99 +1862,95 @@ cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle,
+                                                    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetStatesSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize(
+    cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutGetReserveSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(xdesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float dropout,
-                          void *states,
-                          size_t stateSizeInBytes,
-                          unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                              cudnnHandle_t handle,
-                              float dropout,
-                              void *states,
-                              size_t stateSizeInBytes,
-                              unsigned long long seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float, void *, size_t, unsigned long long);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout,
+    void *states, size_t stateSizeInBytes, unsigned long long seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float, void *, size_t, unsigned long long);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
-                          cudnnHandle_t handle,
-                          float *dropout,
-                          void **states,
-                          unsigned long long *seed) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, float *, void **, unsigned long long *);
+cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor(
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout,
+    void **states, unsigned long long *seed) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t,
+                                   float *, void **, unsigned long long *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetDropoutDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(dropoutDesc, handle, dropout, states, seed);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutForward(cudnnHandle_t handle,
-                    const cudnnDropoutDescriptor_t dropoutDesc,
-                    const cudnnTensorDescriptor_t xdesc,
-                    const void *x,
-                    const cudnnTensorDescriptor_t ydesc,
-                    void *y,
-                    void *reserveSpace,
-                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutForward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t xdesc, const void *x,
+    const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDropoutBackward(cudnnHandle_t handle,
-                     const cudnnDropoutDescriptor_t dropoutDesc,
-                     const cudnnTensorDescriptor_t dydesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dxdesc,
-                     void *dx,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnDropoutDescriptor_t, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward(
+    cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc,
+    const cudnnTensorDescriptor_t dydesc, const void *dy,
+    const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnDropoutDescriptor_t,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDropoutBackward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
@@ -1888,132 +1958,130 @@ cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      const int hiddenSize,
-                      const int numLayers,
-                      cudnnDropoutDescriptor_t dropoutDesc,
-                      cudnnRNNInputMode_t inputMode,
-                      cudnnDirectionMode_t direction,
-                      cudnnRNNMode_t mode,
-                      cudnnRNNAlgo_t algo,
-                      cudnnDataType_t mathPrec) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, mathPrec);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, mathPrec);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDescriptor(cudnnHandle_t handle,
-                      cudnnRNNDescriptor_t rnnDesc,
-                      int *hiddenSize,
-                      int *numLayers,
-                      cudnnDropoutDescriptor_t *dropoutDesc,
-                      cudnnRNNInputMode_t *inputMode,
-                      cudnnDirectionMode_t *direction,
-                      cudnnRNNMode_t *mode,
-                      cudnnRNNAlgo_t *algo,
-                      cudnnDataType_t *mathPrec) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *, cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *, cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *mode, cudnnRNNAlgo_t *algo, cudnnDataType_t *mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, mathPrec);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, mathPrec);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(
+    cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNMatrixMathType");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, mType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t biasMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc,
+                                              cudnnRNNBiasMode_t biasMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNBiasMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, biasMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t *biasMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc,
+                                              cudnnRNNBiasMode_t *biasMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBiasMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, biasMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNSetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t clipMode,
-                cudnnNanPropagation_t clipNanOpt,
-                double lclip,
-                double rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, cudnnNanPropagation_t, double, double);
+cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t clipMode,
+                                          cudnnNanPropagation_t clipNanOpt,
+                                          double lclip, double rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t,
+      cudnnNanPropagation_t, double, double);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNSetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNGetClip(cudnnHandle_t handle,
-                cudnnRNNDescriptor_t rnnDesc,
-                cudnnRNNClipMode_t *clipMode,
-                cudnnNanPropagation_t *clipNanOpt,
-                double *lclip,
-                double *rclip) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, cudnnNanPropagation_t *, double *, double *);
+cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle,
+                                          cudnnRNNDescriptor_t rnnDesc,
+                                          cudnnRNNClipMode_t *clipMode,
+                                          cudnnNanPropagation_t *clipNanOpt,
+                                          double *lclip, double *rclip) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *,
+      cudnnNanPropagation_t *, double *, double *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNGetClip");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNProjectionLayers(cudnnHandle_t handle,
-                            cudnnRNNDescriptor_t rnnDesc,
-                            const int recProjSize,
-                            const int outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
+cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+                            const int recProjSize, const int outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNProjectionLayers(cudnnHandle_t handle,
-                            const cudnnRNNDescriptor_t rnnDesc,
-                            int *recProjSize,
-                            int *outProjSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize,
+    int *outProjSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNProjectionLayers");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, recProjSize, outProjSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
-                             const int minibatch,
-                             const cudnnDataType_t dataType,
-                             cudnnPersistentRNNPlan_t *plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, const cudnnDataType_t, cudnnPersistentRNNPlan_t *);
+cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, const int minibatch,
+    const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int,
+                                               const cudnnDataType_t,
+                                               cudnnPersistentRNNPlan_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreatePersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, minibatch, dataType, plan);
@@ -2021,209 +2089,206 @@ cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnPersistentRNNPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan(
+    cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnPersistentRNNPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetPersistentRNNPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, plan);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNWorkspaceSize(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNTrainingReserveSize(cudnnHandle_t handle,
-                               const cudnnRNNDescriptor_t rnnDesc,
-                               const int seqLength,
-                               const cudnnTensorDescriptor_t *xDesc,
-                               size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNTrainingReserveSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNParamsSize(cudnnHandle_t handle,
-                      const cudnnRNNDescriptor_t rnnDesc,
-                      const cudnnTensorDescriptor_t xDesc,
-                      size_t *sizeInBytes,
+cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                      const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes,
                       cudnnDataType_t dataType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, size_t *, cudnnDataType_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t,
+      size_t *, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNParamsSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle,
-                                const cudnnRNNDescriptor_t rnnDesc,
-                                const int pseudoLayer,
-                                const cudnnTensorDescriptor_t xDesc,
-                                const cudnnFilterDescriptor_t wDesc,
-                                const void *w,
-                                const int linLayerID,
-                                cudnnFilterDescriptor_t linLayerMatDesc,
-                                void **linLayerMat) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerMatrixParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerMatDesc, linLayerMat);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerMatDesc, linLayerMat);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle,
-                              const cudnnRNNDescriptor_t rnnDesc,
-                              const int pseudoLayer,
-                              const cudnnTensorDescriptor_t xDesc,
-                              const cudnnFilterDescriptor_t wDesc,
-                              const void *w,
-                              const int linLayerID,
-                              cudnnFilterDescriptor_t linLayerBiasDesc,
-                              void **linLayerBias) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, const void *, const int, cudnnFilterDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int pseudoLayer, const cudnnTensorDescriptor_t xDesc,
+    const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID,
+    cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t,
+      const void *, const int, cudnnFilterDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNLinLayerBiasParams");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, linLayerBiasDesc, linLayerBias);
+  return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID,
+                  linLayerBiasDesc, linLayerBias);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInference(cudnnHandle_t handle,
-                         const cudnnRNNDescriptor_t rnnDesc,
-                         const int seqLength,
-                         const cudnnTensorDescriptor_t *xDesc,
-                         const void *x,
-                         const cudnnTensorDescriptor_t hxDesc,
-                         const void *hx,
-                         const cudnnTensorDescriptor_t cxDesc,
-                         const void *cx,
-                         const cudnnFilterDescriptor_t wDesc,
-                         const void *w,
-                         const cudnnTensorDescriptor_t *yDesc,
-                         void *y,
-                         const cudnnTensorDescriptor_t hyDesc,
-                         void *hy,
-                         const cudnnTensorDescriptor_t cyDesc,
-                         void *cy,
-                         void *workspace,
-                         size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInference");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTraining(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t cxDesc,
-                        const void *cx,
-                        const cudnnFilterDescriptor_t wDesc,
-                        const void *w,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        void *y,
-                        const cudnnTensorDescriptor_t hyDesc,
-                        void *hy,
-                        const cudnnTensorDescriptor_t cyDesc,
-                        void *cy,
-                        void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTraining");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardData(cudnnHandle_t handle,
-                     const cudnnRNNDescriptor_t rnnDesc,
-                     const int seqLength,
-                     const cudnnTensorDescriptor_t *yDesc,
-                     const void *y,
-                     const cudnnTensorDescriptor_t *dyDesc,
-                     const void *dy,
-                     const cudnnTensorDescriptor_t dhyDesc,
-                     const void *dhy,
-                     const cudnnTensorDescriptor_t dcyDesc,
-                     const void *dcy,
-                     const cudnnFilterDescriptor_t wDesc,
-                     const void *w,
-                     const cudnnTensorDescriptor_t hxDesc,
-                     const void *hx,
-                     const cudnnTensorDescriptor_t cxDesc,
-                     const void *cx,
-                     const cudnnTensorDescriptor_t *dxDesc,
-                     void *dx,
-                     const cudnnTensorDescriptor_t dhxDesc,
-                     void *dhx,
-                     const cudnnTensorDescriptor_t dcxDesc,
-                     void *dcx,
-                     void *workspace,
-                     size_t workSpaceSizeInBytes,
-                     void *reserveSpace,
-                     size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+                     const int seqLength, const cudnnTensorDescriptor_t *yDesc,
+                     const void *y, const cudnnTensorDescriptor_t *dyDesc,
+                     const void *dy, const cudnnTensorDescriptor_t dhyDesc,
+                     const void *dhy, const cudnnTensorDescriptor_t dcyDesc,
+                     const void *dcy, const cudnnFilterDescriptor_t wDesc,
+                     const void *w, const cudnnTensorDescriptor_t hxDesc,
+                     const void *hx, const cudnnTensorDescriptor_t cxDesc,
+                     const void *cx, const cudnnTensorDescriptor_t *dxDesc,
+                     void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+                     const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+                     void *workspace, size_t workSpaceSizeInBytes,
+                     void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeights(cudnnHandle_t handle,
-                        const cudnnRNNDescriptor_t rnnDesc,
-                        const int seqLength,
-                        const cudnnTensorDescriptor_t *xDesc,
-                        const void *x,
-                        const cudnnTensorDescriptor_t hxDesc,
-                        const void *hx,
-                        const cudnnTensorDescriptor_t *yDesc,
-                        const void *y,
-                        const void *workspace,
-                        size_t workSpaceSizeInBytes,
-                        const cudnnFilterDescriptor_t dwDesc,
-                        void *dw,
-                        const void *reserveSpace,
-                        size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    const void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t paddingMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNPaddingMode_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *paddingMode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t,
+                                               cudnnRNNPaddingMode_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNPaddingMode");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDesc, paddingMode);
@@ -2231,7 +2296,7 @@ cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNPaddingMode_t *padd
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDataDesc);
@@ -2239,338 +2304,352 @@ cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(rnnDataDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
-                          cudnnDataType_t dataType,
-                          cudnnRNNDataLayout_t layout,
-                          int maxSeqLength,
-                          int batchSize,
-                          int vectorSize,
-                          const int seqLengthArray[], /* length of each sequence in the batch */
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, int, const int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType,
+    cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize,
+    int vectorSize,
+    const int seqLengthArray[], /* length of each sequence in the batch */
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int,
+      int, const int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, paddingFill);
+  return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, seqLengthArray, paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
-                          cudnnDataType_t *dataType,
-                          cudnnRNNDataLayout_t *layout,
-                          int *maxSeqLength,
-                          int *batchSize,
-                          int *vectorSize,
-                          int arrayLengthRequested,
-                          int seqLengthArray[],
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, int *, int *, int *, int, int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor(
+    cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t *dataType,
+    cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize,
+    int *vectorSize, int arrayLengthRequested, int seqLengthArray[],
+    void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *,
+      int *, int *, int *, int, int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, vectorSize, arrayLengthRequested, seqLengthArray, paddingFill);
+  return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize,
+                  vectorSize, arrayLengthRequested, seqLengthArray,
+                  paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardTrainingEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnTensorDescriptor_t cxDesc,
-                          const void *cx,
-                          const cudnnFilterDescriptor_t wDesc,
-                          const void *w,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          void *y,
-                          const cudnnTensorDescriptor_t hyDesc,
-                          void *hy,
-                          const cudnnTensorDescriptor_t cyDesc,
-                          void *cy,
-                          const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                          const void *keys,                     /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                          void *cAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                          void *iAttn,                          /* reserved, should pass NULL */
-                          const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                          void *queries,                        /* reserved, should pass NULL */
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardTrainingEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNForwardInferenceEx(cudnnHandle_t handle,
-                           const cudnnRNNDescriptor_t rnnDesc,
-                           const cudnnRNNDataDescriptor_t xDesc,
-                           const void *x,
-                           const cudnnTensorDescriptor_t hxDesc,
-                           const void *hx,
-                           const cudnnTensorDescriptor_t cxDesc,
-                           const void *cx,
-                           const cudnnFilterDescriptor_t wDesc,
-                           const void *w,
-                           const cudnnRNNDataDescriptor_t yDesc,
-                           void *y,
-                           const cudnnTensorDescriptor_t hyDesc,
-                           void *hy,
-                           const cudnnTensorDescriptor_t cyDesc,
-                           void *cy,
-                           const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
-                           const void *keys,                     /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
-                           void *cAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
-                           void *iAttn,                          /* reserved, should pass NULL */
-                           const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
-                           void *queries,                        /* reserved, should pass NULL */
-                           void *workSpace,
-                           size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnRNNDataDescriptor_t yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy,
+    const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
+    const void *keys,                     /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
+    void *cAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
+    void *iAttn,                          /* reserved, should pass NULL */
+    const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
+    void *queries,                        /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t,
+      void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNForwardInferenceEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w,
+                  yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn,
+                  iDesc, iAttn, qDesc, queries, workSpace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardDataEx(cudnnHandle_t handle,
-                       const cudnnRNNDescriptor_t rnnDesc,
-                       const cudnnRNNDataDescriptor_t yDesc,
-                       const void *y,
-                       const cudnnRNNDataDescriptor_t dyDesc,
-                       const void *dy,
-                       const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
-                       const void *dcAttn,                    /* reserved, should pass NULL */
-                       const cudnnTensorDescriptor_t dhyDesc,
-                       const void *dhy,
-                       const cudnnTensorDescriptor_t dcyDesc,
-                       const void *dcy,
-                       const cudnnFilterDescriptor_t wDesc,
-                       const void *w,
-                       const cudnnTensorDescriptor_t hxDesc,
-                       const void *hx,
-                       const cudnnTensorDescriptor_t cxDesc,
-                       const void *cx,
-                       const cudnnRNNDataDescriptor_t dxDesc,
-                       void *dx,
-                       const cudnnTensorDescriptor_t dhxDesc,
-                       void *dhx,
-                       const cudnnTensorDescriptor_t dcxDesc,
-                       void *dcx,
-                       const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
-                       void *dkeys,                           /* reserved, should pass NULL */
-                       void *workSpace,
-                       size_t workSpaceSizeInBytes,
-                       void *reserveSpace,
-                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y,
+    const cudnnRNNDataDescriptor_t dyDesc, const void *dy,
+    const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
+    const void *dcAttn,                    /* reserved, should pass NULL */
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnRNNDataDescriptor_t dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx,
+    const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
+    void *dkeys,                           /* reserved, should pass NULL */
+    void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *,
+      const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardDataEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn,
+                  dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx,
+                  dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys,
+                  workSpace, workSpaceSizeInBytes, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRNNBackwardWeightsEx(cudnnHandle_t handle,
-                          const cudnnRNNDescriptor_t rnnDesc,
-                          const cudnnRNNDataDescriptor_t xDesc,
-                          const void *x,
-                          const cudnnTensorDescriptor_t hxDesc,
-                          const void *hx,
-                          const cudnnRNNDataDescriptor_t yDesc,
-                          const void *y,
-                          void *workSpace,
-                          size_t workSpaceSizeInBytes,
-                          const cudnnFilterDescriptor_t dwDesc,
-                          void *dw,
-                          void *reserveSpace,
-                          size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnRNNDataDescriptor_t, const void *, void *, size_t, const cudnnFilterDescriptor_t, void *, void *, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const cudnnRNNDataDescriptor_t xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace,
+    size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw,
+    void *reserveSpace, size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t,
+      const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnRNNDataDescriptor_t, const void *, void *, size_t,
+      const cudnnFilterDescriptor_t, void *, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRNNBackwardWeightsEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace,
+                  workSpaceSizeInBytes, dwDesc, dw, reserveSpace,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNAlgorithmDescriptor(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardInferenceAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardInferenceAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle,
-                                        const cudnnRNNDescriptor_t rnnDesc,
-                                        const int seqLength,
-                                        const cudnnTensorDescriptor_t *xDesc,
-                                        const void *x,
-                                        const cudnnTensorDescriptor_t hxDesc,
-                                        const void *hx,
-                                        const cudnnTensorDescriptor_t cxDesc,
-                                        const void *cx,
-                                        const cudnnFilterDescriptor_t wDesc,
-                                        const void *w,
-                                        const cudnnTensorDescriptor_t *yDesc,
-                                        void *y,
-                                        const cudnnTensorDescriptor_t hyDesc,
-                                        void *hy,
-                                        const cudnnTensorDescriptor_t cyDesc,
-                                        void *cy,
-                                        const float findIntensity,
-                                        const int requestedAlgoCount,
-                                        int *returnedAlgoCount,
-                                        cudnnAlgorithmPerformance_t *perfResults,
-                                        void *workspace,
-                                        size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardInferenceAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardInferenceAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNForwardTrainingAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNForwardTrainingAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t cxDesc,
-                                       const void *cx,
-                                       const cudnnFilterDescriptor_t wDesc,
-                                       const void *w,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       void *y,
-                                       const cudnnTensorDescriptor_t hyDesc,
-                                       void *hy,
-                                       const cudnnTensorDescriptor_t cyDesc,
-                                       void *cy,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNForwardTrainingAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t *yDesc, void *y,
+    const cudnnTensorDescriptor_t hyDesc, void *hy,
+    const cudnnTensorDescriptor_t cyDesc, void *cy, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNForwardTrainingAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx,
+                  wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardDataAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardDataAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle,
-                                    const cudnnRNNDescriptor_t rnnDesc,
-                                    const int seqLength,
-                                    const cudnnTensorDescriptor_t *yDesc,
-                                    const void *y,
-                                    const cudnnTensorDescriptor_t *dyDesc,
-                                    const void *dy,
-                                    const cudnnTensorDescriptor_t dhyDesc,
-                                    const void *dhy,
-                                    const cudnnTensorDescriptor_t dcyDesc,
-                                    const void *dcy,
-                                    const cudnnFilterDescriptor_t wDesc,
-                                    const void *w,
-                                    const cudnnTensorDescriptor_t hxDesc,
-                                    const void *hx,
-                                    const cudnnTensorDescriptor_t cxDesc,
-                                    const void *cx,
-                                    const cudnnTensorDescriptor_t *dxDesc,
-                                    void *dx,
-                                    const cudnnTensorDescriptor_t dhxDesc,
-                                    void *dhx,
-                                    const cudnnTensorDescriptor_t dcxDesc,
-                                    void *dcx,
-                                    const float findIntensity,
-                                    const int requestedAlgoCount,
-                                    int *returnedAlgoCount,
-                                    cudnnAlgorithmPerformance_t *perfResults,
-                                    void *workspace,
-                                    size_t workSpaceSizeInBytes,
-                                    void *reserveSpace,
-                                    size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnFilterDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const cudnnTensorDescriptor_t *dyDesc, const void *dy,
+    const cudnnTensorDescriptor_t dhyDesc, const void *dhy,
+    const cudnnTensorDescriptor_t dcyDesc, const void *dcy,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t cxDesc, const void *cx,
+    const cudnnTensorDescriptor_t *dxDesc, void *dx,
+    const cudnnTensorDescriptor_t dhxDesc, void *dhx,
+    const cudnnTensorDescriptor_t dcxDesc, void *dcx, const float findIntensity,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnAlgorithmPerformance_t *perfResults, void *workspace,
+    size_t workSpaceSizeInBytes, void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t,
+      void *, const cudnnTensorDescriptor_t, void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, void *, size_t, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardDataAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc,
+                  dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc,
+                  dx, dhxDesc, dhx, dcxDesc, dcx, findIntensity,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workspace,
+                  workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBackwardWeightsAlgorithmMaxCount(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetRNNBackwardWeightsAlgorithmMaxCount");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, rnnDesc, count);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle,
-                                       const cudnnRNNDescriptor_t rnnDesc,
-                                       const int seqLength,
-                                       const cudnnTensorDescriptor_t *xDesc,
-                                       const void *x,
-                                       const cudnnTensorDescriptor_t hxDesc,
-                                       const void *hx,
-                                       const cudnnTensorDescriptor_t *yDesc,
-                                       const void *y,
-                                       const float findIntensity,
-                                       const int requestedAlgoCount,
-                                       int *returnedAlgoCount,
-                                       cudnnAlgorithmPerformance_t *perfResults,
-                                       const void *workspace,
-                                       size_t workSpaceSizeInBytes,
-                                       const cudnnFilterDescriptor_t dwDesc,
-                                       void *dw,
-                                       const void *reserveSpace,
-                                       size_t reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnRNNDescriptor_t, const int, const cudnnTensorDescriptor_t *, const void *, const cudnnTensorDescriptor_t, const void *, const cudnnTensorDescriptor_t *, const void *, const float, const int, int *, cudnnAlgorithmPerformance_t *, const void *, size_t, const cudnnFilterDescriptor_t, void *, const void *, size_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
+cudnnStatus_t CUDNNWINAPI cudnnFindRNNBackwardWeightsAlgorithmEx(
+    cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc,
+    const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x,
+    const cudnnTensorDescriptor_t hxDesc, const void *hx,
+    const cudnnTensorDescriptor_t *yDesc, const void *y,
+    const float findIntensity, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnAlgorithmPerformance_t *perfResults,
+    const void *workspace, size_t workSpaceSizeInBytes,
+    const cudnnFilterDescriptor_t dwDesc, void *dw, const void *reserveSpace,
+    size_t reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnRNNDescriptor_t, const int,
+      const cudnnTensorDescriptor_t *, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t *, const void *, const float, const int,
+      int *, cudnnAlgorithmPerformance_t *, const void *, size_t,
+      const cudnnFilterDescriptor_t, void *, const void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindRNNBackwardWeightsAlgorithmEx");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, findIntensity, requestedAlgoCount, returnedAlgoCount, perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, reserveSpaceSizeInBytes);
+  return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y,
+                  findIntensity, requestedAlgoCount, returnedAlgoCount,
+                  perfResults, workspace, workSpaceSizeInBytes, dwDesc, dw,
+                  reserveSpace, reserveSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateSeqDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(seqDataDesc);
@@ -2578,47 +2657,43 @@ cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSeqDataDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroySeqDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(seqDataDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc,
-                          cudnnDataType_t dataType,
-                          int nbDims,
-                          const int dimA[],
-                          const cudnnSeqDataAxis_t axes[],
-                          size_t seqLengthArraySize,
-                          const int seqLengthArray[],
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int [], const cudnnSeqDataAxis_t [], size_t, const int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnSetSeqDataDescriptor(
+    cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims,
+    const int dimA[], const cudnnSeqDataAxis_t axes[],
+    size_t seqLengthArraySize, const int seqLengthArray[], void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int[],
+      const cudnnSeqDataAxis_t[], size_t, const int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetSeqDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize, seqLengthArray, paddingFill);
+  return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize,
+                  seqLengthArray, paddingFill);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc,
-                          cudnnDataType_t *dataType,
-                          int *nbDims,
-                          int nbDimsRequested,
-                          int dimA[],
-                          cudnnSeqDataAxis_t axes[],
-                          size_t *seqLengthArraySize,
-                          size_t seqLengthSizeRequested,
-                          int seqLengthArray[],
-                          void *paddingFill) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int [], cudnnSeqDataAxis_t [], size_t *, size_t, int [], void *);
+cudnnStatus_t CUDNNWINAPI cudnnGetSeqDataDescriptor(
+    const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t *dataType,
+    int *nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[],
+    size_t *seqLengthArraySize, size_t seqLengthSizeRequested,
+    int seqLengthArray[], void *paddingFill) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int[],
+      cudnnSeqDataAxis_t[], size_t *, size_t, int[], void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetSeqDataDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes, seqLengthArraySize, seqLengthSizeRequested, seqLengthArray, paddingFill);
+  return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes,
+                  seqLengthArraySize, seqLengthSizeRequested, seqLengthArray,
+                  paddingFill);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAttnDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAttnDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(attnDesc);
@@ -2626,217 +2701,198 @@ cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) {
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAttnDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAttnDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(attnDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
-                       cudnnAttnQueryMap_t queryMap,
-                       int nHeads,
-                       double smScaler,
-                       cudnnDataType_t dataType,
-                       cudnnDataType_t computePrec,
-                       cudnnMathType_t mathType,
-                       cudnnDropoutDescriptor_t attnDropoutDesc,
-                       cudnnDropoutDescriptor_t postDropoutDesc,
-                       int qSize,
-                       int kSize,
-                       int vSize,
-                       int qProjSize,
-                       int kProjSize,
-                       int vProjSize,
-                       int oProjSize,
-                       int qoMaxSeqLength,
-                       int kvMaxSeqLength,
-                       int maxBatchSize,
-                       int maxBeamSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAttnDescriptor_t, cudnnAttnQueryMap_t, int, double, cudnnDataType_t, cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t, cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int, int, int);
+cudnnStatus_t CUDNNWINAPI cudnnSetAttnDescriptor(
+    cudnnAttnDescriptor_t attnDesc, cudnnAttnQueryMap_t queryMap, int nHeads,
+    double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec,
+    cudnnMathType_t mathType, cudnnDropoutDescriptor_t attnDropoutDesc,
+    cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize,
+    int qProjSize, int kProjSize, int vProjSize, int oProjSize,
+    int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnAttnDescriptor_t, cudnnAttnQueryMap_t, int, double, cudnnDataType_t,
+      cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t,
+      cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int,
+      int, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAttnDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec, mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, vSize, qProjSize, kProjSize, vProjSize, oProjSize, qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize);
+  return func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec,
+                  mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize,
+                  vSize, qProjSize, kProjSize, vProjSize, oProjSize,
+                  qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
-                       cudnnAttnQueryMap_t *queryMap,
-                       int *nHeads,
-                       double *smScaler,
-                       cudnnDataType_t *dataType,
-                       cudnnDataType_t *computePrec,
-                       cudnnMathType_t *mathType,
-                       cudnnDropoutDescriptor_t *attnDropoutDesc,
-                       cudnnDropoutDescriptor_t *postDropoutDesc,
-                       int *qSize,
-                       int *kSize,
-                       int *vSize,
-                       int *qProjSize,
-                       int *kProjSize,
-                       int *vProjSize,
-                       int *oProjSize,
-                       int *qoMaxSeqLength,
-                       int *kvMaxSeqLength,
-                       int *maxBatchSize,
-                       int *maxBeamSize) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAttnDescriptor_t, cudnnAttnQueryMap_t *, int *, double *, cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *, cudnnDropoutDescriptor_t *, cudnnDropoutDescriptor_t *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAttnDescriptor(
+    cudnnAttnDescriptor_t attnDesc, cudnnAttnQueryMap_t *queryMap, int *nHeads,
+    double *smScaler, cudnnDataType_t *dataType, cudnnDataType_t *computePrec,
+    cudnnMathType_t *mathType, cudnnDropoutDescriptor_t *attnDropoutDesc,
+    cudnnDropoutDescriptor_t *postDropoutDesc, int *qSize, int *kSize,
+    int *vSize, int *qProjSize, int *kProjSize, int *vProjSize, int *oProjSize,
+    int *qoMaxSeqLength, int *kvMaxSeqLength, int *maxBatchSize,
+    int *maxBeamSize) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnAttnDescriptor_t, cudnnAttnQueryMap_t *, int *, double *,
+      cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *,
+      cudnnDropoutDescriptor_t *, cudnnDropoutDescriptor_t *, int *, int *,
+      int *, int *, int *, int *, int *, int *, int *, int *, int *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAttnDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec, mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, vSize, qProjSize, kProjSize, vProjSize, oProjSize, qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize);
+  return func_ptr(attnDesc, queryMap, nHeads, smScaler, dataType, computePrec,
+                  mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize,
+                  vSize, qProjSize, kProjSize, vProjSize, oProjSize,
+                  qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle,
-                             const cudnnAttnDescriptor_t attnDesc,
-                             size_t *weightSizeInBytes,
-                             size_t *workSpaceSizeInBytes,
-                             size_t *reserveSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnBuffers(
+    cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc,
+    size_t *weightSizeInBytes, size_t *workSpaceSizeInBytes,
+    size_t *reserveSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetMultiHeadAttnBuffers");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes, reserveSpaceSizeInBytes);
+  return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes,
+                  reserveSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetMultiHeadAttnWeights(cudnnHandle_t handle,
-                             const cudnnAttnDescriptor_t attnDesc,
-                             cudnnMultiHeadAttnWeightKind_t wKind,
-                             size_t weightSizeInBytes,
-                             const void *w,
-                             cudnnTensorDescriptor_t wDesc,
-                             void **wAddr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnMultiHeadAttnWeightKind_t, size_t, const void *, cudnnTensorDescriptor_t, void **);
+cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnWeights(
+    cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc,
+    cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes,
+    const void *w, cudnnTensorDescriptor_t wDesc, void **wAddr) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnAttnDescriptor_t,
+      cudnnMultiHeadAttnWeightKind_t, size_t, const void *,
+      cudnnTensorDescriptor_t, void **);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetMultiHeadAttnWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, attnDesc, wKind, weightSizeInBytes, w, wDesc, wAddr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnMultiHeadAttnForward(cudnnHandle_t handle,
-                          const cudnnAttnDescriptor_t attnDesc,
-                          int currIdx,
-                          const int *loWinIdx,
-                          const int *hiWinIdx,
-                          const int *seqLengthArrayQRO,
-                          const int *seqLengthArrayKV,
-                          const cudnnSeqDataDescriptor_t qDesc,
-                          const void *queries,
-                          const void *residuals,
-                          const cudnnSeqDataDescriptor_t kDesc,
-                          const void *keys,
-                          const cudnnSeqDataDescriptor_t vDesc,
-                          const void *values,
-                          const cudnnSeqDataDescriptor_t oDesc,
-                          void *out,
-                          size_t weightSizeInBytes,
-                          const void *w,
-                          size_t workSpaceSizeInBytes,
-                          void *workSpace,
-                          size_t reserveSpaceSizeInBytes,
-                          void *reserveSpace) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnAttnDescriptor_t, int, const int *, const int *, const int *, const int *, const cudnnSeqDataDescriptor_t, const void *, const void *, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t, void *, size_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnForward(
+    cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx,
+    const int *loWinIdx, const int *hiWinIdx, const int *seqLengthArrayQRO,
+    const int *seqLengthArrayKV, const cudnnSeqDataDescriptor_t qDesc,
+    const void *queries, const void *residuals,
+    const cudnnSeqDataDescriptor_t kDesc, const void *keys,
+    const cudnnSeqDataDescriptor_t vDesc, const void *values,
+    const cudnnSeqDataDescriptor_t oDesc, void *out, size_t weightSizeInBytes,
+    const void *w, size_t workSpaceSizeInBytes, void *workSpace,
+    size_t reserveSpaceSizeInBytes, void *reserveSpace) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnAttnDescriptor_t, int, const int *, const int *,
+      const int *, const int *, const cudnnSeqDataDescriptor_t, const void *,
+      const void *, const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t,
+      void *, size_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnMultiHeadAttnForward");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx, seqLengthArrayQRO, seqLengthArrayKV, qDesc, queries, residuals, kDesc, keys, vDesc, values, oDesc, out, weightSizeInBytes, w, workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, reserveSpace);
+  return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx,
+                  seqLengthArrayQRO, seqLengthArrayKV, qDesc, queries,
+                  residuals, kDesc, keys, vDesc, values, oDesc, out,
+                  weightSizeInBytes, w, workSpaceSizeInBytes, workSpace,
+                  reserveSpaceSizeInBytes, reserveSpace);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnMultiHeadAttnBackwardData(cudnnHandle_t handle,
-                               const cudnnAttnDescriptor_t attnDesc,
-                               const int *loWinIdx,
-                               const int *hiWinIdx,
-                               const int *seqLengthArrayDQDO,
-                               const int *seqLengthArrayDKDV,
-                               const cudnnSeqDataDescriptor_t doDesc,
-                               const void *dout,
-                               const cudnnSeqDataDescriptor_t dqDesc,
-                               void *dqueries,
-                               const void *queries,
-                               const cudnnSeqDataDescriptor_t dkDesc,
-                               void *dkeys,
-                               const void *keys,
-                               const cudnnSeqDataDescriptor_t dvDesc,
-                               void *dvalues,
-                               const void *values,
-                               size_t weightSizeInBytes,
-                               const void *w,
-                               size_t workSpaceSizeInBytes,
-                               void *workSpace,
-                               size_t reserveSpaceSizeInBytes,
-                               void *reserveSpace) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnAttnDescriptor_t, const int *, const int *, const int *, const int *, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, void *, const void *, const cudnnSeqDataDescriptor_t, void *, const void *, const cudnnSeqDataDescriptor_t, void *, const void *, size_t, const void *, size_t, void *, size_t, void *);
+cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardData(
+    cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc,
+    const int *loWinIdx, const int *hiWinIdx, const int *seqLengthArrayDQDO,
+    const int *seqLengthArrayDKDV, const cudnnSeqDataDescriptor_t doDesc,
+    const void *dout, const cudnnSeqDataDescriptor_t dqDesc, void *dqueries,
+    const void *queries, const cudnnSeqDataDescriptor_t dkDesc, void *dkeys,
+    const void *keys, const cudnnSeqDataDescriptor_t dvDesc, void *dvalues,
+    const void *values, size_t weightSizeInBytes, const void *w,
+    size_t workSpaceSizeInBytes, void *workSpace,
+    size_t reserveSpaceSizeInBytes, void *reserveSpace) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnAttnDescriptor_t, const int *, const int *,
+      const int *, const int *, const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, void *, const void *,
+      const cudnnSeqDataDescriptor_t, void *, const void *,
+      const cudnnSeqDataDescriptor_t, void *, const void *, size_t,
+      const void *, size_t, void *, size_t, void *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnMultiHeadAttnBackwardData");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, seqLengthArrayDQDO, seqLengthArrayDKDV, doDesc, dout, dqDesc, dqueries, queries, dkDesc, dkeys, keys, dvDesc, dvalues, values, weightSizeInBytes, w, workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, reserveSpace);
+  return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, seqLengthArrayDQDO,
+                  seqLengthArrayDKDV, doDesc, dout, dqDesc, dqueries, queries,
+                  dkDesc, dkeys, keys, dvDesc, dvalues, values,
+                  weightSizeInBytes, w, workSpaceSizeInBytes, workSpace,
+                  reserveSpaceSizeInBytes, reserveSpace);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle,
-                                  const cudnnAttnDescriptor_t attnDesc,
-                                  cudnnWgradMode_t addGrad,
-                                  const cudnnSeqDataDescriptor_t qDesc,
-                                  const void *queries,
-                                  const cudnnSeqDataDescriptor_t kDesc,
-                                  const void *keys,
-                                  const cudnnSeqDataDescriptor_t vDesc,
-                                  const void *values,
-                                  const cudnnSeqDataDescriptor_t doDesc,
-                                  const void *dout,
-                                  size_t weightSizeInBytes,
-                                  const void *w,
-                                  void *dw,
-                                  size_t workSpaceSizeInBytes,
-                                  void *workSpace,
-                                  size_t reserveSpaceSizeInBytes,
-                                  void *reserveSpace) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, const void *, const cudnnSeqDataDescriptor_t, const void *, size_t, const void *, void *, size_t, void *, size_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnMultiHeadAttnBackwardWeights");
+cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardWeights(
+    cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc,
+    cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc,
+    const void *queries, const cudnnSeqDataDescriptor_t kDesc, const void *keys,
+    const cudnnSeqDataDescriptor_t vDesc, const void *values,
+    const cudnnSeqDataDescriptor_t doDesc, const void *dout,
+    size_t weightSizeInBytes, const void *w, void *dw,
+    size_t workSpaceSizeInBytes, void *workSpace,
+    size_t reserveSpaceSizeInBytes, void *reserveSpace) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t,
+      const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, const void *,
+      const cudnnSeqDataDescriptor_t, const void *, size_t, const void *,
+      void *, size_t, void *, size_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnMultiHeadAttnBackwardWeights");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc, values, doDesc, dout, weightSizeInBytes, w, dw, workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, reserveSpace);
+  return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc,
+                  values, doDesc, dout, weightSizeInBytes, w, dw,
+                  workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes,
+                  reserveSpace);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
-                            cudnnDataType_t compType,
-                            cudnnLossNormalizationMode_t normMode,
-                            cudnnNanPropagation_t gradMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, cudnnNanPropagation_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptorEx(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType,
+    cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t,
+      cudnnNanPropagation_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCTCLossDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType, normMode, gradMode);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
-                            cudnnDataType_t *compType,
-                            cudnnLossNormalizationMode_t *normMode,
-                            cudnnNanPropagation_t *gradMode) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *, cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptorEx(
+    cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType,
+    cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnCTCLossDescriptor_t, cudnnDataType_t *,
+      cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossDescriptorEx");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc, compType, normMode, gradMode);
@@ -2844,82 +2900,102 @@ cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyCTCLossDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(ctcLossDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCTCLoss(
+cudnnStatus_t CUDNNWINAPI cudnnCTCLoss(
     cudnnHandle_t handle,
     const cudnnTensorDescriptor_t
-        probsDesc,     /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
-                          mini batch size, A is the alphabet size)  */
-    const void *probs, /* probabilities after softmax, in GPU memory */
-    const int *labels, /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    void *costs,                                 /* the returned costs of CTC, in GPU memory */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
-    const void *gradients,   /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the timing steps, N is the
+                      mini batch size, A is the alphabet size)  */
+    const void *probs,       /* probabilities after softmax, in GPU memory */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    void *costs,             /* the returned costs of CTC, in GPU memory */
+    const cudnnTensorDescriptor_t
+        gradientsDesc, /* Tensor descriptor for gradients, the dimensions are
+                          T,N,A */
+    const void *gradients,   /* the returned CTC gradients, in GPU memory, to
+                                compute costs only, set it to NULL */
     cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
     cudnnCTCLossDescriptor_t ctcLossDesc,
-    void *workspace,              /* pointer to the workspace, in GPU memory */
+    void *workspace, /* pointer to the workspace, in GPU memory */
     size_t workSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *, const int *, const int *, void *, const cudnnTensorDescriptor_t, const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, const int *,
+      const int *, const int *, void *, const cudnnTensorDescriptor_t,
+      const void *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *,
+      size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCTCLoss");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths, costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace, workSpaceSizeInBytes);
+  return func_ptr(handle, probsDesc, probs, labels, labelLengths, inputLengths,
+                  costs, gradientsDesc, gradients, algo, ctcLossDesc, workspace,
+                  workSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCTCLossWorkspaceSize(
+cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize(
     cudnnHandle_t handle,
-    const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
-                                                timing steps, N is the mini batch size, A is the alphabet size) */
-    const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
-                                                    dimensions are T,N,A. To compute costs
-                                                    only, set it to NULL */
-    const int *labels,                           /* labels, in CPU memory */
-    const int *labelLengths,                     /* the length of each label, in CPU memory */
-    const int *inputLengths,                     /* the lengths of timing steps in each batch, in CPU memory */
-    cudnnCTCLossAlgo_t algo,                     /* algorithm selected, supported now 0 and 1 */
-    cudnnCTCLossDescriptor_t ctcLossDesc,
-    size_t *sizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, const int *, const int *, const int *, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
+    const cudnnTensorDescriptor_t
+        probsDesc, /* Tensor descriptor for probabilities, the dimensions are
+                      T,N,A (T is the
+                      timing steps, N is the mini batch size, A is the alphabet
+                      size) */
+    const cudnnTensorDescriptor_t
+        gradientsDesc,       /* Tensor descriptor for gradients, the
+                                dimensions are T,N,A. To compute costs
+                                only, set it to NULL */
+    const int *labels,       /* labels, in CPU memory */
+    const int *labelLengths, /* the length of each label, in CPU memory */
+    const int *inputLengths, /* the lengths of timing steps in each batch, in
+                                CPU memory */
+    cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
+    cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnTensorDescriptor_t,
+      const cudnnTensorDescriptor_t, const int *, const int *, const int *,
+      cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCTCLossWorkspaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, inputLengths, algo, ctcLossDesc, sizeInBytes);
+  return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths,
+                  inputLengths, algo, ctcLossDesc, sizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmDescriptor(
+    cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithm_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithm_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc, algorithm);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnCopyAlgorithmDescriptor(
+    const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnAlgorithmDescriptor_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCopyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(src, dest);
@@ -2927,236 +3003,255 @@ cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorith
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmDescriptor");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToCreate);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t algoDesc,
-                             cudnnStatus_t status,
-                             float time,
-                             size_t memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t, cudnnStatus_t, float, size_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc,
+    cudnnStatus_t status, float time, size_t memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t,
+                                               cudnnAlgorithmDescriptor_t,
+                                               cudnnStatus_t, float, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
-                             cudnnAlgorithmDescriptor_t *algoDesc,
-                             cudnnStatus_t *status,
-                             float *time,
-                             size_t *memory) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *, cudnnStatus_t *, float *, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmPerformance(
+    const cudnnAlgorithmPerformance_t algoPerf,
+    cudnnAlgorithmDescriptor_t *algoDesc, cudnnStatus_t *status, float *time,
+    size_t *memory) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnAlgorithmPerformance_t, cudnnAlgorithmDescriptor_t *,
+      cudnnStatus_t *, float *, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, algoDesc, status, time, memory);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
+cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance(
+    cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyAlgorithmPerformance");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(algoPerf, numberToDestroy);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetAlgorithmSpaceSize(
+    cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+    size_t *algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetAlgorithmSpaceSize");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnSaveAlgorithm(cudnnHandle_t handle,
-                   cudnnAlgorithmDescriptor_t algoDesc,
-                   void *algoSpace,
-                   size_t algoSpaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
+cudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc,
+                   void *algoSpace, size_t algoSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnAlgorithmDescriptor_t, void *, size_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSaveAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoDesc, algoSpace, algoSpaceSizeInBytes);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnRestoreAlgorithm(cudnnHandle_t handle,
-                      void *algoSpace,
-                      size_t algoSpaceSizeInBytes,
-                      cudnnAlgorithmDescriptor_t algoDesc) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, void *, size_t, cudnnAlgorithmDescriptor_t);
+cudnnStatus_t CUDNNWINAPI cudnnRestoreAlgorithm(
+    cudnnHandle_t handle, void *algoSpace, size_t algoSpaceSizeInBytes,
+    cudnnAlgorithmDescriptor_t algoDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, void *, size_t,
+                                               cudnnAlgorithmDescriptor_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnRestoreAlgorithm");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, algoSpace, algoSpaceSizeInBytes, algoDesc);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata,
+                                           cudnnCallback_t fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
+cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata,
+                                           cudnnCallback_t *fptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetCallback");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(mask, udata, fptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *, cudnnFusedOps_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFusedOpsConstParamPack");
+cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsConstParamPack(
+    cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *,
+                                               cudnnFusedOps_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateFusedOpsConstParamPack");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(constPack, ops);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsConstParamPack");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsConstParamPack");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(constPack);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack,
-                                        cudnnFusedOpsConstParamLabel_t paramLabel,
-                                        const void *param) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, const void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFusedOpsConstParamPackAttribute");
+cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsConstParamPackAttribute(
+    cudnnFusedOpsConstParamPack_t constPack,
+    cudnnFusedOpsConstParamLabel_t paramLabel, const void *param) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t,
+                                               cudnnFusedOpsConstParamLabel_t,
+                                               const void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetFusedOpsConstParamPackAttribute");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(constPack, paramLabel, param);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack,
-                                        cudnnFusedOpsConstParamLabel_t paramLabel,
-                                        void *param,
-                                        int *isNULL) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, void *, int *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFusedOpsConstParamPackAttribute");
+cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsConstParamPackAttribute(
+    const cudnnFusedOpsConstParamPack_t constPack,
+    cudnnFusedOpsConstParamLabel_t paramLabel, void *param, int *isNULL) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t,
+      void *, int *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetFusedOpsConstParamPackAttribute");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(constPack, paramLabel, param, isNULL);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFusedOpsVariantParamPack");
+cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsVariantParamPack(
+    cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateFusedOpsVariantParamPack");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(varPack, ops);
 }
 
 cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsVariantParamPack");
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsVariantParamPack");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(varPack);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack,
-                                          cudnnFusedOpsVariantParamLabel_t paramLabel,
-                                          void *ptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t, cudnnFusedOpsVariantParamLabel_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetFusedOpsVariantParamPackAttribute");
+cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsVariantParamPackAttribute(
+    cudnnFusedOpsVariantParamPack_t varPack,
+    cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t,
+                                   cudnnFusedOpsVariantParamLabel_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetFusedOpsVariantParamPackAttribute");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(varPack, paramLabel, ptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack,
-                                          cudnnFusedOpsVariantParamLabel_t paramLabel,
-                                          void *ptr) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(const cudnnFusedOpsVariantParamPack_t, cudnnFusedOpsVariantParamLabel_t, void *);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetFusedOpsVariantParamPackAttribute");
+cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsVariantParamPackAttribute(
+    const cudnnFusedOpsVariantParamPack_t varPack,
+    cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnFusedOpsVariantParamPack_t,
+                                   cudnnFusedOpsVariantParamLabel_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetFusedOpsVariantParamPackAttribute");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(varPack, paramLabel, ptr);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, cudnnFusedOps_t ops) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t);
+cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan,
+                                                  cudnnFusedOps_t ops) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFusedOpsPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan, ops);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnFusedOpsPlan_t);
+cudnnStatus_t CUDNNWINAPI cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(plan);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnMakeFusedOpsPlan(cudnnHandle_t handle,
-                      cudnnFusedOpsPlan_t plan,
+cudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan,
                       const cudnnFusedOpsConstParamPack_t constPack,
                       size_t *workspaceSizeInBytes) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t, size_t *);
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t,
+      size_t *);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnMakeFusedOpsPlan");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, plan, constPack, workspaceSizeInBytes);
 }
 
 cudnnStatus_t CUDNNWINAPI
-cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t, cudnnFusedOpsVariantParamPack_t);
+cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan,
+                     cudnnFusedOpsVariantParamPack_t varPack) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t,
+                                   cudnnFusedOpsVariantParamPack_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFusedOpsExecute");
   if (!func_ptr) return GetSymbolNotFoundError();
   return func_ptr(handle, plan, varPack);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v6(cudnnHandle_t handle,
-                         cudnnRNNDescriptor_t rnnDesc,
-                         const int hiddenSize,
-                         const int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnRNNAlgo_t algo,
-                         cudnnDataType_t mathPrec) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t mode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, algo, mathPrec);
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, mode, algo, mathPrec);
 }
 
-cudnnStatus_t CUDNNWINAPI
-cudnnSetRNNDescriptor_v5(cudnnRNNDescriptor_t rnnDesc,
-                         int hiddenSize,
-                         int numLayers,
-                         cudnnDropoutDescriptor_t dropoutDesc,
-                         cudnnRNNInputMode_t inputMode,
-                         cudnnDirectionMode_t direction,
-                         cudnnRNNMode_t mode,
-                         cudnnDataType_t mathPrec) {
-  using FuncPtr = cudnnStatus_t (CUDNNWINAPI *)(cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t, cudnnDataType_t);
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v5(
+    cudnnRNNDescriptor_t rnnDesc, int hiddenSize, int numLayers,
+    cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode,
+    cudnnDirectionMode_t direction, cudnnRNNMode_t mode,
+    cudnnDataType_t mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, int, int, cudnnDropoutDescriptor_t,
+      cudnnRNNInputMode_t, cudnnDirectionMode_t, cudnnRNNMode_t,
+      cudnnDataType_t);
   static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v5");
   if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode, direction, mode, mathPrec);
+  return func_ptr(rnnDesc, hiddenSize, numLayers, dropoutDesc, inputMode,
+                  direction, mode, mathPrec);
 }
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cusparse_10_1.inc b/tensorflow/stream_executor/cuda/cusparse_10_1.inc
index 3b7f3815829..e94aa081b8c 100644
--- a/tensorflow/stream_executor/cuda/cusparse_10_1.inc
+++ b/tensorflow/stream_executor/cuda/cusparse_10_1.inc
@@ -8225,6 +8225,6 @@ cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM_bufferSize(
                   bufferSize);
 }
 
-#endif // _WIN32
+#endif  // _WIN32
 
 }  // extern "C"
diff --git a/tensorflow/stream_executor/cuda/cusparse_10_2.inc b/tensorflow/stream_executor/cuda/cusparse_10_2.inc
index 3b7f3815829..e94aa081b8c 100644
--- a/tensorflow/stream_executor/cuda/cusparse_10_2.inc
+++ b/tensorflow/stream_executor/cuda/cusparse_10_2.inc
@@ -8225,6 +8225,6 @@ cusparseStatus_t CUSPARSEAPI cusparseConstrainedGeMM_bufferSize(
                   bufferSize);
 }
 
-#endif // _WIN32
+#endif  // _WIN32
 
 }  // extern "C"