From 18d3a8c2e8b4365d458862e4e315123e7994816f Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 25 Apr 2019 14:11:04 -0700 Subject: [PATCH] Add cuda runtime 9.0 API to dlopen wrapper. PiperOrigin-RevId: 245304515 --- tensorflow/stream_executor/cuda/BUILD | 2 +- .../stream_executor/cuda/cuda_runtime_9_0.inc | 1410 +++++++++++++++++ .../stream_executor/cuda/cudart_stub.cc | 6 + 3 files changed, 1417 insertions(+), 1 deletion(-) create mode 100644 tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD index 53521fb93ca..d5b77081ff6 100644 --- a/tensorflow/stream_executor/cuda/BUILD +++ b/tensorflow/stream_executor/cuda/BUILD @@ -118,7 +118,7 @@ cc_library( "//tensorflow:using_cuda_clang_with_dynamic_build": ["cudart_stub.cc"], "//conditions:default": [], }), - textual_hdrs = ["cuda_runtime_10_0.inc"], + textual_hdrs = glob(["cuda_runtime_*.inc"]), visibility = ["//visibility:public"], deps = select({ "//tensorflow:using_cuda_nvcc_with_dynamic_build": [ diff --git a/tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc b/tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc new file mode 100644 index 00000000000..3acb28626ea --- /dev/null +++ b/tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc @@ -0,0 +1,1410 @@ +// Auto-generated, do not edit. + +extern "C" { + +extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaDeviceReset"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceSynchronize(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, + size_t value) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); + static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(limit, value); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); + static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pValue, limit); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); + static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCacheConfig); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); + static auto func_ptr = + LoadSymbol("cudaDeviceGetStreamPriorityRange"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(leastPriority, greatestPriority); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); + static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(cacheConfig); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); + static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pConfig); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); + static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); + static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, pciBusId); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, + int len, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pciBusId, len, device); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); + static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event, handle); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); + static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, devPtr); +} + +extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( + void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, handle, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr); +} + +extern __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaThreadExit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ cudaError_t CUDARTAPI cudaThreadSetLimit(enum cudaLimit limit, + size_t value) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); + static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(limit, value); +} + +extern __host__ cudaError_t CUDARTAPI cudaThreadGetLimit(size_t *pValue, + enum cudaLimit limit) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); + static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pValue, limit); +} + +extern __host__ cudaError_t CUDARTAPI +cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); + static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCacheConfig); +} + +extern __host__ cudaError_t CUDARTAPI +cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); + static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(cacheConfig); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetLastError(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaGetLastError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaPeekAtLastError(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ const char *CUDARTAPI +cudaGetErrorName(cudaError_t error) { + using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); + static auto func_ptr = LoadSymbol("cudaGetErrorName"); + if (!func_ptr) return "cudaGetErrorName symbol not found."; + return func_ptr(error); +} + +extern __host__ __cudart_builtin__ const char *CUDARTAPI +cudaGetErrorString(cudaError_t error) { + using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); + static auto func_ptr = LoadSymbol("cudaGetErrorString"); + if (!func_ptr) return "cudaGetErrorString symbol not found."; + return func_ptr(error); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetDeviceCount(int *count) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); + static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(prop, device); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceAttr, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, attr, device); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, + int srcDevice, int dstDevice) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, attr, srcDevice, dstDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaChooseDevice(int *device, const struct cudaDeviceProp *prop) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *); + static auto func_ptr = LoadSymbol("cudaChooseDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, prop); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int); + static auto func_ptr = LoadSymbol("cudaSetDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetDevice(int *device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaGetDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr, + int len) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int); + static auto func_ptr = LoadSymbol("cudaSetValidDevices"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device_arr, len); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int); + static auto func_ptr = LoadSymbol("cudaSetDeviceFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *); + static auto func_ptr = LoadSymbol("cudaGetDeviceFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *); + static auto func_ptr = LoadSymbol("cudaStreamCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pStream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int); + static auto func_ptr = LoadSymbol("cudaStreamCreateWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pStream, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, + int priority) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int); + static auto func_ptr = LoadSymbol("cudaStreamCreateWithPriority"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pStream, flags, priority); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamGetPriority(cudaStream_t hStream, int *priority) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *); + static auto func_ptr = LoadSymbol("cudaStreamGetPriority"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, priority); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *); + static auto func_ptr = LoadSymbol("cudaStreamGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamDestroy(cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); + static auto func_ptr = LoadSymbol("cudaStreamDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( + cudaStream_t stream, cudaEvent_t event, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaStreamWaitEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, event, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, + void *userData, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t, + void *, unsigned int); + static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, callback, userData, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaStreamSynchronize(cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); + static auto func_ptr = LoadSymbol("cudaStreamSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); + static auto func_ptr = LoadSymbol("cudaStreamQuery"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, + size_t length __dv(0), + unsigned int flags __dv(cudaMemAttachSingle)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaStreamAttachMemAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, devPtr, length, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *); + static auto func_ptr = LoadSymbol("cudaEventCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int); + static auto func_ptr = LoadSymbol("cudaEventCreateWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaEventRecord"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaEventQuery"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event); +} + +extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaEventSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaEventDestroy(cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaEventDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event); +} + +extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms, + cudaEvent_t start, + cudaEvent_t end) { + using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaEventElapsedTime"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ms, start, end); +} + +extern __host__ cudaError_t CUDARTAPI +cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, + size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaLaunchKernel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel( + const void *func, dim3 gridDim, dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **, + size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaLaunchCooperativeKernel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func, gridDim, blockDim, args, sharedMem, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernelMultiDevice( + struct cudaLaunchParams *launchParamsList, unsigned int numDevices, + unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *, + unsigned int, unsigned int); + static auto func_ptr = + LoadSymbol("cudaLaunchCooperativeKernelMultiDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(launchParamsList, numDevices, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache); + static auto func_ptr = LoadSymbol("cudaFuncSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func, cacheConfig); +} + +extern __host__ cudaError_t CUDARTAPI +cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig); + static auto func_ptr = LoadSymbol("cudaFuncSetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func, config); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *); + static auto func_ptr = LoadSymbol("cudaFuncGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attr, func); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int); + static auto func_ptr = LoadSymbol("cudaFuncSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForDevice(double *d) { + using FuncPtr = cudaError_t(CUDARTAPI *)(double *); + static auto func_ptr = LoadSymbol("cudaSetDoubleForDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(d); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetDoubleForHost(double *d) { + using FuncPtr = cudaError_t(CUDARTAPI *)(double *); + static auto func_ptr = LoadSymbol("cudaSetDoubleForHost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(d); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, + int blockSize, + size_t dynamicSMemSize) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t); + static auto func_ptr = + LoadSymbol("cudaOccupancyMaxActiveBlocksPerMultiprocessor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, + const void *func, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int); + static auto func_ptr = LoadSymbol( + "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem __dv(0), + cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(dim3, dim3, size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaConfigureCall"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(gridDim, blockDim, sharedMem, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaSetupArgument(const void *arg, + size_t size, + size_t offset) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, size_t); + static auto func_ptr = LoadSymbol("cudaSetupArgument"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(arg, size, offset); +} + +extern __host__ cudaError_t CUDARTAPI cudaLaunch(const void *func) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *); + static auto func_ptr = LoadSymbol("cudaLaunch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(func); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMallocManaged( + void **devPtr, size_t size, unsigned int flags __dv(cudaMemAttachGlobal)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaMallocManaged"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, size, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMalloc(void **devPtr, size_t size) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); + static auto func_ptr = LoadSymbol("cudaMalloc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, size); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void **ptr, size_t size) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t); + static auto func_ptr = LoadSymbol("cudaMallocHost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr, + size_t *pitch, + size_t width, + size_t height) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t); + static auto func_ptr = LoadSymbol("cudaMallocPitch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, pitch, width, height); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocArray( + cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, + size_t height __dv(0), unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, + const struct cudaChannelFormatDesc *, + size_t, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaMallocArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array, desc, width, height, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaFree(void *devPtr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaFree"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr); +} + +extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaFreeHost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr); +} + +extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t); + static auto func_ptr = LoadSymbol("cudaFreeArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array); +} + +extern __host__ cudaError_t CUDARTAPI +cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t); + static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, + unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostAlloc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHost, size, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, + unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostRegister"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaHostUnregister"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pDevice, pHost, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, + void *pHost) { + using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); + static auto func_ptr = LoadSymbol("cudaHostGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFlags, pHost); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); + static auto func_ptr = LoadSymbol("cudaMalloc3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, extent); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, + struct cudaExtent extent, unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, + const struct cudaChannelFormatDesc *, + struct cudaExtent, unsigned int); + static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array, desc, extent, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( + cudaMipmappedArray_t *mipmappedArray, + const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, + unsigned int numLevels, unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, + struct cudaExtent, unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray, desc, extent, numLevels, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( + cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, + unsigned int level) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(levelArray, mipmappedArray, level); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); + static auto func_ptr = LoadSymbol("cudaMemcpy3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *); + static auto func_ptr = LoadSymbol("cudaMemcpy3DPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync( + const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy3DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync( + const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *, + cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy3DPeerAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free, + size_t *total) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaMemGetInfo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(free, total); +} + +extern __host__ cudaError_t CUDARTAPI +cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, + unsigned int *flags, cudaArray_t array) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, + struct cudaExtent *, unsigned int *, + cudaArray_t); + static auto func_ptr = LoadSymbol("cudaArrayGetInfo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(desc, extent, flags, array); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void *src, + size_t count, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, + enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, + const void *src, + int srcDevice, + size_t count) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); + static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dstDevice, src, srcDevice, count); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, + const void *src, size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, wOffset, hOffset, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( + cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, + enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, + size_t, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, + count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, + const void *src, + size_t spitch, size_t width, + size_t height, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, spitch, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, + size_t, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( + void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, + size_t, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( + cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, + size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, + cudaArray_const_t, size_t, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, + width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( + const void *symbol, const void *src, size_t count, size_t offset __dv(0), + enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, + size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, src, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( + void *dst, const void *symbol, size_t count, size_t offset __dv(0), + enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, + enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, symbol, count, offset, kind); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemcpyAsync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, count, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, + size_t count, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, + size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dstDevice, src, srcDevice, count, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromArrayAsync( + void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, + size_t count, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( + void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, + cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, + const void *, size_t, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, + stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( + void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, + cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, + size_t, size_t, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, + stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( + const void *symbol, const void *src, size_t count, size_t offset, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, src, count, offset, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( + void *dst, const void *symbol, size_t count, size_t offset, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, symbol, count, offset, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, + size_t count) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); + static auto func_ptr = LoadSymbol("cudaMemset"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, value, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, + int value, size_t width, + size_t height) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); + static auto func_ptr = LoadSymbol("cudaMemset2D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, pitch, value, width, height); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset3D( + struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); + static auto func_ptr = LoadSymbol("cudaMemset3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, value, extent); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( + void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemsetAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, value, count, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, + size_t height, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, + cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, pitch, value, width, height, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, + struct cudaExtent extent, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, + struct cudaExtent, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, value, extent, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, + const void *symbol) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); + static auto func_ptr = LoadSymbol("cudaGetSymbolAddress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, symbol); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, + const void *symbol) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); + static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(size, symbol); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, + cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, dstDevice, stream); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, + enum cudaMemoryAdvise, int); + static auto func_ptr = LoadSymbol("cudaMemAdvise"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, advice, device); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( + void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, + const void *devPtr, size_t count) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); + static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSize, attribute, devPtr, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( + void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, + size_t numAttributes, const void *devPtr, size_t count) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, + size_t, const void *, size_t); + static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( + struct cudaPointerAttributes *attributes, const void *ptr) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); + static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attributes, ptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(canAccessPeer, device, peerDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); + static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerDevice, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceDisablePeerAccess(int peerDevice) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int); + static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); + static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( + cudaGraphicsResource_t resource, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(resource, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( + int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( + int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( + void **devPtr, size_t *size, cudaGraphicsResource_t resource) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); + static auto func_ptr = + LoadSymbol("cudaGraphicsResourceGetMappedPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, size, resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( + cudaArray_t *array, cudaGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); + static auto func_ptr = + LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array, resource, arrayIndex, mipLevel); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphicsResourceGetMappedMipmappedArray( + cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); + static auto func_ptr = + LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray, resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( + struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, + cudaArray_const_t); + static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(desc, array); +} + +extern __host__ cudaError_t CUDARTAPI cudaBindTexture( + size_t *offset, const struct textureReference *texref, const void *devPtr, + const struct cudaChannelFormatDesc *desc, size_t size __dv(UINT_MAX)) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + size_t *, const struct textureReference *, const void *, + const struct cudaChannelFormatDesc *, size_t); + static auto func_ptr = LoadSymbol("cudaBindTexture"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(offset, texref, devPtr, desc, size); +} + +extern __host__ cudaError_t CUDARTAPI +cudaBindTexture2D(size_t *offset, const struct textureReference *texref, + const void *devPtr, const struct cudaChannelFormatDesc *desc, + size_t width, size_t height, size_t pitch) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + size_t *, const struct textureReference *, const void *, + const struct cudaChannelFormatDesc *, size_t, size_t, size_t); + static auto func_ptr = LoadSymbol("cudaBindTexture2D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(offset, texref, devPtr, desc, width, height, pitch); +} + +extern __host__ cudaError_t CUDARTAPI cudaBindTextureToArray( + const struct textureReference *texref, cudaArray_const_t array, + const struct cudaChannelFormatDesc *desc) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + const struct textureReference *, cudaArray_const_t, + const struct cudaChannelFormatDesc *); + static auto func_ptr = LoadSymbol("cudaBindTextureToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texref, array, desc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaBindTextureToMipmappedArray(const struct textureReference *texref, + cudaMipmappedArray_const_t mipmappedArray, + const struct cudaChannelFormatDesc *desc) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + const struct textureReference *, cudaMipmappedArray_const_t, + const struct cudaChannelFormatDesc *); + static auto func_ptr = LoadSymbol("cudaBindTextureToMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texref, mipmappedArray, desc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaUnbindTexture(const struct textureReference *texref) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const struct textureReference *); + static auto func_ptr = LoadSymbol("cudaUnbindTexture"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texref); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureAlignmentOffset( + size_t *offset, const struct textureReference *texref) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(size_t *, const struct textureReference *); + static auto func_ptr = LoadSymbol("cudaGetTextureAlignmentOffset"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(offset, texref); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureReference( + const struct textureReference **texref, const void *symbol) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const struct textureReference **, const void *); + static auto func_ptr = LoadSymbol("cudaGetTextureReference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texref, symbol); +} + +extern __host__ cudaError_t CUDARTAPI cudaBindSurfaceToArray( + const struct surfaceReference *surfref, cudaArray_const_t array, + const struct cudaChannelFormatDesc *desc) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + const struct surfaceReference *, cudaArray_const_t, + const struct cudaChannelFormatDesc *); + static auto func_ptr = LoadSymbol("cudaBindSurfaceToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(surfref, array, desc); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceReference( + const struct surfaceReference **surfref, const void *symbol) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const struct surfaceReference **, const void *); + static auto func_ptr = LoadSymbol("cudaGetSurfaceReference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(surfref, symbol); +} + +extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( + cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, + const struct cudaTextureDesc *pTexDesc, + const struct cudaResourceViewDesc *pResViewDesc) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaTextureObject_t *, const struct cudaResourceDesc *, + const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); + static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDestroyTextureObject(cudaTextureObject_t texObject) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaTextureObject_t); + static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( + struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); + static auto func_ptr = + LoadSymbol("cudaGetTextureObjectResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( + struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); + static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( + struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, + cudaTextureObject_t); + static auto func_ptr = + LoadSymbol("cudaGetTextureObjectResourceViewDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResViewDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( + cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, + const struct cudaResourceDesc *); + static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pSurfObject, pResDesc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); + static auto func_ptr = LoadSymbol("cudaDestroySurfaceObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(surfObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( + struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); + static auto func_ptr = + LoadSymbol("cudaGetSurfaceObjectResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, surfObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(driverVersion); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaRuntimeGetVersion(int *runtimeVersion) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(runtimeVersion); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( + const void **ppExportTable, const cudaUUID_t *pExportTableId) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); + static auto func_ptr = LoadSymbol("cudaGetExportTable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ppExportTable, pExportTableId); +} + +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cudart_stub.cc b/tensorflow/stream_executor/cuda/cudart_stub.cc index 4d77b76a3d0..53df4a8e24a 100644 --- a/tensorflow/stream_executor/cuda/cudart_stub.cc +++ b/tensorflow/stream_executor/cuda/cudart_stub.cc @@ -45,7 +45,13 @@ cudaError_t GetSymbolNotFoundError() { #define __dv(v) #define __CUDA_DEPRECATED + +// A bunch of new symbols were introduced in version 10 +#if CUDA_VERSION <= 9020 +#include "tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc" +#else #include "tensorflow/stream_executor/cuda/cuda_runtime_10_0.inc" +#endif #undef __dv #undef __CUDA_DEPRECATED