Regenerated wrapper includes for all CUDA versions & libraries.

PiperOrigin-RevId: 301883437
Change-Id: I60eb5e45b6eec404c0694a95e091b5e17dd02585
This commit is contained in:
Artem Belevich 2020-03-19 13:26:23 -07:00 committed by TensorFlower Gardener
parent 0c53d83001
commit f29c62f405
23 changed files with 28474 additions and 62 deletions

View File

@@ -0,0 +1,52 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn_init.h"
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include "remote.h" // NOLINT
#include "rpcmem.h" // NOLINT
#include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/soc_model.h"
#ifdef __cplusplus
extern "C" {
#endif
// Version 1.14
// Interface-version constant reported by hexagon_nn_hexagon_interface_version().
// NOTE(review): presumably encodes the hexagon_nn 1.14 interface this delegate
// was built against — confirm against the hexagon_nn release.
static const int kHexagonNNVersion = 136193;
// Weak declaration: the FastRPC runtime may not export remote_handle_control,
// so callers null-check it before invoking (see hexagon_nn_global_init).
#pragma weak remote_handle_control  // Declare it as a weak symbol
void hexagon_nn_global_init() {
rpcmem_init();
// Non-domains QoS invocation
struct remote_rpc_control_latency data;
data.enable = 1;
if (remote_handle_control) { // Check if API is available before invoking
remote_handle_control(DSPRPC_CONTROL_LATENCY, (void*)&data, sizeof(data));
}
}
// Releases the FastRPC shared-memory allocator set up by
// hexagon_nn_global_init().
void hexagon_nn_global_teardown() {
  rpcmem_deinit();
}
// Returns true when the current SoC was recognized by the delegate's SoC
// table; unknown SoCs report UNSPECIFIED_MODE and are treated as unsupported.
bool hexagon_nn_is_device_supported() {
  const auto soc_mode = tflite::delegates::getsoc_model().mode;
  return soc_mode != UNSPECIFIED_MODE;
}
// Reports the hexagon_nn interface version constant this delegate was built
// with (see kHexagonNNVersion above).
int hexagon_nn_hexagon_interface_version() {
  return kHexagonNNVersion;
}
#ifdef __cplusplus
}
#endif

View File

@@ -1,7 +1,7 @@
# Description:
# CUDA-platform specific StreamExecutor support code.
load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_cuda_cc_test")
load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_copts", "tf_cuda_cc_test")
load(
"//tensorflow/stream_executor:build_defs.bzl",
"stream_executor_friends",
@@ -9,7 +9,6 @@ load(
"tf_additional_cuda_platform_deps",
"tf_additional_cudnn_plugin_deps",
)
load("//tensorflow:tensorflow.bzl", "tf_copts")
load(
"//tensorflow/core/platform/default:cuda_build_defs.bzl",
"if_cuda_is_configured",
@@ -90,7 +89,7 @@ cc_library(
cc_library(
name = "cuda_stub",
srcs = if_cuda_is_configured(["cuda_stub.cc"]),
textual_hdrs = ["cuda_10_0.inc"],
textual_hdrs = glob(["cuda_*.inc"]),
deps = if_cuda_is_configured([
"@local_config_cuda//cuda:cuda_headers",
"//tensorflow/stream_executor/lib",
@@ -271,7 +270,7 @@ cc_library(
cc_library(
name = "cufft_stub",
srcs = if_cuda_is_configured(["cufft_stub.cc"]),
textual_hdrs = ["cufft_10_0.inc"],
textual_hdrs = glob(["cufft_*.inc"]),
deps = if_cuda_is_configured([
"@local_config_cuda//cuda:cuda_headers",
"//tensorflow/stream_executor/lib",
@@ -426,7 +425,7 @@ cc_library(
cc_library(
name = "cusolver_stub",
srcs = if_cuda_is_configured(["cusolver_stub.cc"]),
textual_hdrs = ["cusolver_dense_10_0.inc"],
textual_hdrs = glob(["cusolver_dense_*.inc"]),
deps = if_cuda_is_configured([
# LINT.IfChange
"@local_config_cuda//cuda:cublas_headers",

File diff suppressed because it is too large Load Diff

View File

@@ -57,11 +57,16 @@ cublasStatus_t GetSymbolNotFoundError() { return CUBLAS_STATUS_INTERNAL_ERROR; }
typedef enum {} cublasMath_t;
#endif
// Parameter constness changed in cuBLAS 9.2
#if CUDA_VERSION < 9020
#include "tensorflow/stream_executor/cuda/cublas_9_0.inc"
#elif CUDA_VERSION < 10010
#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cublas_10_0.inc"
#else
#elif CUDA_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cublas_10_1.inc"
#elif CUDA_VERSION == 10020
#include "tensorflow/stream_executor/cuda/cublas_10_2.inc"
#elif CUDA_VERSION == 11000
#include "tensorflow/stream_executor/cuda/cublas_11_0.inc"
#else
#error "We have no wrapper for this version."
#endif

View File

@ -1,6 +1,7 @@
// Auto-generated, do not edit.
extern "C" {
CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) {
using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **);
static auto func_ptr = LoadSymbol<FuncPtr>("cuGetErrorString");
@@ -1024,6 +1025,28 @@ CUresult CUDAAPI cuStreamAddCallback(CUstream hStream,
return func_ptr(hStream, callback, userData, flags);
}
// Dynamically-resolved trampolines for the CUDA stream-capture driver API.
// Each wrapper looks up its driver symbol once, caches the pointer in a
// function-local static, and forwards the arguments; when the symbol is
// missing from the loaded driver it returns the "symbol not found" error
// status instead of crashing.
CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamBeginCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream);
}

CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamEndCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream, phGraph);
}

CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream,
                                     CUstreamCaptureStatus *captureStatus) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamIsCapturing");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream, captureStatus);
}
CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr,
size_t length, unsigned int flags) {
using FuncPtr =
@@ -1385,6 +1408,284 @@ __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc,
return func_ptr(hfunc, texunit, hTexRef);
}
// CUDA graph-API trampolines (kernel-node group). Each wrapper resolves its
// driver symbol once, caches it in a function-local static, and forwards the
// call; a missing symbol yields the "symbol not found" error status.
CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraph, flags);
}

CUresult CUDAAPI cuGraphAddKernelNode(
    CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies,
    size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddKernelNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  nodeParams);
}

CUresult CUDAAPI cuGraphKernelNodeGetParams(
    CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphKernelNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphKernelNodeSetParams(
    CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphKernelNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}
// Memcpy/memset graph-node trampolines; same lazy symbol-resolution pattern
// (symbol resolved once into a function-local static, forwarded, error status
// returned when absent).
CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                      CUgraphNode *dependencies,
                                      size_t numDependencies,
                                      const CUDA_MEMCPY3D *copyParams,
                                      CUcontext ctx) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_MEMCPY3D *, CUcontext);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddMemcpyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  copyParams, ctx);
}

CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode,
                                            CUDA_MEMCPY3D *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemcpyNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode,
                                            const CUDA_MEMCPY3D *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemcpyNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddMemsetNode(
    CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies,
    size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams,
    CUcontext ctx) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t,
                          const CUDA_MEMSET_NODE_PARAMS *, CUcontext);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddMemsetNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  memsetParams, ctx);
}

CUresult CUDAAPI cuGraphMemsetNodeGetParams(
    CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemsetNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphMemsetNodeSetParams(
    CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemsetNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}
// Host-callback, child-graph, and empty-node trampolines; same lazy
// symbol-resolution pattern.
CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                    CUgraphNode *dependencies,
                                    size_t numDependencies,
                                    const CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddHostNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  nodeParams);
}

CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode,
                                          CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphHostNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphHostNodeSetParams(
    CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphHostNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode,
                                          CUgraph hGraph,
                                          CUgraphNode *dependencies,
                                          size_t numDependencies,
                                          CUgraph childGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddChildGraphNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  childGraph);
}

CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode,
                                               CUgraph *phGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphChildGraphNodeGetGraph");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, phGraph);
}

CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                     CUgraphNode *dependencies,
                                     size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddEmptyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies);
}
// Graph introspection trampolines (clone and node/edge queries); same lazy
// symbol-resolution pattern.
CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphClone, originalGraph);
}

CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode,
                                        CUgraphNode hOriginalNode,
                                        CUgraph hClonedGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeFindInClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phNode, hOriginalNode, hClonedGraph);
}

CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetType");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, type);
}

CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes,
                                 size_t *numNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, nodes, numNodes);
}

CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes,
                                     size_t *numRootNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetRootNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, rootNodes, numRootNodes);
}

CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from,
                                 CUgraphNode *to, size_t *numEdges) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetEdges");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numEdges);
}

CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode,
                                            CUgraphNode *dependencies,
                                            size_t *numDependencies) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, dependencies, numDependencies);
}

CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode,
                                              CUgraphNode *dependentNodes,
                                              size_t *numDependentNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetDependentNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, dependentNodes, numDependentNodes);
}
// Graph mutation, instantiation, and launch trampolines; same lazy
// symbol-resolution pattern.
CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, CUgraphNode *from,
                                        CUgraphNode *to,
                                        size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numDependencies);
}

CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, CUgraphNode *from,
                                           CUgraphNode *to,
                                           size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphRemoveDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numDependencies);
}

CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphDestroyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode);
}

CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph,
                                    CUgraphNode *phErrorNode, char *logBuffer,
                                    size_t bufferSize) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *,
                                      char *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphInstantiate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize);
}

CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphLaunch");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraphExec, hStream);
}

CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphExecDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraphExec);
}

CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph);
}
CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor(
int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) {
using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -430,6 +430,14 @@ bool CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream,
return ret == CUBLAS_STATUS_SUCCESS;
}
// cublas_func may be overloaded, so we need to figure out which one we really
// need to call based on the args. One way to do it is to wrap it in lambda.
// The generic lambda defers overload resolution to the call site inside the
// DoBlasInternal* helpers, where the concrete argument types are known;
// perfect forwarding preserves the arguments' value categories.
#define AS_LAMBDA(func)                                      \
  [](auto &&... args) -> decltype(                           \
      func(std::forward<decltype(args)>(args)...)) {         \
    return func(std::forward<decltype(args)>(args)...);      \
  }
bool CUDABlas::DoBlasAsum(Stream *stream, uint64 elem_count,
const DeviceMemory<float> &x, int incx,
DeviceMemory<float> *result) {
@@ -1953,8 +1961,9 @@
// essentially reinterpet_cast to __half, which is safe because Eigen::half
// inherits from __half.
bool result = DoBlasInternalFailureOK(
cublasGemmEx, stream, /* pointer_mode_host = */ !alpha.is_pointer(),
CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k,
AS_LAMBDA(cublasGemmEx), stream,
/* pointer_mode_host = */ !alpha.is_pointer(), CUDABlasTranspose(transa),
CUDABlasTranspose(transb), m, n, k,
alpha.is_pointer() ? GpuMemory(alpha.pointer()) : &alpha.value(),
GpuMemory(a), cuda_in_type, lda, GpuMemory(b), cuda_in_type, ldb,
beta.is_pointer() ? GpuMemory(beta.pointer()) : &beta.value(),
@@ -2227,7 +2236,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal(
reinterpret_cast<void **>(const_cast<CUDA_T **>(GpuMemory(c)));
bool ok;
ok = DoBlasInternalImpl(
cublasGemmBatchedEx, stream, true /* = pointer_mode_host */,
AS_LAMBDA(cublasGemmBatchedEx), stream, true /* = pointer_mode_host */,
true /* = err_on_failure */, use_tensor_ops, CUDABlasTranspose(transa),
CUDABlasTranspose(transb), m, n, k, &alpha, a_void_ptrs, data_type, lda,
b_void_ptrs, data_type, ldb, &beta, c_void_ptrs, data_type, ldc,
@@ -2375,12 +2384,12 @@ bool CUDABlas::DoBlasGemmStridedBatched(
cublasGemmAlgo_t algo =
(use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
bool ok = DoBlasInternalImpl(
cublasGemmStridedBatchedEx, stream, true /* = pointer_mode_host */,
true /* = err_on_failure */, use_tensor_ops,
CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha,
GpuMemory(a), CUDA_R_16F, lda, stride_a, GpuMemory(b), CUDA_R_16F,
ldb, stride_b, &beta, GpuMemoryMutable(c), CUDA_R_16F, ldc, stride_c,
batch_count, CUDA_R_32F, algo);
AS_LAMBDA(cublasGemmStridedBatchedEx), stream,
true /* = pointer_mode_host */, true /* = err_on_failure */,
use_tensor_ops, CUDABlasTranspose(transa), CUDABlasTranspose(transb),
m, n, k, &alpha, GpuMemory(a), CUDA_R_16F, lda, stride_a,
GpuMemory(b), CUDA_R_16F, ldb, stride_b, &beta, GpuMemoryMutable(c),
CUDA_R_16F, ldc, stride_c, batch_count, CUDA_R_32F, algo);
if (ok) {
return true;
}

View File

@ -383,6 +383,22 @@ cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr,
return func_ptr(stream, devPtr, length, flags);
}
// Runtime-API stream-capture trampolines: resolve the cudart symbol once,
// cache it in a function-local static, and forward the call; a missing symbol
// yields the "symbol not found" error status.
extern __host__ cudaError_t CUDARTAPI
cudaStreamBeginCapture(cudaStream_t stream) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamBeginCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(stream);
}

extern __host__ cudaError_t CUDARTAPI
cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamEndCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(stream, pGraph);
}
extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing(
cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) {
using FuncPtr =
@@ -1508,6 +1524,306 @@ cudaRuntimeGetVersion(int *runtimeVersion) {
return func_ptr(runtimeVersion);
}
// CUDA runtime graph-API trampolines (kernel-node group); same lazy
// symbol-resolution pattern as the wrappers above.
extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph,
                                                      unsigned int flags) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraph, flags);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddKernelNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams(
    cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphKernelNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams(
    cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphKernelNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}
// Memcpy/memset graph-node trampolines (runtime API); same lazy
// symbol-resolution pattern.
extern __host__ cudaError_t CUDARTAPI
cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaMemcpy3DParms *pCopyParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddMemcpyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pCopyParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams(
    cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemcpyNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams(
    cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemcpyNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaMemsetParams *pMemsetParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddMemsetNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pMemsetParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams(
    cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemsetNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams(
    cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemsetNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}
// Host-callback, child-graph, and empty-node trampolines (runtime API); same
// lazy symbol-resolution pattern.
extern __host__ cudaError_t CUDARTAPI
cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                     cudaGraphNode_t *pDependencies, size_t numDependencies,
                     const struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddHostNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams(
    cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphHostNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams(
    cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphHostNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                           cudaGraphNode_t *pDependencies,
                           size_t numDependencies, cudaGraph_t childGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(
      cudaGraphNode_t *, cudaGraph_t, cudaGraphNode_t *, size_t, cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddChildGraphNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  childGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphChildGraphNodeGetGraph");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                      cudaGraphNode_t *pDependencies, size_t numDependencies) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddEmptyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies);
}
// Graph introspection trampolines (runtime API: clone and node/edge queries);
// same lazy symbol-resolution pattern.
extern __host__ cudaError_t CUDARTAPI
cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphClone, originalGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode,
                         cudaGraph_t clonedGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t,
                                           cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeFindInClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pNode, originalNode, clonedGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetType");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pType);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph,
                                                        cudaGraphNode_t *nodes,
                                                        size_t *numNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, nodes, numNodes);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes(
    cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetRootNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, pRootNodes, pNumRootNodes);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph,
                                                        cudaGraphNode_t *from,
                                                        cudaGraphNode_t *to,
                                                        size_t *numEdges) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
                                           cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetEdges");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, from, to, numEdges);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies(
    cudaGraphNode_t node, cudaGraphNode_t *pDependencies,
    size_t *pNumDependencies) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pDependencies, pNumDependencies);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes(
    cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes,
    size_t *pNumDependentNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetDependentNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pDependentNodes, pNumDependentNodes);
}
extern __host__ cudaError_t CUDARTAPI
cudaGraphAddDependencies(cudaGraph_t graph, cudaGraphNode_t *from,
cudaGraphNode_t *to, size_t numDependencies) {
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
cudaGraphNode_t *, size_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddDependencies");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(graph, from, to, numDependencies);
}
extern __host__ cudaError_t CUDARTAPI
cudaGraphRemoveDependencies(cudaGraph_t graph, cudaGraphNode_t *from,
cudaGraphNode_t *to, size_t numDependencies) {
using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
cudaGraphNode_t *, size_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphRemoveDependencies");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(graph, from, to, numDependencies);
}
// Trampoline for cudaGraphDestroyNode: lazily resolves the real runtime
// symbol and forwards the call unchanged.
extern __host__ cudaError_t CUDARTAPI
cudaGraphDestroyNode(cudaGraphNode_t node) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphDestroyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node);
}
// Trampoline for cudaGraphInstantiate: lazily resolves the real runtime
// symbol and forwards the call unchanged. (This is the pre-CUDA-11 signature
// that reports errors through pErrorNode/pLogBuffer.)
extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(
    cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode,
    char *pLogBuffer, size_t bufferSize) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t,
                                           cudaGraphNode_t *, char *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphInstantiate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize);
}
// Trampoline for cudaGraphLaunch: lazily resolves the real runtime symbol and
// forwards the call unchanged.
extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec,
                                                      cudaStream_t stream) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphLaunch");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graphExec, stream);
}
// Trampoline for cudaGraphExecDestroy: lazily resolves the real runtime
// symbol and forwards the call unchanged.
extern __host__ cudaError_t CUDARTAPI
cudaGraphExecDestroy(cudaGraphExec_t graphExec) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphExecDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graphExec);
}
// Trampoline for cudaGraphDestroy: lazily resolves the real runtime symbol
// and forwards the call unchanged.
extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph);
}
// Trampoline for cudaGetExportTable: lazily resolves the real runtime symbol
// and forwards the call unchanged.
//
// Fix: a diff-hunk artifact ("@ -1515,4 +1831,5 @@ ...") had replaced the
// LoadSymbol line, leaving func_ptr undeclared; restored the line to match
// the pattern used by every other wrapper in this file.
extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(
    const void **ppExportTable, const cudaUUID_t *pExportTableId) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetExportTable");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(ppExportTable, pExportTableId);
}
} // extern "C"

File diff suppressed because it is too large Load Diff

View File

@ -93,7 +93,16 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st
CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
typedef void(CUDA_CB* CUhostFn)(void* userData);
// For now only one stub implementation is needed. If a function is not
// available in the given CUDA release, the corresponding wrapper returns
// CUDA_ERROR_SHARED_OBJECT_INIT_FAILED.
#if CUDA_VERSION <= 9000
#include "tensorflow/stream_executor/cuda/cuda_9_0.inc"
#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cuda_10_0.inc"
#elif CUDA_VERSION <= 10010
#include "tensorflow/stream_executor/cuda/cuda_10_1.inc"
#elif CUDA_VERSION <= 10020
#include "tensorflow/stream_executor/cuda/cuda_10_2.inc"
#elif CUDA_VERSION <= 11000
#include "tensorflow/stream_executor/cuda/cuda_11_0.inc"
#else
#error "We have no wrapper for this version."
#endif

View File

@ -53,10 +53,16 @@ cudaError_t GetSymbolNotFoundError() {
// A bunch of new symbols were introduced in version 10
#if CUDART_VERSION <= 9020
#include "tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc"
#elif CUDART_VERSION < 10010
#elif CUDART_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cuda_runtime_10_0.inc"
#else
#elif CUDART_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cuda_runtime_10_1.inc"
#elif CUDART_VERSION == 10020
#include "tensorflow/stream_executor/cuda/cuda_runtime_10_2.inc"
#elif CUDART_VERSION == 11000
#include "tensorflow/stream_executor/cuda/cuda_runtime_11_0.inc"
#else
#error "We have no wrapper for this version."
#endif
#undef __dv
#undef __CUDA_DEPRECATED

View File

@ -1,6 +1,7 @@
// Auto-generated, do not edit.
extern "C" {
cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type,
int batch) {
using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int);

View File

@ -0,0 +1,307 @@
// Auto-generated, do not edit.
//
// Dynamic-loading trampolines for the cuFFT API. Each wrapper resolves the
// identically named symbol from the cuFFT shared library on first call (via
// LoadSymbol, supplied by the including stub file), caches it in a
// function-local static, and forwards all arguments unchanged. If the symbol
// cannot be resolved, the wrapper returns GetSymbolNotFoundError() — also
// supplied by the including stub file — instead of calling through a null
// pointer.
extern "C" {
// --- Plan creation (legacy cufftPlan* entry points). ---
cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type,
                                 int batch) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, type, batch);
}
cufftResult CUFFTAPI cufftPlan2d(cufftHandle *plan, int nx, int ny,
                                 cufftType type) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int, cufftType);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, type);
}
cufftResult CUFFTAPI cufftPlan3d(cufftHandle *plan, int nx, int ny, int nz,
                                 cufftType type) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle *, int, int, int, cufftType);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, nz, type);
}
cufftResult CUFFTAPI cufftPlanMany(cufftHandle *plan, int rank, int *n,
                                   int *inembed, int istride, int idist,
                                   int *onembed, int ostride, int odist,
                                   cufftType type, int batch) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int *, int *, int,
                                          int, int *, int, int, cufftType, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlanMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch);
}
// --- Plan configuration on an existing handle (cufftMakePlan*). ---
cufftResult CUFFTAPI cufftMakePlan1d(cufftHandle plan, int nx, cufftType type,
                                     int batch, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, type, batch, workSize);
}
cufftResult CUFFTAPI cufftMakePlan2d(cufftHandle plan, int nx, int ny,
                                     cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, type, workSize);
}
cufftResult CUFFTAPI cufftMakePlan3d(cufftHandle plan, int nx, int ny, int nz,
                                     cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, nz, type, workSize);
}
cufftResult CUFFTAPI cufftMakePlanMany(cufftHandle plan, int rank, int *n,
                                       int *inembed, int istride, int idist,
                                       int *onembed, int ostride, int odist,
                                       cufftType type, int batch,
                                       size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *,
                              int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlanMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}
// 64-bit variants take long long extents for very large transforms.
cufftResult CUFFTAPI cufftMakePlanMany64(
    cufftHandle plan, int rank, long long int *n, long long int *inembed,
    long long int istride, long long int idist, long long int *onembed,
    long long int ostride, long long int odist, cufftType type,
    long long int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(
      cufftHandle, int, long long *, long long *, long long, long long,
      long long *, long long, long long, cufftType, long long, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlanMany64");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}
cufftResult CUFFTAPI cufftGetSizeMany64(
    cufftHandle plan, int rank, long long int *n, long long int *inembed,
    long long int istride, long long int idist, long long int *onembed,
    long long int ostride, long long int odist, cufftType type,
    long long int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(
      cufftHandle, int, long long *, long long *, long long, long long,
      long long *, long long, long long, cufftType, long long, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSizeMany64");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}
// --- Work-area size estimation (no handle required). ---
cufftResult CUFFTAPI cufftEstimate1d(int nx, cufftType type, int batch,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, type, batch, workSize);
}
cufftResult CUFFTAPI cufftEstimate2d(int nx, int ny, cufftType type,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, ny, type, workSize);
}
cufftResult CUFFTAPI cufftEstimate3d(int nx, int ny, int nz, cufftType type,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, ny, nz, type, workSize);
}
cufftResult CUFFTAPI cufftEstimateMany(int rank, int *n, int *inembed,
                                       int istride, int idist, int *onembed,
                                       int ostride, int odist, cufftType type,
                                       int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int *, int *, int, int, int *,
                                          int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimateMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(rank, n, inembed, istride, idist, onembed, ostride, odist,
                  type, batch, workSize);
}
// --- Handle lifetime and per-handle queries. ---
cufftResult CUFFTAPI cufftCreate(cufftHandle *handle) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle);
}
cufftResult CUFFTAPI cufftGetSize1d(cufftHandle handle, int nx, cufftType type,
                                    int batch, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, type, batch, workSize);
}
cufftResult CUFFTAPI cufftGetSize2d(cufftHandle handle, int nx, int ny,
                                    cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, ny, type, workSize);
}
cufftResult CUFFTAPI cufftGetSize3d(cufftHandle handle, int nx, int ny, int nz,
                                    cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, ny, nz, type, workSize);
}
cufftResult CUFFTAPI cufftGetSizeMany(cufftHandle handle, int rank, int *n,
                                      int *inembed, int istride, int idist,
                                      int *onembed, int ostride, int odist,
                                      cufftType type, int batch,
                                      size_t *workArea) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *,
                              int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSizeMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workArea);
}
cufftResult CUFFTAPI cufftGetSize(cufftHandle handle, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, workSize);
}
cufftResult CUFFTAPI cufftSetWorkArea(cufftHandle plan, void *workArea) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, void *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetWorkArea");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, workArea);
}
cufftResult CUFFTAPI cufftSetAutoAllocation(cufftHandle plan,
                                            int autoAllocate) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetAutoAllocation");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, autoAllocate);
}
// --- Transform execution (single and double precision). ---
cufftResult CUFFTAPI cufftExecC2C(cufftHandle plan, cufftComplex *idata,
                                  cufftComplex *odata, int direction) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftComplex *, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecC2C");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata, direction);
}
cufftResult CUFFTAPI cufftExecR2C(cufftHandle plan, cufftReal *idata,
                                  cufftComplex *odata) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftReal *, cufftComplex *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecR2C");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}
cufftResult CUFFTAPI cufftExecC2R(cufftHandle plan, cufftComplex *idata,
                                  cufftReal *odata) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftReal *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecC2R");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}
cufftResult CUFFTAPI cufftExecZ2Z(cufftHandle plan, cufftDoubleComplex *idata,
                                  cufftDoubleComplex *odata, int direction) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *,
                                          cufftDoubleComplex *, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecZ2Z");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata, direction);
}
cufftResult CUFFTAPI cufftExecD2Z(cufftHandle plan, cufftDoubleReal *idata,
                                  cufftDoubleComplex *odata) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleReal *,
                                          cufftDoubleComplex *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecD2Z");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}
cufftResult CUFFTAPI cufftExecZ2D(cufftHandle plan, cufftDoubleComplex *idata,
                                  cufftDoubleReal *odata) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *,
                                          cufftDoubleReal *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecZ2D");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}
// --- Miscellaneous configuration and introspection. ---
cufftResult CUFFTAPI cufftSetStream(cufftHandle plan, cudaStream_t stream) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetStream");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, stream);
}
cufftResult CUFFTAPI cufftSetCompatibilityMode(cufftHandle plan,
                                               cufftCompatibility mode) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftCompatibility);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetCompatibilityMode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, mode);
}
cufftResult CUFFTAPI cufftDestroy(cufftHandle plan) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan);
}
cufftResult CUFFTAPI cufftGetVersion(int *version) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetVersion");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(version);
}
cufftResult CUFFTAPI cufftGetProperty(libraryPropertyType type, int *value) {
  using FuncPtr = cufftResult(CUFFTAPI *)(libraryPropertyType, int *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetProperty");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(type, value);
}
}  // extern "C"

View File

@ -47,4 +47,9 @@ T LoadSymbol(const char* symbol_name) {
cufftResult GetSymbolNotFoundError() { return CUFFT_INTERNAL_ERROR; }
} // namespace
#if CUFFT_VERSION < 10000
#include "tensorflow/stream_executor/cuda/cufft_9_0.inc"
#else
// All CUDA-10+ implementations use the same API.
#include "tensorflow/stream_executor/cuda/cufft_10_0.inc"
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cusolverDn.h"
#include "tensorflow/stream_executor/lib/env.h"
#include "tensorflow/stream_executor/platform/dso_loader.h"
@ -50,8 +51,16 @@ cusolverStatus_t GetSymbolNotFoundError() {
}
} // namespace
#if CUDA_VERSION < 10010
#if CUDA_VERSION < 10000
#include "tensorflow/stream_executor/cuda/cusolver_dense_9_0.inc"
#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cusolver_dense_10_0.inc"
#else
#elif CUDA_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cusolver_dense_10_1.inc"
#elif CUDA_VERSION == 10020
#include "tensorflow/stream_executor/cuda/cusolver_dense_10_2.inc"
#elif CUDA_VERSION == 11000
#include "tensorflow/stream_executor/cuda/cusolver_dense_11_0.inc"
#else
#error "We don't have a wrapper for this version."
#endif

View File

@ -116,14 +116,6 @@ cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA,
return func_ptr(descrA, type);
}
cusparseMatrixType_t CUSPARSEAPI
cusparseGetMatType(const cusparseMatDescr_t descrA) {
using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatType");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(descrA);
}
cusparseStatus_t CUSPARSEAPI
cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) {
using FuncPtr =
@ -133,14 +125,6 @@ cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) {
return func_ptr(descrA, fillMode);
}
cusparseFillMode_t CUSPARSEAPI
cusparseGetMatFillMode(const cusparseMatDescr_t descrA) {
using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatFillMode");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(descrA);
}
cusparseStatus_t CUSPARSEAPI
cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) {
using FuncPtr =
@ -150,14 +134,6 @@ cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) {
return func_ptr(descrA, diagType);
}
cusparseDiagType_t CUSPARSEAPI
cusparseGetMatDiagType(const cusparseMatDescr_t descrA) {
using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatDiagType");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(descrA);
}
cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA,
cusparseIndexBase_t base) {
using FuncPtr =
@ -167,14 +143,6 @@ cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA,
return func_ptr(descrA, base);
}
cusparseIndexBase_t CUSPARSEAPI
cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) {
using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatIndexBase");
if (!func_ptr) return GetSymbolNotFoundError();
return func_ptr(descrA);
}
cusparseStatus_t CUSPARSEAPI
cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) {
using FuncPtr =

File diff suppressed because it is too large Load Diff

View File

@ -4887,7 +4887,7 @@ cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress(
int m, // number of rows
int n, const cusparseMatDescr_t descra,
const double *csrValA, // csr values array-the elements which are below a
// certain tolerance will be removed
// certain tolerance will be removed
const int *csrColIndA,
const int *csrRowPtrA, // corresponding input noncompressed row pointer
int nnzA, const int *nnzPerRow, double *csrValC, int *csrColIndC,
@ -4907,7 +4907,7 @@ cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress(
int m, // number of rows
int n, const cusparseMatDescr_t descra,
const cuComplex *csrValA, // csr values array-the elements which are below
// a certain tolerance will be removed
// a certain tolerance will be removed
const int *csrColIndA,
const int *csrRowPtrA, // corresponding input noncompressed row pointer
int nnzA, const int *nnzPerRow, cuComplex *csrValC, int *csrColIndC,
@ -4926,8 +4926,9 @@ cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress(
cusparseHandle_t handle,
int m, // number of rows
int n, const cusparseMatDescr_t descra,
const cuDoubleComplex *csrValA, // csr values array-the elements which are
// below a certain tolerance will be removed
const cuDoubleComplex
*csrValA, // csr values array-the elements which are
// below a certain tolerance will be removed
const int *csrColIndA,
const int *csrRowPtrA, // corresponding input noncompressed row pointer
int nnzA, const int *nnzPerRow, cuDoubleComplex *csrValC, int *csrColIndC,

View File

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cusparse.h"
#include "tensorflow/stream_executor/lib/env.h"
#include "tensorflow/stream_executor/platform/dso_loader.h"
@ -52,8 +53,14 @@ cusparseStatus_t GetSymbolNotFoundError() {
#if CUDA_VERSION < 9020
#include "tensorflow/stream_executor/cuda/cusparse_9_0.inc"
#elif CUDA_VERSION < 10010
#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cusparse_10_0.inc"
#else
#elif CUDA_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cusparse_10_1.inc"
#elif CUDA_VERSION == 10020
#include "tensorflow/stream_executor/cuda/cusparse_10_2.inc"
#elif CUDA_VERSION == 11000
#include "tensorflow/stream_executor/cuda/cusparse_11_0.inc"
#else
#error "We don't have a wrapper for this version."
#endif