Regenerated wrapper includes for all CUDA versions & libraries.
PiperOrigin-RevId: 301883437
Change-Id: I60eb5e45b6eec404c0694a95e091b5e17dd02585
parent 0c53d83001, commit f29c62f405
@@ -0,0 +1,52 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn_init.h"

#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#include "remote.h"  // NOLINT
#include "rpcmem.h"  // NOLINT
#include "tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/soc_model.h"

#ifdef __cplusplus
extern "C" {
#endif
// Version 1.14
static const int kHexagonNNVersion = 136193;
#pragma weak remote_handle_control  // Declare it as a weak symbol
void hexagon_nn_global_init() {
  rpcmem_init();
  // Non-domains QoS invocation
  struct remote_rpc_control_latency data;
  data.enable = 1;
  if (remote_handle_control) {  // Check if API is available before invoking
    remote_handle_control(DSPRPC_CONTROL_LATENCY, (void*)&data, sizeof(data));
  }
}

void hexagon_nn_global_teardown() { rpcmem_deinit(); }

bool hexagon_nn_is_device_supported() {
  return tflite::delegates::getsoc_model().mode != UNSPECIFIED_MODE;
}

int hexagon_nn_hexagon_interface_version() { return kHexagonNNVersion; }

#ifdef __cplusplus
}
#endif
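A note on the weak-symbol guard in the file above: declaring remote_handle_control with #pragma weak lets the binary link and load even on devices whose FastRPC library does not export that entry point, and the null check makes the QoS call a no-op there. The following is a minimal, self-contained sketch of the same pattern; optional_feature() is a hypothetical symbol used only for illustration, not part of the Hexagon API (GCC/Clang on ELF targets).

// Sketch only: guarding a call through a weak symbol that may be absent.
#include <cstdio>

extern "C" int optional_feature(int arg);  // declared, possibly never defined
#pragma weak optional_feature              // weak: unresolved symbol -> null address

int main() {
  if (optional_feature) {  // address is null when the symbol is not provided
    std::printf("feature says: %d\n", optional_feature(42));
  } else {
    std::printf("feature not available, skipping\n");
  }
  return 0;
}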
@@ -1,7 +1,7 @@
# Description:
#   CUDA-platform specific StreamExecutor support code.

-load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_cuda_cc_test")
+load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_copts", "tf_cuda_cc_test")
load(
    "//tensorflow/stream_executor:build_defs.bzl",
    "stream_executor_friends",
@@ -9,7 +9,6 @@ load(
    "tf_additional_cuda_platform_deps",
    "tf_additional_cudnn_plugin_deps",
)
-load("//tensorflow:tensorflow.bzl", "tf_copts")
load(
    "//tensorflow/core/platform/default:cuda_build_defs.bzl",
    "if_cuda_is_configured",
@@ -90,7 +89,7 @@ cc_library(
cc_library(
    name = "cuda_stub",
    srcs = if_cuda_is_configured(["cuda_stub.cc"]),
-    textual_hdrs = ["cuda_10_0.inc"],
+    textual_hdrs = glob(["cuda_*.inc"]),
    deps = if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "//tensorflow/stream_executor/lib",
@@ -271,7 +270,7 @@ cc_library(
cc_library(
    name = "cufft_stub",
    srcs = if_cuda_is_configured(["cufft_stub.cc"]),
-    textual_hdrs = ["cufft_10_0.inc"],
+    textual_hdrs = glob(["cufft_*.inc"]),
    deps = if_cuda_is_configured([
        "@local_config_cuda//cuda:cuda_headers",
        "//tensorflow/stream_executor/lib",
@@ -426,7 +425,7 @@ cc_library(
cc_library(
    name = "cusolver_stub",
    srcs = if_cuda_is_configured(["cusolver_stub.cc"]),
-    textual_hdrs = ["cusolver_dense_10_0.inc"],
+    textual_hdrs = glob(["cusolver_dense_*.inc"]),
    deps = if_cuda_is_configured([
        # LINT.IfChange
        "@local_config_cuda//cuda:cublas_headers",
tensorflow/stream_executor/cuda/cublas_10_2.inc: new file, 5220 lines (diff suppressed because it is too large)
@@ -57,11 +57,16 @@ cublasStatus_t GetSymbolNotFoundError() { return CUBLAS_STATUS_INTERNAL_ERROR; }
typedef enum {} cublasMath_t;
#endif

-// Parameter constness changed in cuBLAS 9.2
#if CUDA_VERSION < 9020
#include "tensorflow/stream_executor/cuda/cublas_9_0.inc"
-#elif CUDA_VERSION < 10010
+#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cublas_10_0.inc"
-#else
+#elif CUDA_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cublas_10_1.inc"
+#elif CUDA_VERSION == 10020
+#include "tensorflow/stream_executor/cuda/cublas_10_2.inc"
+#elif CUDA_VERSION == 11000
+#include "tensorflow/stream_executor/cuda/cublas_11_0.inc"
+#else
+#error "We have no wrapper for this version."
#endif
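Before the regenerated driver-API wrappers below, a note on the convention that repeats across the cuBLAS, cuFFT, cuSOLVER, and runtime stubs in this commit: there is one generated wrapper file per supported library release, and an #if/#elif chain keyed on the library's version macro textually includes exactly one of them, failing the build for versions nobody has generated wrappers for. A generic sketch of that convention; MYLIB_VERSION, mylib.h, and the .inc names are placeholders, not real TensorFlow files.

// Sketch only: compile-time selection of a per-version generated wrapper file.
#include "mylib.h"  // hypothetical header defining MYLIB_VERSION

namespace {
// LoadSymbol<T>() and GetSymbolNotFoundError() would live here, shared by
// every wrapper defined in whichever .inc file is pulled in below.
}  // namespace

#if MYLIB_VERSION < 9020
#include "mylib_9_0.inc"
#elif MYLIB_VERSION == 10000
#include "mylib_10_0.inc"
#elif MYLIB_VERSION == 10010
#include "mylib_10_1.inc"
#else
#error "No wrapper has been generated for this MYLIB_VERSION."
#endif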
@@ -1,6 +1,7 @@
// Auto-generated, do not edit.

extern "C" {

CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) {
  using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGetErrorString");
@@ -1024,6 +1025,28 @@ CUresult CUDAAPI cuStreamAddCallback(CUstream hStream,
  return func_ptr(hStream, callback, userData, flags);
}

CUresult CUDAAPI cuStreamBeginCapture(CUstream hStream) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamBeginCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream);
}

CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamEndCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream, phGraph);
}

CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream,
                                     CUstreamCaptureStatus *captureStatus) {
  using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuStreamIsCapturing");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hStream, captureStatus);
}

CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr,
                                        size_t length, unsigned int flags) {
  using FuncPtr =
@@ -1385,6 +1408,284 @@ __CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc,
  return func_ptr(hfunc, texunit, hTexRef);
}

CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraph, flags);
}

CUresult CUDAAPI cuGraphAddKernelNode(
    CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies,
    size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddKernelNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  nodeParams);
}

CUresult CUDAAPI cuGraphKernelNodeGetParams(
    CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphKernelNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphKernelNodeSetParams(
    CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphKernelNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                      CUgraphNode *dependencies,
                                      size_t numDependencies,
                                      const CUDA_MEMCPY3D *copyParams,
                                      CUcontext ctx) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_MEMCPY3D *, CUcontext);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddMemcpyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  copyParams, ctx);
}

CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode,
                                            CUDA_MEMCPY3D *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemcpyNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode,
                                            const CUDA_MEMCPY3D *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemcpyNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddMemsetNode(
    CUgraphNode *phGraphNode, CUgraph hGraph, CUgraphNode *dependencies,
    size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams,
    CUcontext ctx) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t,
                          const CUDA_MEMSET_NODE_PARAMS *, CUcontext);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddMemsetNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  memsetParams, ctx);
}

CUresult CUDAAPI cuGraphMemsetNodeGetParams(
    CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemsetNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphMemsetNodeSetParams(
    CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphMemsetNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                    CUgraphNode *dependencies,
                                    size_t numDependencies,
                                    const CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, const CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddHostNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  nodeParams);
}

CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode,
                                          CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphHostNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphHostNodeSetParams(
    CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphHostNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, nodeParams);
}

CUresult CUDAAPI cuGraphAddChildGraphNode(CUgraphNode *phGraphNode,
                                          CUgraph hGraph,
                                          CUgraphNode *dependencies,
                                          size_t numDependencies,
                                          CUgraph childGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *,
                                      size_t, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddChildGraphNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies,
                  childGraph);
}

CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode,
                                               CUgraph *phGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphChildGraphNodeGetGraph");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, phGraph);
}

CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph,
                                     CUgraphNode *dependencies,
                                     size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddEmptyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphNode, hGraph, dependencies, numDependencies);
}

CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphClone, originalGraph);
}

CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode,
                                        CUgraphNode hOriginalNode,
                                        CUgraph hClonedGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeFindInClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phNode, hOriginalNode, hClonedGraph);
}

CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetType");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, type);
}

CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes,
                                 size_t *numNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, nodes, numNodes);
}

CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes,
                                     size_t *numRootNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetRootNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, rootNodes, numRootNodes);
}

CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from,
                                 CUgraphNode *to, size_t *numEdges) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphGetEdges");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numEdges);
}

CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode,
                                            CUgraphNode *dependencies,
                                            size_t *numDependencies) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, dependencies, numDependencies);
}

CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode,
                                              CUgraphNode *dependentNodes,
                                              size_t *numDependentNodes) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphNodeGetDependentNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode, dependentNodes, numDependentNodes);
}

CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, CUgraphNode *from,
                                        CUgraphNode *to,
                                        size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphAddDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numDependencies);
}

CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, CUgraphNode *from,
                                           CUgraphNode *to,
                                           size_t numDependencies) {
  using FuncPtr =
      CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphRemoveDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph, from, to, numDependencies);
}

CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphDestroyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hNode);
}

CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph,
                                    CUgraphNode *phErrorNode, char *logBuffer,
                                    size_t bufferSize) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, CUgraphNode *,
                                      char *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphInstantiate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(phGraphExec, hGraph, phErrorNode, logBuffer, bufferSize);
}

CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphLaunch");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraphExec, hStream);
}

CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphExecDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraphExec);
}

CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) {
  using FuncPtr = CUresult(CUDAAPI *)(CUgraph);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuGraphDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(hGraph);
}

CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor(
    int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) {
  using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t);
tensorflow/stream_executor/cuda/cuda_10_1.inc: new file, 2166 lines (diff suppressed because it is too large)
tensorflow/stream_executor/cuda/cuda_10_2.inc: new file, 2328 lines (diff suppressed because it is too large)
tensorflow/stream_executor/cuda/cuda_9_0.inc: new file, 1718 lines (diff suppressed because it is too large)
@@ -430,6 +430,14 @@ bool CUDABlas::DoBlasInternalImpl(FuncT cublas_func, Stream *stream,
  return ret == CUBLAS_STATUS_SUCCESS;
}

// cublas_func may be overloaded, so we need to figure out which one we really
// need to call based on the args. One way to do it is to wrap it in lambda.
#define AS_LAMBDA(func)                                              \
  [](auto &&... args) -> decltype(                                   \
      func(std::forward<decltype(args)>(args)...)) {                 \
    return func(std::forward<decltype(args)>(args)...);              \
  }

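The two-line comment above carries the reasoning behind AS_LAMBDA: cublasGemmEx and friends can name an overload set, so passing the bare name to a function template such as DoBlasInternalImpl cannot deduce which overload is meant. Wrapping the name in a generic lambda defers overload resolution to the lambda body, where the argument types are known. A standalone sketch of why that works, with a hypothetical overloaded function scale() and a helper apply() standing in for the cuBLAS entry points and DoBlasInternalImpl:

// Sketch only: 'scale' and 'apply' are illustrative, not TensorFlow code.
#include <iostream>
#include <utility>

int scale(int x) { return 2 * x; }
double scale(double x) { return 2.0 * x; }

#define AS_LAMBDA(func)                                       \
  [](auto &&... args) -> decltype(                            \
      func(std::forward<decltype(args)>(args)...)) {          \
    return func(std::forward<decltype(args)>(args)...);       \
  }

template <typename F, typename T>
auto apply_to(F f, T v) {
  return f(v);  // overload of 'scale' is chosen here, inside the lambda
}

int main() {
  // apply_to(scale, 3) would not compile: 'scale' names an overload set,
  // so F cannot be deduced. The lambda wrapper makes the argument a single
  // concrete callable object.
  std::cout << apply_to(AS_LAMBDA(scale), 3) << "\n";    // uses scale(int)
  std::cout << apply_to(AS_LAMBDA(scale), 1.5) << "\n";  // uses scale(double)
  return 0;
}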
bool CUDABlas::DoBlasAsum(Stream *stream, uint64 elem_count,
                          const DeviceMemory<float> &x, int incx,
                          DeviceMemory<float> *result) {
@@ -1953,8 +1961,9 @@ bool CUDABlas::DoBlasGemmWithAlgorithmImpl(
  // essentially reinterpet_cast to __half, which is safe because Eigen::half
  // inherits from __half.
  bool result = DoBlasInternalFailureOK(
-     cublasGemmEx, stream, /* pointer_mode_host = */ !alpha.is_pointer(),
-     CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k,
+     AS_LAMBDA(cublasGemmEx), stream,
+     /* pointer_mode_host = */ !alpha.is_pointer(), CUDABlasTranspose(transa),
+     CUDABlasTranspose(transb), m, n, k,
      alpha.is_pointer() ? GpuMemory(alpha.pointer()) : &alpha.value(),
      GpuMemory(a), cuda_in_type, lda, GpuMemory(b), cuda_in_type, ldb,
      beta.is_pointer() ? GpuMemory(beta.pointer()) : &beta.value(),
@@ -2227,7 +2236,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal(
      reinterpret_cast<void **>(const_cast<CUDA_T **>(GpuMemory(c)));
  bool ok;
  ok = DoBlasInternalImpl(
-     cublasGemmBatchedEx, stream, true /* = pointer_mode_host */,
+     AS_LAMBDA(cublasGemmBatchedEx), stream, true /* = pointer_mode_host */,
      true /* = err_on_failure */, use_tensor_ops, CUDABlasTranspose(transa),
      CUDABlasTranspose(transb), m, n, k, &alpha, a_void_ptrs, data_type, lda,
      b_void_ptrs, data_type, ldb, &beta, c_void_ptrs, data_type, ldc,
@@ -2375,12 +2384,12 @@ bool CUDABlas::DoBlasGemmStridedBatched(
    cublasGemmAlgo_t algo =
        (use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
    bool ok = DoBlasInternalImpl(
-       cublasGemmStridedBatchedEx, stream, true /* = pointer_mode_host */,
-       true /* = err_on_failure */, use_tensor_ops,
-       CUDABlasTranspose(transa), CUDABlasTranspose(transb), m, n, k, &alpha,
-       GpuMemory(a), CUDA_R_16F, lda, stride_a, GpuMemory(b), CUDA_R_16F,
-       ldb, stride_b, &beta, GpuMemoryMutable(c), CUDA_R_16F, ldc, stride_c,
-       batch_count, CUDA_R_32F, algo);
+       AS_LAMBDA(cublasGemmStridedBatchedEx), stream,
+       true /* = pointer_mode_host */, true /* = err_on_failure */,
+       use_tensor_ops, CUDABlasTranspose(transa), CUDABlasTranspose(transb),
+       m, n, k, &alpha, GpuMemory(a), CUDA_R_16F, lda, stride_a,
+       GpuMemory(b), CUDA_R_16F, ldb, stride_b, &beta, GpuMemoryMutable(c),
+       CUDA_R_16F, ldc, stride_c, batch_count, CUDA_R_32F, algo);
    if (ok) {
      return true;
    }
@@ -383,6 +383,22 @@ cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr,
  return func_ptr(stream, devPtr, length, flags);
}

extern __host__ cudaError_t CUDARTAPI
cudaStreamBeginCapture(cudaStream_t stream) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamBeginCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(stream);
}

extern __host__ cudaError_t CUDARTAPI
cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamEndCapture");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(stream, pGraph);
}

extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing(
    cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) {
  using FuncPtr =
@@ -1508,6 +1524,306 @@ cudaRuntimeGetVersion(int *runtimeVersion) {
  return func_ptr(runtimeVersion);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph,
                                                      unsigned int flags) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraph, flags);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddKernelNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams(
    cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphKernelNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams(
    cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaKernelNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphKernelNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaMemcpy3DParms *pCopyParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddMemcpyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pCopyParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams(
    cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemcpyNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams(
    cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaMemcpy3DParms *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemcpyNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                       cudaGraphNode_t *pDependencies, size_t numDependencies,
                       const struct cudaMemsetParams *pMemsetParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddMemsetNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pMemsetParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams(
    cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemsetNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams(
    cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaMemsetParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphMemsetNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                     cudaGraphNode_t *pDependencies, size_t numDependencies,
                     const struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t,
                                           const struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddHostNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams(
    cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphHostNodeGetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams(
    cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t,
                                           const struct cudaHostNodeParams *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphHostNodeSetParams");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pNodeParams);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                           cudaGraphNode_t *pDependencies,
                           size_t numDependencies, cudaGraph_t childGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(
      cudaGraphNode_t *, cudaGraph_t, cudaGraphNode_t *, size_t, cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddChildGraphNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies,
                  childGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphChildGraphNodeGetGraph");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
                      cudaGraphNode_t *pDependencies, size_t numDependencies) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t,
                                           cudaGraphNode_t *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddEmptyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphNode, graph, pDependencies, numDependencies);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphClone, originalGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode,
                         cudaGraph_t clonedGraph) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeFindInClone");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pNode, originalNode, clonedGraph);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetType");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pType);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph,
                                                        cudaGraphNode_t *nodes,
                                                        size_t *numNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, nodes, numNodes);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes(
    cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetRootNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, pRootNodes, pNumRootNodes);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph,
                                                        cudaGraphNode_t *from,
                                                        cudaGraphNode_t *to,
                                                        size_t *numEdges) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
                                           cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphGetEdges");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, from, to, numEdges);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies(
    cudaGraphNode_t node, cudaGraphNode_t *pDependencies,
    size_t *pNumDependencies) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pDependencies, pNumDependencies);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes(
    cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes,
    size_t *pNumDependentNodes) {
  using FuncPtr =
      cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphNodeGetDependentNodes");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node, pDependentNodes, pNumDependentNodes);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphAddDependencies(cudaGraph_t graph, cudaGraphNode_t *from,
                         cudaGraphNode_t *to, size_t numDependencies) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
                                           cudaGraphNode_t *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphAddDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, from, to, numDependencies);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphRemoveDependencies(cudaGraph_t graph, cudaGraphNode_t *from,
                            cudaGraphNode_t *to, size_t numDependencies) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *,
                                           cudaGraphNode_t *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphRemoveDependencies");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph, from, to, numDependencies);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphDestroyNode(cudaGraphNode_t node) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphDestroyNode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(node);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphInstantiate(
    cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode,
    char *pLogBuffer, size_t bufferSize) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t,
                                           cudaGraphNode_t *, char *, size_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphInstantiate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec,
                                                      cudaStream_t stream) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphLaunch");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graphExec, stream);
}

extern __host__ cudaError_t CUDARTAPI
cudaGraphExecDestroy(cudaGraphExec_t graphExec) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphExecDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graphExec);
}

extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGraphDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(graph);
}

extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(
    const void **ppExportTable, const cudaUUID_t *pExportTableId) {
  using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *);
@@ -1515,4 +1831,5 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(ppExportTable, pExportTableId);
}

}  // extern "C"
tensorflow/stream_executor/cuda/cuda_runtime_10_2.inc: new file, 1896 lines (diff suppressed because it is too large)
@@ -93,7 +93,16 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st
    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
typedef void(CUDA_CB* CUhostFn)(void* userData);

// For now only one stub implementation is needed. If a function that is not
// available in the given CUDA release, the corresponding wrapper returns
// CUDA_ERROR_SHARED_OBJECT_INIT_FAILED.
#if CUDA_VERSION <= 9000
#include "tensorflow/stream_executor/cuda/cuda_9_0.inc"
#elif CUDA_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cuda_10_0.inc"
#elif CUDA_VERSION <= 10010
#include "tensorflow/stream_executor/cuda/cuda_10_1.inc"
#elif CUDA_VERSION <= 10020
#include "tensorflow/stream_executor/cuda/cuda_10_2.inc"
#elif CUDA_VERSION <= 11000
#include "tensorflow/stream_executor/cuda/cuda_11_0.inc"
#else
#error "We have no wrapper for this version."
#endif
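The comment in the hunk above describes the runtime behavior every generated wrapper shares: resolve the symbol from the real driver/runtime library on first use, cache the pointer in a function-local static, and return an error code (CUDA_ERROR_SHARED_OBJECT_INIT_FAILED for the driver API) when the symbol or the library is missing. A rough, self-contained sketch of that mechanism, using plain POSIX dlopen/dlsym instead of stream_executor's Env/DsoLoader helpers; "libfoo.so.1", cuFooCreate, and the error constant below are placeholders, not real CUDA names.

// Sketch only (link with -ldl): approximates the LoadSymbol<T>() pattern.
#include <dlfcn.h>

using cuResult = int;                     // stand-in for CUresult
constexpr cuResult kSymbolNotFound = -1;  // stands in for the INIT_FAILED code

void* GetDsoHandle() {
  // Opened once; a real implementation would locate the vendor library and
  // report a clear error if dlopen fails.
  static void* handle = dlopen("libfoo.so.1", RTLD_LAZY);
  return handle;
}

template <typename T>
T LoadSymbol(const char* symbol_name) {
  void* handle = GetDsoHandle();
  return reinterpret_cast<T>(handle ? dlsym(handle, symbol_name) : nullptr);
}

cuResult GetSymbolNotFoundError() { return kSymbolNotFound; }

// What one generated wrapper boils down to:
cuResult cuFooCreate(void** out) {        // hypothetical entry point
  using FuncPtr = cuResult (*)(void**);
  static auto func_ptr = LoadSymbol<FuncPtr>("cuFooCreate");  // resolved once
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(out);
}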
@@ -53,10 +53,16 @@ cudaError_t GetSymbolNotFoundError() {
// A bunch of new symbols were introduced in version 10
#if CUDART_VERSION <= 9020
#include "tensorflow/stream_executor/cuda/cuda_runtime_9_0.inc"
-#elif CUDART_VERSION < 10010
+#elif CUDART_VERSION == 10000
#include "tensorflow/stream_executor/cuda/cuda_runtime_10_0.inc"
-#else
+#elif CUDART_VERSION == 10010
#include "tensorflow/stream_executor/cuda/cuda_runtime_10_1.inc"
+#elif CUDART_VERSION == 10020
+#include "tensorflow/stream_executor/cuda/cuda_runtime_10_2.inc"
+#elif CUDART_VERSION == 11000
+#include "tensorflow/stream_executor/cuda/cuda_runtime_11_0.inc"
+#else
+#error "We have no wrapper for this version."
#endif
#undef __dv
#undef __CUDA_DEPRECATED
@@ -1,6 +1,7 @@
// Auto-generated, do not edit.

extern "C" {

cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type,
                                 int batch) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int);
tensorflow/stream_executor/cuda/cufft_9_0.inc: new file, 307 lines
@@ -0,0 +1,307 @@
// Auto-generated, do not edit.

extern "C" {

cufftResult CUFFTAPI cufftPlan1d(cufftHandle *plan, int nx, cufftType type,
                                 int batch) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, cufftType, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, type, batch);
}

cufftResult CUFFTAPI cufftPlan2d(cufftHandle *plan, int nx, int ny,
                                 cufftType type) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int, cufftType);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, type);
}

cufftResult CUFFTAPI cufftPlan3d(cufftHandle *plan, int nx, int ny, int nz,
                                 cufftType type) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle *, int, int, int, cufftType);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlan3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, nz, type);
}

cufftResult CUFFTAPI cufftPlanMany(cufftHandle *plan, int rank, int *n,
                                   int *inembed, int istride, int idist,
                                   int *onembed, int ostride, int odist,
                                   cufftType type, int batch) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *, int, int *, int *, int,
                                          int, int *, int, int, cufftType, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftPlanMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch);
}

cufftResult CUFFTAPI cufftMakePlan1d(cufftHandle plan, int nx, cufftType type,
                                     int batch, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, type, batch, workSize);
}

cufftResult CUFFTAPI cufftMakePlan2d(cufftHandle plan, int nx, int ny,
                                     cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, type, workSize);
}

cufftResult CUFFTAPI cufftMakePlan3d(cufftHandle plan, int nx, int ny, int nz,
                                     cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlan3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, nx, ny, nz, type, workSize);
}

cufftResult CUFFTAPI cufftMakePlanMany(cufftHandle plan, int rank, int *n,
                                       int *inembed, int istride, int idist,
                                       int *onembed, int ostride, int odist,
                                       cufftType type, int batch,
                                       size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *,
                              int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlanMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}

cufftResult CUFFTAPI cufftMakePlanMany64(
    cufftHandle plan, int rank, long long int *n, long long int *inembed,
    long long int istride, long long int idist, long long int *onembed,
    long long int ostride, long long int odist, cufftType type,
    long long int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(
      cufftHandle, int, long long *, long long *, long long, long long,
      long long *, long long, long long, cufftType, long long, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftMakePlanMany64");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}

cufftResult CUFFTAPI cufftGetSizeMany64(
    cufftHandle plan, int rank, long long int *n, long long int *inembed,
    long long int istride, long long int idist, long long int *onembed,
    long long int ostride, long long int odist, cufftType type,
    long long int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(
      cufftHandle, int, long long *, long long *, long long, long long,
      long long *, long long, long long, cufftType, long long, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSizeMany64");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workSize);
}

cufftResult CUFFTAPI cufftEstimate1d(int nx, cufftType type, int batch,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, type, batch, workSize);
}

cufftResult CUFFTAPI cufftEstimate2d(int nx, int ny, cufftType type,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, ny, type, workSize);
}

cufftResult CUFFTAPI cufftEstimate3d(int nx, int ny, int nz, cufftType type,
                                     size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimate3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(nx, ny, nz, type, workSize);
}

cufftResult CUFFTAPI cufftEstimateMany(int rank, int *n, int *inembed,
                                       int istride, int idist, int *onembed,
                                       int ostride, int odist, cufftType type,
                                       int batch, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int, int *, int *, int, int, int *,
                                          int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftEstimateMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(rank, n, inembed, istride, idist, onembed, ostride, odist,
                  type, batch, workSize);
}

cufftResult CUFFTAPI cufftCreate(cufftHandle *handle) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftCreate");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle);
}

cufftResult CUFFTAPI cufftGetSize1d(cufftHandle handle, int nx, cufftType type,
                                    int batch, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize1d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, type, batch, workSize);
}

cufftResult CUFFTAPI cufftGetSize2d(cufftHandle handle, int nx, int ny,
                                    cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize2d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, ny, type, workSize);
}

cufftResult CUFFTAPI cufftGetSize3d(cufftHandle handle, int nx, int ny, int nz,
                                    cufftType type, size_t *workSize) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int, int, cufftType, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize3d");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, nx, ny, nz, type, workSize);
}

cufftResult CUFFTAPI cufftGetSizeMany(cufftHandle handle, int rank, int *n,
                                      int *inembed, int istride, int idist,
                                      int *onembed, int ostride, int odist,
                                      cufftType type, int batch,
                                      size_t *workArea) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, int, int *, int *, int, int, int *,
                              int, int, cufftType, int, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSizeMany");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, rank, n, inembed, istride, idist, onembed, ostride,
                  odist, type, batch, workArea);
}

cufftResult CUFFTAPI cufftGetSize(cufftHandle handle, size_t *workSize) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, size_t *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetSize");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(handle, workSize);
}

cufftResult CUFFTAPI cufftSetWorkArea(cufftHandle plan, void *workArea) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, void *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetWorkArea");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, workArea);
}

cufftResult CUFFTAPI cufftSetAutoAllocation(cufftHandle plan,
                                            int autoAllocate) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetAutoAllocation");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, autoAllocate);
}

cufftResult CUFFTAPI cufftExecC2C(cufftHandle plan, cufftComplex *idata,
                                  cufftComplex *odata, int direction) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftComplex *, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecC2C");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata, direction);
}

cufftResult CUFFTAPI cufftExecR2C(cufftHandle plan, cufftReal *idata,
                                  cufftComplex *odata) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftReal *, cufftComplex *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecR2C");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}

cufftResult CUFFTAPI cufftExecC2R(cufftHandle plan, cufftComplex *idata,
                                  cufftReal *odata) {
  using FuncPtr =
      cufftResult(CUFFTAPI *)(cufftHandle, cufftComplex *, cufftReal *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecC2R");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}

cufftResult CUFFTAPI cufftExecZ2Z(cufftHandle plan, cufftDoubleComplex *idata,
                                  cufftDoubleComplex *odata, int direction) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *,
                                          cufftDoubleComplex *, int);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecZ2Z");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata, direction);
}

cufftResult CUFFTAPI cufftExecD2Z(cufftHandle plan, cufftDoubleReal *idata,
                                  cufftDoubleComplex *odata) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleReal *,
                                          cufftDoubleComplex *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecD2Z");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}

cufftResult CUFFTAPI cufftExecZ2D(cufftHandle plan, cufftDoubleComplex *idata,
                                  cufftDoubleReal *odata) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftDoubleComplex *,
                                          cufftDoubleReal *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftExecZ2D");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, idata, odata);
}

cufftResult CUFFTAPI cufftSetStream(cufftHandle plan, cudaStream_t stream) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cudaStream_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetStream");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, stream);
}

cufftResult CUFFTAPI cufftSetCompatibilityMode(cufftHandle plan,
                                               cufftCompatibility mode) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle, cufftCompatibility);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftSetCompatibilityMode");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan, mode);
}

cufftResult CUFFTAPI cufftDestroy(cufftHandle plan) {
  using FuncPtr = cufftResult(CUFFTAPI *)(cufftHandle);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftDestroy");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(plan);
}

cufftResult CUFFTAPI cufftGetVersion(int *version) {
  using FuncPtr = cufftResult(CUFFTAPI *)(int *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetVersion");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(version);
}

cufftResult CUFFTAPI cufftGetProperty(libraryPropertyType type, int *value) {
  using FuncPtr = cufftResult(CUFFTAPI *)(libraryPropertyType, int *);
  static auto func_ptr = LoadSymbol<FuncPtr>("cufftGetProperty");
  if (!func_ptr) return GetSymbolNotFoundError();
  return func_ptr(type, value);
}

}  // extern "C"
@ -47,4 +47,9 @@ T LoadSymbol(const char* symbol_name) {
 cufftResult GetSymbolNotFoundError() { return CUFFT_INTERNAL_ERROR; }
 }  // namespace
 
+#if CUFFT_VERSION < 10000
+#include "tensorflow/stream_executor/cuda/cufft_9_0.inc"
+#else
+// All CUDA-10+ implementations use the same API.
 #include "tensorflow/stream_executor/cuda/cufft_10_0.inc"
+#endif
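Because GetSymbolNotFoundError() maps a missing library to CUFFT_INTERNAL_ERROR, any of the wrappers doubles as a cheap availability probe. A hedged sketch of how a caller might use that (the error constant comes from the stub above; the probe function itself and the header path are illustrative):

#include <cstdio>

#include "third_party/gpus/cuda/include/cufft.h"  // assumed header location

// Illustrative probe: if libcufft cannot be loaded, every stub wrapper
// returns CUFFT_INTERNAL_ERROR, so a failed cufftGetVersion call tells us
// the library is unavailable.
bool CufftSeemsAvailable() {
  int version = 0;
  cufftResult status = cufftGetVersion(&version);
  if (status != CUFFT_SUCCESS) {
    std::fprintf(stderr, "cuFFT not usable (status %d)\n",
                 static_cast<int>(status));
    return false;
  }
  std::fprintf(stderr, "cuFFT version %d\n", version);
  return true;
}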
tensorflow/stream_executor/cuda/cusolver_dense_10_2.inc (new file, 3677 lines; diff too large to display)
tensorflow/stream_executor/cuda/cusolver_dense_9_0.inc (new file, 2185 lines; diff too large to display)
@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "third_party/gpus/cuda/include/cuda.h"
 #include "third_party/gpus/cuda/include/cusolverDn.h"
 #include "tensorflow/stream_executor/lib/env.h"
 #include "tensorflow/stream_executor/platform/dso_loader.h"
@ -50,8 +51,16 @@ cusolverStatus_t GetSymbolNotFoundError() {
 }
 }  // namespace
 
-#if CUDA_VERSION < 10010
+#if CUDA_VERSION < 10000
+#include "tensorflow/stream_executor/cuda/cusolver_dense_9_0.inc"
+#elif CUDA_VERSION == 10000
 #include "tensorflow/stream_executor/cuda/cusolver_dense_10_0.inc"
-#else
+#elif CUDA_VERSION == 10010
 #include "tensorflow/stream_executor/cuda/cusolver_dense_10_1.inc"
+#elif CUDA_VERSION == 10020
+#include "tensorflow/stream_executor/cuda/cusolver_dense_10_2.inc"
+#elif CUDA_VERSION == 11000
+#include "tensorflow/stream_executor/cuda/cusolver_dense_11_0.inc"
+#else
+#error "We don't have a wrapper for this version."
 #endif
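The dispatch above keys off CUDA_VERSION, which cuda.h (now included explicitly in the hunk above) defines as major * 1000 + minor * 10, so 10010 is CUDA 10.1 and 11000 is CUDA 11.0. A small compile-time sanity check, written here only as an illustration:

#include "third_party/gpus/cuda/include/cuda.h"  // defines CUDA_VERSION

// CUDA_VERSION packs the toolkit version as major * 1000 + minor * 10,
// so these helpers recover the human-readable pieces at compile time.
constexpr int kCudaMajor = CUDA_VERSION / 1000;
constexpr int kCudaMinor = (CUDA_VERSION % 1000) / 10;

// Worked example: a CUDA 10.1 toolchain defines CUDA_VERSION == 10010.
static_assert(10010 / 1000 == 10 && (10010 % 1000) / 10 == 1,
              "10010 decodes to CUDA 10.1");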
@ -116,14 +116,6 @@ cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA,
   return func_ptr(descrA, type);
 }
 
-cusparseMatrixType_t CUSPARSEAPI
-cusparseGetMatType(const cusparseMatDescr_t descrA) {
-  using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatType");
-  if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(descrA);
-}
-
 cusparseStatus_t CUSPARSEAPI
 cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) {
   using FuncPtr =
@ -133,14 +125,6 @@ cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) {
   return func_ptr(descrA, fillMode);
 }
 
-cusparseFillMode_t CUSPARSEAPI
-cusparseGetMatFillMode(const cusparseMatDescr_t descrA) {
-  using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatFillMode");
-  if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(descrA);
-}
-
 cusparseStatus_t CUSPARSEAPI
 cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) {
   using FuncPtr =
@ -150,14 +134,6 @@ cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) {
   return func_ptr(descrA, diagType);
 }
 
-cusparseDiagType_t CUSPARSEAPI
-cusparseGetMatDiagType(const cusparseMatDescr_t descrA) {
-  using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatDiagType");
-  if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(descrA);
-}
-
 cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA,
                                                      cusparseIndexBase_t base) {
   using FuncPtr =
@ -167,14 +143,6 @@ cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA,
   return func_ptr(descrA, base);
 }
 
-cusparseIndexBase_t CUSPARSEAPI
-cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) {
-  using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
-  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatIndexBase");
-  if (!func_ptr) return GetSymbolNotFoundError();
-  return func_ptr(descrA);
-}
-
 cusparseStatus_t CUSPARSEAPI
 cusparseCreateSolveAnalysisInfo(cusparseSolveAnalysisInfo_t *info) {
   using FuncPtr =
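The wrappers deleted in these hunks are exactly the descriptor getters whose return type is an enum (cusparseMatrixType_t, cusparseFillMode_t, cusparseDiagType_t, cusparseIndexBase_t) rather than cusparseStatus_t, so the generated `return GetSymbolNotFoundError();` fallback does not fit their signatures; that is plausibly why the regenerated file skips them. If such a getter were still wanted, it would need its own fallback value. A hypothetical sketch, assuming the stub's LoadSymbol helper is in scope and arbitrarily defaulting to CUSPARSE_MATRIX_TYPE_GENERAL:

// Hypothetical getter wrapper (not part of the generated code): falls back
// to a concrete enum value instead of a status code when the symbol is
// unavailable.
cusparseMatrixType_t GetMatTypeOrDefault(const cusparseMatDescr_t descrA) {
  using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatType");
  if (!func_ptr) return CUSPARSE_MATRIX_TYPE_GENERAL;  // assumed default
  return func_ptr(descrA);
}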
tensorflow/stream_executor/cuda/cusparse_10_2.inc (new file, 8226 lines; diff too large to display)
@ -4887,7 +4887,7 @@ cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress(
     int m, // number of rows
     int n, const cusparseMatDescr_t descra,
     const double *csrValA, // csr values array-the elements which are below a
-                           // certain tolerance will be removed
+                           // certain tolerance will be remvoed
     const int *csrColIndA,
     const int *csrRowPtrA, // corresponding input noncompressed row pointer
     int nnzA, const int *nnzPerRow, double *csrValC, int *csrColIndC,
@ -4907,7 +4907,7 @@ cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress(
     int m, // number of rows
     int n, const cusparseMatDescr_t descra,
     const cuComplex *csrValA, // csr values array-the elements which are below
-                              // a certain tolerance will be removed
+                              // a certain tolerance will be remvoed
     const int *csrColIndA,
     const int *csrRowPtrA, // corresponding input noncompressed row pointer
     int nnzA, const int *nnzPerRow, cuComplex *csrValC, int *csrColIndC,
@ -4926,8 +4926,9 @@ cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress(
     cusparseHandle_t handle,
     int m, // number of rows
     int n, const cusparseMatDescr_t descra,
-    const cuDoubleComplex *csrValA, // csr values array-the elements which are
-                                    // below a certain tolerance will be removed
+    const cuDoubleComplex
+        *csrValA, // csr values array-the elements which are
+                  // below a certain tolerance will be remvoed
     const int *csrColIndA,
     const int *csrRowPtrA, // corresponding input noncompressed row pointer
     int nnzA, const int *nnzPerRow, cuDoubleComplex *csrValC, int *csrColIndC,
@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "third_party/gpus/cuda/include/cuda.h"
 #include "third_party/gpus/cuda/include/cusparse.h"
 #include "tensorflow/stream_executor/lib/env.h"
 #include "tensorflow/stream_executor/platform/dso_loader.h"
@ -52,8 +53,14 @@ cusparseStatus_t GetSymbolNotFoundError() {
 
 #if CUDA_VERSION < 9020
 #include "tensorflow/stream_executor/cuda/cusparse_9_0.inc"
-#elif CUDA_VERSION < 10010
+#elif CUDA_VERSION == 10000
 #include "tensorflow/stream_executor/cuda/cusparse_10_0.inc"
-#else
+#elif CUDA_VERSION == 10010
 #include "tensorflow/stream_executor/cuda/cusparse_10_1.inc"
+#elif CUDA_VERSION == 10020
+#include "tensorflow/stream_executor/cuda/cusparse_10_2.inc"
+#elif CUDA_VERSION == 11000
+#include "tensorflow/stream_executor/cuda/cusparse_11_0.inc"
+#else
+#error "We don't have a wrapper for this version."
 #endif
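The #error fallback makes an unsupported toolkit fail at build time, but the wrapper that does get compiled in is matched to the headers the build saw, not to whatever libcusparse turns up at runtime. A purely illustrative cross-check using the standard cusparseGetProperty API (nothing in this change adds such a check; the comparison rule is an assumption):

#include <cstdio>

#include "third_party/gpus/cuda/include/cuda.h"      // CUDA_VERSION
#include "third_party/gpus/cuda/include/cusparse.h"  // cusparseGetProperty

// Illustrative only: compare the toolkit version the wrapper was selected
// for against the major/minor of the cuSPARSE library actually loaded.
bool CusparseMatchesBuild() {
  int major = 0, minor = 0;
  if (cusparseGetProperty(MAJOR_VERSION, &major) != CUSPARSE_STATUS_SUCCESS ||
      cusparseGetProperty(MINOR_VERSION, &minor) != CUSPARSE_STATUS_SUCCESS) {
    std::fprintf(stderr, "cuSPARSE could not be loaded\n");
    return false;
  }
  std::fprintf(stderr, "built for CUDA %d, loaded cuSPARSE %d.%d\n",
               CUDA_VERSION, major, minor);
  return (CUDA_VERSION / 1000) == major;  // assumed: majors track each other
}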