Add cuda_configure repository rule to autodetect cuda. (#3966)
This change reimplements the CUDA autoconfiguration mechanism in Skylark, providing a `cuda_configure` workspace rule. We keep the same user interface, the ./configure script, but rather than modifying source files within the source tree, `cuda_configure` generates a `@local_config_cuda` workspace containing: * Symlinks to the CUDA headers and libraries * BUILD files generated with the correct CUDA and cuDNN versions * CROSSTOOL config with CUDA include dirs populated * crosstool_wrapper_driver_is_not_gcc wrapper script with compiler paths and CUDA compute capabilities set. * cuda_config.h header file with CUDA versions and compute capabilities set, which can be `#include`d by source files. This change also makes the following fixes to `Dockerfile.gpu`: * Change the `CUDNN_INSTALL_PATH` to point to `/usr/lib/x86_64-linux-gnu` rather than `/usr/local/cuda` since NVIDIA's image installs `libcudnn.so` under `/usr/lib/x86_64-linux-gnu`. * Add env variable to set the minimum compute capability to 3.0. Fixes #2873
This commit is contained in:
parent
44595c44ee
commit
58b37cf745
8
.gitignore
vendored
8
.gitignore
vendored
@ -9,13 +9,6 @@ node_modules
|
|||||||
/bazel-testlogs
|
/bazel-testlogs
|
||||||
/bazel-tf
|
/bazel-tf
|
||||||
/tensorflow/contrib/cmake/build
|
/tensorflow/contrib/cmake/build
|
||||||
/third_party/gpus/cuda/bin
|
|
||||||
/third_party/gpus/cuda/cuda.config
|
|
||||||
/third_party/gpus/cuda/extras
|
|
||||||
/third_party/gpus/cuda/include
|
|
||||||
/third_party/gpus/cuda/lib
|
|
||||||
/third_party/gpus/cuda/lib64
|
|
||||||
/third_party/gpus/cuda/nvvm
|
|
||||||
/third_party/py/numpy/numpy_include
|
/third_party/py/numpy/numpy_include
|
||||||
/tools/bazel.rc
|
/tools/bazel.rc
|
||||||
/tools/python_bin_path.sh
|
/tools/python_bin_path.sh
|
||||||
@ -25,3 +18,4 @@ node_modules
|
|||||||
/_python_build
|
/_python_build
|
||||||
*.pyc
|
*.pyc
|
||||||
__pycache__
|
__pycache__
|
||||||
|
*.swp
|
||||||
|
73
configure
vendored
73
configure
vendored
@ -80,6 +80,7 @@ while [ "$TF_NEED_CUDA" == "" ]; do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
export TF_NEED_CUDA
|
||||||
if [ "$TF_NEED_CUDA" == "0" ]; then
|
if [ "$TF_NEED_CUDA" == "0" ]; then
|
||||||
echo "Configuration finished"
|
echo "Configuration finished"
|
||||||
exit
|
exit
|
||||||
@ -97,6 +98,7 @@ while true; do
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
|
if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
|
||||||
|
export CC=$GCC_HOST_COMPILER_PATH
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
|
echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
|
||||||
@ -107,7 +109,6 @@ while true; do
|
|||||||
# Retry
|
# Retry
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
# Find out where the CUDA toolkit is installed
|
# Find out where the CUDA toolkit is installed
|
||||||
OSNAME=`uname -s`
|
OSNAME=`uname -s`
|
||||||
|
|
||||||
@ -140,6 +141,8 @@ while true; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
|
if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
|
||||||
|
export CUDA_TOOLKIT_PATH
|
||||||
|
export CUDA_VERSION=$TF_CUDA_VERSION
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
|
echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
|
||||||
@ -200,13 +203,16 @@ while true; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
|
if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
|
||||||
|
export CUDNN_VERSION=$TF_CUDNN_VERSION
|
||||||
|
export CUDNN_INSTALL_PATH
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$OSNAME" == "Linux" ]; then
|
if [ "$OSNAME" == "Linux" ]; then
|
||||||
CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
|
CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
|
||||||
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
|
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
|
||||||
CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
export CUDNN_VERSION=$TF_CUDNN_VERSION
|
||||||
|
export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
@ -225,42 +231,11 @@ while true; do
|
|||||||
CUDNN_INSTALL_PATH=""
|
CUDNN_INSTALL_PATH=""
|
||||||
done
|
done
|
||||||
|
|
||||||
cat > third_party/gpus/cuda/cuda.config <<EOF
|
|
||||||
# CUDA_TOOLKIT_PATH refers to the CUDA toolkit.
|
|
||||||
CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
|
|
||||||
# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
|
|
||||||
# files can be either in this directory, or under include/ and lib64/
|
|
||||||
# directories separately.
|
|
||||||
CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
|
|
||||||
|
|
||||||
# The Cuda SDK version that should be used in this build (empty to use libcudart.so symlink)
|
|
||||||
TF_CUDA_VERSION=$TF_CUDA_VERSION
|
|
||||||
|
|
||||||
# The Cudnn version that should be used in this build
|
|
||||||
TF_CUDNN_VERSION=$TF_CUDNN_VERSION
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# Configure the gcc host compiler to use
|
|
||||||
export WARNING=$DO_NOT_SUBMIT_WARNING
|
|
||||||
perl -pi -e "s,CPU_COMPILER = \('.*'\),# \$ENV{WARNING}\nCPU_COMPILER = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
|
||||||
perl -pi -e "s,GCC_HOST_COMPILER_PATH = \('.*'\),# \$ENV{WARNING}\nGCC_HOST_COMPILER_PATH = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
|
||||||
|
|
||||||
# Configure the platform name.
|
|
||||||
perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/gpus/cuda/platform.bzl
|
|
||||||
|
|
||||||
# Configure the Cuda toolkit version to work with.
|
|
||||||
perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
|
|
||||||
perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_VERSION\",s" third_party/gpus/cuda/platform.bzl
|
|
||||||
|
|
||||||
# Configure the Cudnn version to work with.
|
|
||||||
perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
|
|
||||||
perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_VERSION\",s" third_party/gpus/cuda/platform.bzl
|
|
||||||
|
|
||||||
|
|
||||||
# Configure the compute capabilities that TensorFlow builds for.
|
# Configure the compute capabilities that TensorFlow builds for.
|
||||||
# Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
|
# Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
|
||||||
while true; do
|
while true; do
|
||||||
fromuser=""
|
fromuser=""
|
||||||
|
default_cuda_compute_capabilities="3.5,5.2"
|
||||||
if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
||||||
cat << EOF
|
cat << EOF
|
||||||
Please specify a list of comma-separated Cuda compute capabilities you want to build with.
|
Please specify a list of comma-separated Cuda compute capabilities you want to build with.
|
||||||
@ -270,6 +245,9 @@ EOF
|
|||||||
read -p "[Default is: \"3.5,5.2\"]: " TF_CUDA_COMPUTE_CAPABILITIES
|
read -p "[Default is: \"3.5,5.2\"]: " TF_CUDA_COMPUTE_CAPABILITIES
|
||||||
fromuser=1
|
fromuser=1
|
||||||
fi
|
fi
|
||||||
|
if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
||||||
|
TF_CUDA_COMPUTE_CAPABILITIES=$default_cuda_compute_capabilities
|
||||||
|
fi
|
||||||
# Check whether all capabilities from the input is valid
|
# Check whether all capabilities from the input is valid
|
||||||
COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES//,/ }
|
COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES//,/ }
|
||||||
ALL_VALID=1
|
ALL_VALID=1
|
||||||
@ -285,34 +263,13 @@ EOF
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
|
export CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
TF_CUDA_COMPUTE_CAPABILITIES=""
|
TF_CUDA_COMPUTE_CAPABILITIES=""
|
||||||
done
|
done
|
||||||
|
|
||||||
if [ ! -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
bazel clean --expunge
|
||||||
export WARNING=$DO_NOT_SUBMIT_WARNING
|
bazel fetch //...
|
||||||
function CudaGenCodeOpts() {
|
|
||||||
OUTPUT=""
|
|
||||||
for CAPABILITY in $@; do
|
|
||||||
OUTPUT=${OUTPUT}" \"${CAPABILITY}\", "
|
|
||||||
done
|
|
||||||
echo $OUTPUT
|
|
||||||
}
|
|
||||||
export CUDA_GEN_CODES_OPTS=$(CudaGenCodeOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
|
|
||||||
perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\[).*?(\]),\n\1# $ENV{WARNING}\n\1\2$ENV{CUDA_GEN_CODES_OPTS}\3,s' third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
|
||||||
function CudaVersionOpts() {
|
|
||||||
OUTPUT=""
|
|
||||||
for CAPABILITY in $@; do
|
|
||||||
OUTPUT=$OUTPUT"CudaVersion(\"${CAPABILITY}\"), "
|
|
||||||
done
|
|
||||||
echo $OUTPUT
|
|
||||||
}
|
|
||||||
export CUDA_VERSION_OPTS=$(CudaVersionOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
|
|
||||||
perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\{).*?(\}),\n\1// $ENV{WARNING}\n\1\2$ENV{CUDA_VERSION_OPTS}\3,s' tensorflow/core/common_runtime/gpu/gpu_device.cc
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Invoke the cuda_config.sh and set up the TensorFlow's canonical view of the Cuda libraries
|
|
||||||
(cd third_party/gpus/cuda; ./cuda_config.sh;) || exit -1
|
|
||||||
|
|
||||||
echo "Configuration finished"
|
echo "Configuration finished"
|
||||||
|
@ -759,10 +759,9 @@ struct CudaVersion {
|
|||||||
int minor_part = -1;
|
int minor_part = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
// "configure" uses the specific name to substitute the following string.
|
|
||||||
// If you change it, make sure you modify "configure" as well.
|
|
||||||
std::vector<CudaVersion> supported_cuda_compute_capabilities = {
|
std::vector<CudaVersion> supported_cuda_compute_capabilities = {
|
||||||
CudaVersion("3.5"), CudaVersion("5.2")};
|
TF_CUDA_CAPABILITIES,
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
|
std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
|
||||||
auto cuda_caps = supported_cuda_compute_capabilities;
|
auto cuda_caps = supported_cuda_compute_capabilities;
|
||||||
|
@ -31,7 +31,7 @@ limitations under the License.
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
#include "tensorflow/core/platform/stream_executor.h"
|
#include "tensorflow/core/platform/stream_executor.h"
|
||||||
#include "tensorflow/core/util/stream_executor_util.h"
|
#include "tensorflow/core/util/stream_executor_util.h"
|
||||||
#endif // GOOGLE_CUDA
|
#endif // GOOGLE_CUDA
|
||||||
|
@ -25,7 +25,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/kernels/fill_functor.h"
|
#include "tensorflow/core/kernels/fill_functor.h"
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
#include "tensorflow/core/platform/stream_executor.h"
|
#include "tensorflow/core/platform/stream_executor.h"
|
||||||
#endif // GOOGLE_CUDA
|
#endif // GOOGLE_CUDA
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ exports_files(["LICENSE"])
|
|||||||
|
|
||||||
load("//tensorflow:tensorflow.bzl", "tf_copts")
|
load("//tensorflow:tensorflow.bzl", "tf_copts")
|
||||||
load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
|
load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
|
||||||
load("//third_party/gpus/cuda:platform.bzl", "cuda_library_path")
|
load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
|
||||||
|
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "gtest",
|
name = "gtest",
|
||||||
@ -32,7 +32,7 @@ tf_cuda_library(
|
|||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/stream_executor",
|
"//tensorflow/stream_executor",
|
||||||
] + select({
|
] + select({
|
||||||
"//third_party/gpus/cuda:darwin": ["IOKit"],
|
"@local_config_cuda//cuda:darwin": ["IOKit"],
|
||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
@ -91,20 +91,20 @@ filegroup(
|
|||||||
cc_library(
|
cc_library(
|
||||||
name = "cuda",
|
name = "cuda",
|
||||||
data = [
|
data = [
|
||||||
"//third_party/gpus/cuda:{}".format(cuda_library_path("cudart")),
|
"@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")),
|
||||||
],
|
],
|
||||||
linkopts = select({
|
linkopts = select({
|
||||||
"//third_party/gpus/cuda:darwin": [
|
"@local_config_cuda//cuda:darwin": [
|
||||||
"-Wl,-rpath,third_party/gpus/cuda/lib",
|
"-Wl,-rpath,../local_config_cuda/cuda/lib",
|
||||||
"-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib",
|
"-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib",
|
||||||
],
|
],
|
||||||
"//conditions:default": [
|
"//conditions:default": [
|
||||||
"-Wl,-rpath,third_party/gpus/cuda/lib64",
|
"-Wl,-rpath,../local_config_cuda/cuda/lib64",
|
||||||
"-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64",
|
"-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib64",
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
deps = [
|
deps = [
|
||||||
"//third_party/gpus/cuda:cudart",
|
"@local_config_cuda//cuda:cudart",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -15,9 +15,9 @@ tf_cuda_library(
|
|||||||
copts = tf_copts(),
|
copts = tf_copts(),
|
||||||
cuda_deps = [
|
cuda_deps = [
|
||||||
"//tensorflow/core:stream_executor",
|
"//tensorflow/core:stream_executor",
|
||||||
"//third_party/gpus/cuda:cuda_headers",
|
"@local_config_cuda//cuda:cuda_headers",
|
||||||
"//third_party/gpus/cuda:cupti_headers",
|
"@local_config_cuda//cuda:cupti_headers",
|
||||||
],
|
],
|
||||||
data = ["//third_party/gpus/cuda:cupti_dsos"],
|
data = ["@local_config_cuda//cuda:cupti_dsos"],
|
||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
)
|
)
|
||||||
|
@ -21,7 +21,7 @@ limitations under the License.
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
|
#include "cuda/extras/CUPTI/include/cupti.h"
|
||||||
|
|
||||||
namespace perftools {
|
namespace perftools {
|
||||||
namespace gputools {
|
namespace gputools {
|
||||||
|
@ -16,7 +16,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/util/port.h"
|
#include "tensorflow/core/util/port.h"
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
|
@ -27,9 +27,10 @@ cc_library(
|
|||||||
]),
|
]),
|
||||||
data = [
|
data = [
|
||||||
"//tensorflow/core:cuda",
|
"//tensorflow/core:cuda",
|
||||||
"//third_party/gpus/cuda:cublas",
|
"@local_config_cuda//cuda:cublas",
|
||||||
"//third_party/gpus/cuda:cudnn",
|
"@local_config_cuda//cuda:cudnn",
|
||||||
"//third_party/gpus/cuda:cufft",
|
"@local_config_cuda//cuda:cufft",
|
||||||
|
"@local_config_cuda//cuda:curand",
|
||||||
],
|
],
|
||||||
linkopts = [
|
linkopts = [
|
||||||
"-ldl",
|
"-ldl",
|
||||||
@ -37,7 +38,7 @@ cc_library(
|
|||||||
visibility = ["//visibility:public"],
|
visibility = ["//visibility:public"],
|
||||||
deps = [
|
deps = [
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//third_party/gpus/cuda:cuda_headers",
|
"@local_config_cuda//cuda:cuda_headers",
|
||||||
],
|
],
|
||||||
alwayslink = 1,
|
alwayslink = 1,
|
||||||
)
|
)
|
||||||
|
@ -18,8 +18,8 @@ limitations under the License.
|
|||||||
// cuda.h). This ensures that Eigen's Half.h does not attempt to make its own
|
// cuda.h). This ensures that Eigen's Half.h does not attempt to make its own
|
||||||
// __half typedef if CUDA has already defined one (and conversely, that we do
|
// __half typedef if CUDA has already defined one (and conversely, that we do
|
||||||
// not include <cuda_fp16.h> after Half.h has made its typedef).
|
// not include <cuda_fp16.h> after Half.h has made its typedef).
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
#include "third_party/gpus/cuda/include/cublas_v2.h"
|
#include "cuda/include/cublas_v2.h"
|
||||||
|
|
||||||
#if CUDA_VERSION >= 7050
|
#if CUDA_VERSION >= 7050
|
||||||
#define EIGEN_HAS_CUDA_FP16
|
#define EIGEN_HAS_CUDA_FP16
|
||||||
|
@ -39,7 +39,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/stream.h"
|
#include "tensorflow/stream_executor/stream.h"
|
||||||
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
|
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#include "third_party/gpus/cuda/include/cudnn.h"
|
#include "cuda/include/cudnn.h"
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -25,7 +25,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/lib/status.h"
|
#include "tensorflow/stream_executor/lib/status.h"
|
||||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||||
#include "tensorflow/stream_executor/platform/port.h"
|
#include "tensorflow/stream_executor/platform/port.h"
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
|
|
||||||
namespace perftools {
|
namespace perftools {
|
||||||
namespace gputools {
|
namespace gputools {
|
||||||
|
@ -23,7 +23,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/fft.h"
|
#include "tensorflow/stream_executor/fft.h"
|
||||||
#include "tensorflow/stream_executor/platform/port.h"
|
#include "tensorflow/stream_executor/platform/port.h"
|
||||||
#include "tensorflow/stream_executor/plugin_registry.h"
|
#include "tensorflow/stream_executor/plugin_registry.h"
|
||||||
#include "third_party/gpus/cuda/include/cufft.h"
|
#include "cuda/include/cufft.h"
|
||||||
|
|
||||||
namespace perftools {
|
namespace perftools {
|
||||||
namespace gputools {
|
namespace gputools {
|
||||||
|
@ -24,8 +24,8 @@ limitations under the License.
|
|||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
#include "third_party/gpus/cuda/include/cuComplex.h"
|
#include "cuda/include/cuComplex.h"
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
|
|
||||||
namespace perftools {
|
namespace perftools {
|
||||||
namespace gputools {
|
namespace gputools {
|
||||||
|
@ -28,7 +28,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/lib/casts.h"
|
#include "tensorflow/stream_executor/lib/casts.h"
|
||||||
#include "tensorflow/stream_executor/platform/port.h"
|
#include "tensorflow/stream_executor/platform/port.h"
|
||||||
#include "tensorflow/stream_executor/platform/logging.h"
|
#include "tensorflow/stream_executor/platform/logging.h"
|
||||||
#include "third_party/gpus/cuda/include/cuda.h"
|
#include "cuda/include/cuda.h"
|
||||||
|
|
||||||
#ifdef PLATFORMS_GPUS_CUDA_DYNAMIC_LIBCUDA_DYNAMIC_LIBCUDA_H_
|
#ifdef PLATFORMS_GPUS_CUDA_DYNAMIC_LIBCUDA_DYNAMIC_LIBCUDA_H_
|
||||||
#error \
|
#error \
|
||||||
|
@ -28,7 +28,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/lib/status.h"
|
#include "tensorflow/stream_executor/lib/status.h"
|
||||||
#include "tensorflow/stream_executor/platform/logging.h"
|
#include "tensorflow/stream_executor/platform/logging.h"
|
||||||
#include "tensorflow/stream_executor/rng.h"
|
#include "tensorflow/stream_executor/rng.h"
|
||||||
#include "third_party/gpus/cuda/include/curand.h"
|
#include "cuda/include/curand.h"
|
||||||
|
|
||||||
// Formats curandStatus_t to output prettified values into a log stream.
|
// Formats curandStatus_t to output prettified values into a log stream.
|
||||||
std::ostream &operator<<(std::ostream &in, const curandStatus_t &status) {
|
std::ostream &operator<<(std::ostream &in, const curandStatus_t &status) {
|
||||||
|
@ -28,23 +28,22 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/load_library.h"
|
#include "tensorflow/core/platform/load_library.h"
|
||||||
#include "tensorflow/stream_executor/lib/error.h"
|
#include "tensorflow/stream_executor/lib/error.h"
|
||||||
#include "tensorflow/stream_executor/lib/str_util.h"
|
#include "tensorflow/stream_executor/lib/str_util.h"
|
||||||
|
#include "tensorflow/stream_executor/lib/str_util.h"
|
||||||
#include "tensorflow/stream_executor/lib/strcat.h"
|
#include "tensorflow/stream_executor/lib/strcat.h"
|
||||||
#include "tensorflow/stream_executor/lib/stringprintf.h"
|
#include "tensorflow/stream_executor/lib/stringprintf.h"
|
||||||
#include "tensorflow/stream_executor/platform/logging.h"
|
#include "tensorflow/stream_executor/platform/logging.h"
|
||||||
#include "tensorflow/stream_executor/platform/port.h"
|
#include "tensorflow/stream_executor/platform/port.h"
|
||||||
#include "tensorflow/stream_executor/lib/str_util.h"
|
|
||||||
|
|
||||||
namespace perftools {
|
namespace perftools {
|
||||||
namespace gputools {
|
namespace gputools {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
// TensorFlow OSS configure uses the following lines to configure versions. For
|
string GetCudaVersion() { return TF_CUDA_VERSION; }
|
||||||
// any modifications of the format, please make sure the script still works.
|
string GetCudnnVersion() { return TF_CUDNN_VERSION; }
|
||||||
string GetCudaVersion() { return ""; }
|
|
||||||
string GetCudnnVersion() { return ""; }
|
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
|
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
|
||||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cublas", GetCudaVersion()),
|
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||||
|
"cublas", GetCudaVersion()),
|
||||||
GetCudaLibraryDirPath()),
|
GetCudaLibraryDirPath()),
|
||||||
dso_handle);
|
dso_handle);
|
||||||
}
|
}
|
||||||
@ -53,35 +52,38 @@ string GetCudnnVersion() { return ""; }
|
|||||||
// libcudnn is versioned differently than the other libraries and may have a
|
// libcudnn is versioned differently than the other libraries and may have a
|
||||||
// different version number than other CUDA libraries. See b/22397368 for
|
// different version number than other CUDA libraries. See b/22397368 for
|
||||||
// some details about the complications surrounding this.
|
// some details about the complications surrounding this.
|
||||||
return GetDsoHandle(
|
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||||
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cudnn", GetCudnnVersion()),
|
"cudnn", GetCudnnVersion()),
|
||||||
GetCudaLibraryDirPath()),
|
GetCudaLibraryDirPath()),
|
||||||
dso_handle);
|
dso_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
|
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
|
||||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cufft", GetCudaVersion()),
|
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||||
|
"cufft", GetCudaVersion()),
|
||||||
GetCudaLibraryDirPath()),
|
GetCudaLibraryDirPath()),
|
||||||
dso_handle);
|
dso_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
|
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
|
||||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("curand", GetCudaVersion()),
|
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||||
|
"curand", GetCudaVersion()),
|
||||||
GetCudaLibraryDirPath()),
|
GetCudaLibraryDirPath()),
|
||||||
dso_handle);
|
dso_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
|
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
|
||||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
|
return GetDsoHandle(
|
||||||
GetCudaDriverLibraryPath()),
|
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
|
||||||
dso_handle);
|
GetCudaDriverLibraryPath()),
|
||||||
|
dso_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
|
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
|
||||||
return GetDsoHandle(
|
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||||
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cupti", GetCudaVersion()),
|
"cupti", GetCudaVersion()),
|
||||||
GetCudaCuptiLibraryPath()),
|
GetCudaCuptiLibraryPath()),
|
||||||
dso_handle);
|
dso_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ void DsoLoader::RegisterRpath(port::StringPiece path) {
|
/* static */ void DsoLoader::RegisterRpath(port::StringPiece path) {
|
||||||
@ -89,11 +91,9 @@ string GetCudnnVersion() { return ""; }
|
|||||||
GetRpaths()->push_back(path.ToString());
|
GetRpaths()->push_back(path.ToString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* static */ port::Status DsoLoader::GetDsoHandle(port::StringPiece path,
|
/* static */ port::Status DsoLoader::GetDsoHandle(port::StringPiece path,
|
||||||
void** dso_handle,
|
void** dso_handle,
|
||||||
LoadKind load_kind) {
|
LoadKind load_kind) {
|
||||||
|
|
||||||
int dynload_flags =
|
int dynload_flags =
|
||||||
RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
|
RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
|
||||||
string path_string = path.ToString();
|
string path_string = path.ToString();
|
||||||
@ -138,9 +138,9 @@ string GetCudnnVersion() { return ""; }
|
|||||||
static std::vector<string>* CreatePrimordialRpaths() {
|
static std::vector<string>* CreatePrimordialRpaths() {
|
||||||
auto rpaths = new std::vector<string>;
|
auto rpaths = new std::vector<string>;
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib");
|
rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib");
|
||||||
#else
|
#else
|
||||||
rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib64");
|
rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64");
|
||||||
#endif
|
#endif
|
||||||
return rpaths;
|
return rpaths;
|
||||||
}
|
}
|
||||||
@ -165,7 +165,6 @@ static std::vector<string>* CreatePrimordialRpaths() {
|
|||||||
|
|
||||||
/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
|
/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
|
||||||
port::StringPiece runfiles_relpath) {
|
port::StringPiece runfiles_relpath) {
|
||||||
|
|
||||||
// Keep a record of the paths we attempted so we can dump out meaningful
|
// Keep a record of the paths we attempted so we can dump out meaningful
|
||||||
// diagnostics if no path is found.
|
// diagnostics if no path is found.
|
||||||
std::vector<string> attempted;
|
std::vector<string> attempted;
|
||||||
@ -191,29 +190,28 @@ static std::vector<string>* CreatePrimordialRpaths() {
|
|||||||
|
|
||||||
/* static */ string DsoLoader::GetCudaLibraryDirPath() {
|
/* static */ string DsoLoader::GetCudaLibraryDirPath() {
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
return "third_party/gpus/cuda/lib";
|
return "external/local_config_cuda/cuda/lib";
|
||||||
#else
|
#else
|
||||||
return "third_party/gpus/cuda/lib64";
|
return "external/local_config_cuda/cuda/lib64";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
|
/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
return "third_party/gpus/cuda/driver/lib";
|
return "external/local_config_cuda/cuda/driver/lib";
|
||||||
#else
|
#else
|
||||||
return "third_party/gpus/cuda/driver/lib64";
|
return "external/local_config_cuda/cuda/driver/lib64";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
|
/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
return "third_party/gpus/cuda/extras/CUPTI/lib";
|
return "external/local_config_cuda/cuda/extras/CUPTI/lib";
|
||||||
#else
|
#else
|
||||||
return "third_party/gpus/cuda/extras/CUPTI/lib64";
|
return "external/local_config_cuda/cuda/extras/CUPTI/lib64";
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// -- CachedDsoLoader
|
// -- CachedDsoLoader
|
||||||
|
|
||||||
/* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
|
/* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
|
||||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
|||||||
#include "tensorflow/stream_executor/platform/port.h"
|
#include "tensorflow/stream_executor/platform/port.h"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "cuda/cuda_config.h"
|
||||||
#include "tensorflow/stream_executor/lib/status.h"
|
#include "tensorflow/stream_executor/lib/status.h"
|
||||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||||
#include "tensorflow/stream_executor/lib/stringpiece.h"
|
#include "tensorflow/stream_executor/lib/stringpiece.h"
|
||||||
|
@ -32,7 +32,7 @@ load(
|
|||||||
"tf_cuda_tests_tags",
|
"tf_cuda_tests_tags",
|
||||||
)
|
)
|
||||||
load(
|
load(
|
||||||
"//third_party/gpus/cuda:build_defs.bzl",
|
"@local_config_cuda//cuda:build_defs.bzl",
|
||||||
"if_cuda",
|
"if_cuda",
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -295,11 +295,11 @@ def tf_cc_tests(tests, deps, linkstatic=0, tags=[], size="medium", args=None,
|
|||||||
tf_cc_test(t, deps, linkstatic, tags=tags, size=size, args=args,
|
tf_cc_test(t, deps, linkstatic, tags=tags, size=size, args=args,
|
||||||
linkopts=linkopts)
|
linkopts=linkopts)
|
||||||
|
|
||||||
def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium", args=None):
|
def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium",
|
||||||
|
args=None):
|
||||||
tf_cc_tests(tests, deps, linkstatic, tags=tags, size=size, args=args)
|
tf_cc_tests(tests, deps, linkstatic, tags=tags, size=size, args=args)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def tf_cuda_cc_tests(tests, deps, tags=[], size="medium", linkstatic=0,
|
def tf_cuda_cc_tests(tests, deps, tags=[], size="medium", linkstatic=0,
|
||||||
args=None, linkopts=[]):
|
args=None, linkopts=[]):
|
||||||
for t in tests:
|
for t in tests:
|
||||||
@ -316,29 +316,29 @@ def _cuda_copts():
|
|||||||
common_cuda_opts = ["-x", "cuda", "-DGOOGLE_CUDA=1"]
|
common_cuda_opts = ["-x", "cuda", "-DGOOGLE_CUDA=1"]
|
||||||
return select({
|
return select({
|
||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
"//third_party/gpus/cuda:using_nvcc": (
|
"@local_config_cuda//cuda:using_nvcc": (
|
||||||
common_cuda_opts +
|
common_cuda_opts +
|
||||||
[
|
[
|
||||||
"-nvcc_options=relaxed-constexpr",
|
"-nvcc_options=relaxed-constexpr",
|
||||||
"-nvcc_options=ftz=true",
|
"-nvcc_options=ftz=true",
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
"//third_party/gpus/cuda:using_gcudacc": (
|
"@local_config_cuda//cuda:using_gcudacc": (
|
||||||
common_cuda_opts +
|
common_cuda_opts +
|
||||||
["--gcudacc_flag=-ftz=true"]
|
["--gcudacc_flag=-ftz=true"]
|
||||||
),
|
),
|
||||||
"//third_party/gpus/cuda:using_clang": (
|
"@local_config_cuda//cuda:using_clang": (
|
||||||
common_cuda_opts +
|
common_cuda_opts +
|
||||||
[
|
[
|
||||||
"-fcuda-flush-denormals-to-zero",
|
"-fcuda-flush-denormals-to-zero",
|
||||||
"--cuda-path=third_party/gpus/cuda",
|
"--cuda-path=external/local_config_cuda/cuda",
|
||||||
"--cuda-gpu-arch=sm_35",
|
"--cuda-gpu-arch=sm_35",
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
}) + select({
|
}) + select({
|
||||||
# Pass -O3 when building CUDA code with clang; some important
|
# Pass -O3 when building CUDA code with clang; some important
|
||||||
# optimizations are not enabled at O2.
|
# optimizations are not enabled at O2.
|
||||||
"//third_party/gpus/cuda:using_clang_opt": ["-O3"],
|
"@local_config_cuda//cuda:using_clang_opt": ["-O3"],
|
||||||
"//conditions:default": [],
|
"//conditions:default": [],
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -409,7 +409,8 @@ def tf_kernel_library(name, prefix=None, srcs=None, gpu_srcs=None, hdrs=None,
|
|||||||
* srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
|
* srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
|
||||||
* hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
|
* hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
|
||||||
* gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
|
* gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
|
||||||
"cwise_ops.h", "cwise_ops_common.h", "cwise_ops_gpu_common.cu.h"]
|
"cwise_ops.h", "cwise_ops_common.h",
|
||||||
|
"cwise_ops_gpu_common.cu.h"]
|
||||||
* "cwise_ops_test.cc" is excluded
|
* "cwise_ops_test.cc" is excluded
|
||||||
"""
|
"""
|
||||||
if not srcs:
|
if not srcs:
|
||||||
@ -613,7 +614,7 @@ check_deps = rule(
|
|||||||
def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
|
def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
|
||||||
cuda_deps = [
|
cuda_deps = [
|
||||||
"//tensorflow/core:stream_executor_headers_lib",
|
"//tensorflow/core:stream_executor_headers_lib",
|
||||||
"//third_party/gpus/cuda:cudart_static",
|
"@local_config_cuda//cuda:cudart_static",
|
||||||
]
|
]
|
||||||
deps = deps + tf_custom_op_library_additional_deps()
|
deps = deps + tf_custom_op_library_additional_deps()
|
||||||
if gpu_srcs:
|
if gpu_srcs:
|
||||||
@ -663,7 +664,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
|
|||||||
module_name=module_name,
|
module_name=module_name,
|
||||||
py_module_name=name)
|
py_module_name=name)
|
||||||
extra_linkopts = select({
|
extra_linkopts = select({
|
||||||
"//third_party/gpus/cuda:darwin": [
|
"@local_config_cuda//cuda:darwin": [
|
||||||
"-Wl,-exported_symbols_list",
|
"-Wl,-exported_symbols_list",
|
||||||
"//tensorflow:tf_exported_symbols.lds"
|
"//tensorflow:tf_exported_symbols.lds"
|
||||||
],
|
],
|
||||||
@ -672,7 +673,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
|
|||||||
"//tensorflow:tf_version_script.lds"
|
"//tensorflow:tf_version_script.lds"
|
||||||
]})
|
]})
|
||||||
extra_deps += select({
|
extra_deps += select({
|
||||||
"//third_party/gpus/cuda:darwin": [
|
"@local_config_cuda//cuda:darwin": [
|
||||||
"//tensorflow:tf_exported_symbols.lds"
|
"//tensorflow:tf_exported_symbols.lds"
|
||||||
],
|
],
|
||||||
"//conditions:default": [
|
"//conditions:default": [
|
||||||
@ -746,13 +747,14 @@ def py_tests(name,
|
|||||||
data=data,
|
data=data,
|
||||||
additional_deps=additional_deps)
|
additional_deps=additional_deps)
|
||||||
|
|
||||||
def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[], shard_count=1, tags=[], prefix=""):
|
def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[],
|
||||||
|
shard_count=1, tags=[], prefix=""):
|
||||||
test_tags = tags + tf_cuda_tests_tags()
|
test_tags = tags + tf_cuda_tests_tags()
|
||||||
py_tests(name=name, size=size, srcs=srcs, additional_deps=additional_deps,
|
py_tests(name=name, size=size, srcs=srcs, additional_deps=additional_deps,
|
||||||
data=data, tags=test_tags, shard_count=shard_count,prefix=prefix)
|
data=data, tags=test_tags, shard_count=shard_count,prefix=prefix)
|
||||||
|
|
||||||
# Creates a genrule named <name> for running tools/proto_text's generator to make
|
# Creates a genrule named <name> for running tools/proto_text's generator to
|
||||||
# the proto_text functions, for the protos passed in <srcs>.
|
# make the proto_text functions, for the protos passed in <srcs>.
|
||||||
#
|
#
|
||||||
# Return a struct with fields (hdrs, srcs) containing the names of the
|
# Return a struct with fields (hdrs, srcs) containing the names of the
|
||||||
# generated files.
|
# generated files.
|
||||||
|
@ -22,5 +22,6 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
|
|||||||
|
|
||||||
# Configure the build for our CUDA configuration.
|
# Configure the build for our CUDA configuration.
|
||||||
ENV CUDA_TOOLKIT_PATH /usr/local/cuda
|
ENV CUDA_TOOLKIT_PATH /usr/local/cuda
|
||||||
ENV CUDNN_INSTALL_PATH /usr/local/cuda
|
ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
|
||||||
ENV TF_NEED_CUDA 1
|
ENV TF_NEED_CUDA 1
|
||||||
|
ENV CUDA_COMPUTE_CAPABILITIES 3.0,5.2
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
|
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
|
||||||
|
|
||||||
|
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
|
||||||
|
|
||||||
# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
|
# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
|
||||||
# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
|
# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
|
||||||
# local_repository rule (e.g. "@tf").
|
# local_repository rule (e.g. "@tf").
|
||||||
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||||
|
cuda_configure(name = "local_config_cuda")
|
||||||
|
|
||||||
# These lines need to be changed when updating Eigen. They are parsed from
|
# These lines need to be changed when updating Eigen. They are parsed from
|
||||||
# this file by the cmake and make builds to determine the eigen version and hash.
|
# this file by the cmake and make builds to determine the eigen version and hash.
|
||||||
|
0
third_party/gpus/BUILD
vendored
Normal file
0
third_party/gpus/BUILD
vendored
Normal file
42
third_party/gpus/crosstool/BUILD
vendored
42
third_party/gpus/crosstool/BUILD
vendored
@ -1,42 +0,0 @@
|
|||||||
licenses(["restricted"])
|
|
||||||
|
|
||||||
package(default_visibility = ["//visibility:public"])
|
|
||||||
|
|
||||||
filegroup(
|
|
||||||
name = "crosstool",
|
|
||||||
srcs = ["CROSSTOOL"],
|
|
||||||
output_licenses = ["unencumbered"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_toolchain(
|
|
||||||
name = "cc-compiler-local",
|
|
||||||
all_files = ":empty",
|
|
||||||
compiler_files = ":empty",
|
|
||||||
cpu = "local",
|
|
||||||
dwp_files = ":empty",
|
|
||||||
dynamic_runtime_libs = [":empty"],
|
|
||||||
linker_files = ":empty",
|
|
||||||
objcopy_files = ":empty",
|
|
||||||
static_runtime_libs = [":empty"],
|
|
||||||
strip_files = ":empty",
|
|
||||||
supports_param_files = 0,
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_toolchain(
|
|
||||||
name = "cc-compiler-darwin",
|
|
||||||
all_files = ":empty",
|
|
||||||
compiler_files = ":empty",
|
|
||||||
cpu = "darwin",
|
|
||||||
dwp_files = ":empty",
|
|
||||||
dynamic_runtime_libs = [":empty"],
|
|
||||||
linker_files = ":empty",
|
|
||||||
objcopy_files = ":empty",
|
|
||||||
static_runtime_libs = [":empty"],
|
|
||||||
strip_files = ":empty",
|
|
||||||
supports_param_files = 0,
|
|
||||||
)
|
|
||||||
|
|
||||||
filegroup(
|
|
||||||
name = "empty",
|
|
||||||
srcs = [],
|
|
||||||
)
|
|
42
third_party/gpus/crosstool/BUILD.tpl
vendored
Normal file
42
third_party/gpus/crosstool/BUILD.tpl
vendored
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
licenses(["restricted"])
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "crosstool",
|
||||||
|
srcs = ["CROSSTOOL"],
|
||||||
|
output_licenses = ["unencumbered"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_toolchain(
|
||||||
|
name = "cc-compiler-local",
|
||||||
|
all_files = ":empty",
|
||||||
|
compiler_files = ":empty",
|
||||||
|
cpu = "local",
|
||||||
|
dwp_files = ":empty",
|
||||||
|
dynamic_runtime_libs = [":empty"],
|
||||||
|
linker_files = ":empty",
|
||||||
|
objcopy_files = ":empty",
|
||||||
|
static_runtime_libs = [":empty"],
|
||||||
|
strip_files = ":empty",
|
||||||
|
supports_param_files = 0,
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_toolchain(
|
||||||
|
name = "cc-compiler-darwin",
|
||||||
|
all_files = ":empty",
|
||||||
|
compiler_files = ":empty",
|
||||||
|
cpu = "darwin",
|
||||||
|
dwp_files = ":empty",
|
||||||
|
dynamic_runtime_libs = [":empty"],
|
||||||
|
linker_files = ":empty",
|
||||||
|
objcopy_files = ":empty",
|
||||||
|
static_runtime_libs = [":empty"],
|
||||||
|
strip_files = ":empty",
|
||||||
|
supports_param_files = 0,
|
||||||
|
)
|
||||||
|
|
||||||
|
filegroup(
|
||||||
|
name = "empty",
|
||||||
|
srcs = [],
|
||||||
|
)
|
@ -47,7 +47,7 @@ toolchain {
|
|||||||
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||||
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||||
# As part of the TensorFlow release, we place some cuda-related compilation
|
# As part of the TensorFlow release, we place some cuda-related compilation
|
||||||
# files in third_party/gpus/crosstool/clang/bin, and this relative
|
# files in @local_config_cuda//crosstool/clang/bin, and this relative
|
||||||
# path, combined with the rest of our Bazel configuration causes our
|
# path, combined with the rest of our Bazel configuration causes our
|
||||||
# compilation to use those files.
|
# compilation to use those files.
|
||||||
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
||||||
@ -125,6 +125,9 @@ toolchain {
|
|||||||
# linker_flag: "-Wl,--warn-execstack"
|
# linker_flag: "-Wl,--warn-execstack"
|
||||||
# linker_flag: "-Wl,--detect-odr-violations"
|
# linker_flag: "-Wl,--detect-odr-violations"
|
||||||
|
|
||||||
|
# Include directory for cuda headers.
|
||||||
|
cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
|
||||||
|
|
||||||
compilation_mode_flags {
|
compilation_mode_flags {
|
||||||
mode: DBG
|
mode: DBG
|
||||||
# Enable debug symbols.
|
# Enable debug symbols.
|
||||||
@ -221,6 +224,9 @@ toolchain {
|
|||||||
# Anticipated future default.
|
# Anticipated future default.
|
||||||
linker_flag: "-no-canonical-prefixes"
|
linker_flag: "-no-canonical-prefixes"
|
||||||
|
|
||||||
|
# Include directory for cuda headers.
|
||||||
|
cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
|
||||||
|
|
||||||
compilation_mode_flags {
|
compilation_mode_flags {
|
||||||
mode: DBG
|
mode: DBG
|
||||||
# Enable debug symbols.
|
# Enable debug symbols.
|
@ -45,10 +45,9 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
import pipes
|
import pipes
|
||||||
|
|
||||||
# "configure" uses the specific format to substitute the following string.
|
# Template values set by cuda_autoconf.
|
||||||
# If you change it, make sure you modify "configure" as well.
|
CPU_COMPILER = ('%{cpu_compiler}')
|
||||||
CPU_COMPILER = ('/usr/bin/gcc')
|
GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
|
||||||
GCC_HOST_COMPILER_PATH = ('/usr/bin/gcc')
|
|
||||||
|
|
||||||
CURRENT_DIR = os.path.dirname(sys.argv[0])
|
CURRENT_DIR = os.path.dirname(sys.argv[0])
|
||||||
NVCC_PATH = CURRENT_DIR + '/../../../cuda/bin/nvcc'
|
NVCC_PATH = CURRENT_DIR + '/../../../cuda/bin/nvcc'
|
||||||
@ -229,9 +228,7 @@ def InvokeNvcc(argv, log=False):
|
|||||||
srcs = ' '.join(src_files)
|
srcs = ' '.join(src_files)
|
||||||
out = ' -o ' + out_file[0]
|
out = ' -o ' + out_file[0]
|
||||||
|
|
||||||
# "configure" uses the specific format to substitute the following string.
|
supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ]
|
||||||
# If you change it, make sure you modify "configure" as well.
|
|
||||||
supported_cuda_compute_capabilities = [ "3.5", "5.2" ]
|
|
||||||
nvccopts = ''
|
nvccopts = ''
|
||||||
for capability in supported_cuda_compute_capabilities:
|
for capability in supported_cuda_compute_capabilities:
|
||||||
capability = capability.replace('.', '')
|
capability = capability.replace('.', '')
|
224
third_party/gpus/cuda/BUILD
vendored
224
third_party/gpus/cuda/BUILD
vendored
@ -1,224 +0,0 @@
|
|||||||
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
|
|
||||||
|
|
||||||
load("//third_party/gpus/cuda:build_defs.bzl", "if_cuda")
|
|
||||||
load("platform", "cuda_library_path")
|
|
||||||
load("platform", "cuda_static_library_path")
|
|
||||||
load("platform", "cudnn_library_path")
|
|
||||||
load("platform", "cupti_library_path")
|
|
||||||
load("platform", "readlink_command")
|
|
||||||
|
|
||||||
package(default_visibility = ["//visibility:public"])
|
|
||||||
|
|
||||||
config_setting(
|
|
||||||
name = "using_gcudacc",
|
|
||||||
values = {
|
|
||||||
"define": "using_cuda_gcudacc=true",
|
|
||||||
},
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
config_setting(
|
|
||||||
name = "using_nvcc",
|
|
||||||
values = {
|
|
||||||
"define": "using_cuda_nvcc=true",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
config_setting(
|
|
||||||
name = "using_clang",
|
|
||||||
values = {
|
|
||||||
"define": "using_cuda_clang=true",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
# Equivalent to using_clang && -c opt.
|
|
||||||
config_setting(
|
|
||||||
name = "using_clang_opt",
|
|
||||||
values = {
|
|
||||||
"define": "using_cuda_clang=true",
|
|
||||||
"compilation_mode": "opt",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
config_setting(
|
|
||||||
name = "darwin",
|
|
||||||
values = {"cpu": "darwin"},
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cuda_headers",
|
|
||||||
hdrs = glob([
|
|
||||||
"**/*.h",
|
|
||||||
]),
|
|
||||||
includes = [
|
|
||||||
".",
|
|
||||||
"include",
|
|
||||||
],
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cudart_static",
|
|
||||||
srcs = [
|
|
||||||
cuda_static_library_path("cudart"),
|
|
||||||
],
|
|
||||||
includes = ["include/"],
|
|
||||||
linkopts = [
|
|
||||||
"-ldl",
|
|
||||||
"-lpthread",
|
|
||||||
] + select({
|
|
||||||
"//tensorflow:darwin": [],
|
|
||||||
"//conditions:default": ["-lrt"],
|
|
||||||
}),
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cudart",
|
|
||||||
srcs = [
|
|
||||||
cuda_library_path("cudart"),
|
|
||||||
],
|
|
||||||
data = [
|
|
||||||
cuda_library_path("cudart"),
|
|
||||||
],
|
|
||||||
includes = ["include/"],
|
|
||||||
linkstatic = 1,
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cublas",
|
|
||||||
srcs = [
|
|
||||||
cuda_library_path("cublas"),
|
|
||||||
],
|
|
||||||
data = [
|
|
||||||
cuda_library_path("cublas"),
|
|
||||||
],
|
|
||||||
includes = ["include/"],
|
|
||||||
linkstatic = 1,
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cudnn",
|
|
||||||
srcs = [
|
|
||||||
cudnn_library_path(),
|
|
||||||
],
|
|
||||||
data = [
|
|
||||||
cudnn_library_path(),
|
|
||||||
],
|
|
||||||
includes = ["include/"],
|
|
||||||
linkstatic = 1,
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cufft",
|
|
||||||
srcs = [
|
|
||||||
cuda_library_path("cufft"),
|
|
||||||
],
|
|
||||||
data = [
|
|
||||||
cuda_library_path("cufft"),
|
|
||||||
],
|
|
||||||
includes = ["include/"],
|
|
||||||
linkstatic = 1,
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cuda",
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
deps = [
|
|
||||||
":cublas",
|
|
||||||
":cuda_headers",
|
|
||||||
":cudart",
|
|
||||||
":cudnn",
|
|
||||||
":cufft",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cupti_headers",
|
|
||||||
hdrs = glob([
|
|
||||||
"**/*.h",
|
|
||||||
]),
|
|
||||||
includes = [
|
|
||||||
".",
|
|
||||||
"extras/CUPTI/include/",
|
|
||||||
],
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_library(
|
|
||||||
name = "cupti_dsos",
|
|
||||||
data = [
|
|
||||||
cupti_library_path(),
|
|
||||||
],
|
|
||||||
visibility = ["//visibility:public"],
|
|
||||||
)
|
|
||||||
|
|
||||||
# TODO(opensource): for now, we have to invoke the cuda_config.sh manually in the source tree.
|
|
||||||
# This rule checks if Cuda libraries in the source tree has been properly configured.
|
|
||||||
# The output list makes bazel runs this rule first if the Cuda files are missing.
|
|
||||||
# This gives us an opportunity to check and print a meaningful error message.
|
|
||||||
# But we will need to create the output file list to make bazel happy in a successful run.
|
|
||||||
genrule(
|
|
||||||
name = "cuda_check",
|
|
||||||
srcs = [
|
|
||||||
"cuda.config",
|
|
||||||
"cuda_config.sh",
|
|
||||||
],
|
|
||||||
outs = [
|
|
||||||
"include/cuda.h",
|
|
||||||
"include/cublas.h",
|
|
||||||
"include/cudnn.h",
|
|
||||||
"extras/CUPTI/include/cupti.h",
|
|
||||||
cuda_static_library_path("cudart"),
|
|
||||||
cuda_library_path("cublas"),
|
|
||||||
cudnn_library_path(),
|
|
||||||
cuda_library_path("cudart"),
|
|
||||||
cuda_library_path("cufft"),
|
|
||||||
cupti_library_path(),
|
|
||||||
],
|
|
||||||
cmd = if_cuda(
|
|
||||||
# Under cuda config, create all the symbolic links to the actual cuda files
|
|
||||||
"OUTPUTDIR=`{} -f $(@D)/../../..`; cd `dirname $(location :cuda_config.sh)`; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;".format(readlink_command()),
|
|
||||||
|
|
||||||
# Under non-cuda config, create all dummy files to make the build go through
|
|
||||||
";".join([
|
|
||||||
"mkdir -p $(@D)/include",
|
|
||||||
"mkdir -p $(@D)/lib64",
|
|
||||||
"mkdir -p $(@D)/extras/CUPTI/include",
|
|
||||||
"mkdir -p $(@D)/extras/CUPTI/lib64",
|
|
||||||
"touch $(@D)/include/cuda.h",
|
|
||||||
"touch $(@D)/include/cublas.h",
|
|
||||||
"touch $(@D)/include/cudnn.h",
|
|
||||||
"touch $(@D)/extras/CUPTI/include/cupti.h",
|
|
||||||
"touch $(@D)/{}".format(cuda_static_library_path("cudart")),
|
|
||||||
"touch $(@D)/{}".format(cuda_library_path("cublas")),
|
|
||||||
"touch $(@D)/{}".format(cudnn_library_path()),
|
|
||||||
"touch $(@D)/{}".format(cuda_library_path("cudart")),
|
|
||||||
"touch $(@D)/{}".format(cuda_library_path("cufft")),
|
|
||||||
"touch $(@D)/{}".format(cupti_library_path()),
|
|
||||||
]),
|
|
||||||
),
|
|
||||||
local = 1,
|
|
||||||
)
|
|
||||||
|
|
||||||
genrule(
|
|
||||||
name = "cuda_config_check",
|
|
||||||
outs = [
|
|
||||||
"cuda.config",
|
|
||||||
],
|
|
||||||
cmd = if_cuda(
|
|
||||||
# Under cuda config, create the symbolic link to the actual cuda.config
|
|
||||||
"configfile=$(location :cuda.config); ln -sf `{} -f $${{configfile#*/*/*/}}` $(@D)/;".format(readlink_command()),
|
|
||||||
|
|
||||||
# Under non-cuda config, create the dummy file
|
|
||||||
";".join([
|
|
||||||
"touch $(@D)/cuda.config",
|
|
||||||
]),
|
|
||||||
),
|
|
||||||
local = 1,
|
|
||||||
)
|
|
172
third_party/gpus/cuda/BUILD.tpl
vendored
Normal file
172
third_party/gpus/cuda/BUILD.tpl
vendored
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
|
||||||
|
|
||||||
|
load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
|
||||||
|
load("@local_config_cuda//cuda:platform.bzl", "cuda_static_library_path")
|
||||||
|
load("@local_config_cuda//cuda:platform.bzl", "cudnn_library_path")
|
||||||
|
load("@local_config_cuda//cuda:platform.bzl", "cupti_library_path")
|
||||||
|
load("@local_config_cuda//cuda:platform.bzl", "readlink_command")
|
||||||
|
|
||||||
|
package(default_visibility = ["//visibility:public"])
|
||||||
|
|
||||||
|
config_setting(
|
||||||
|
name = "using_gcudacc",
|
||||||
|
values = {
|
||||||
|
"define": "using_cuda_gcudacc=true",
|
||||||
|
},
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
config_setting(
|
||||||
|
name = "using_nvcc",
|
||||||
|
values = {
|
||||||
|
"define": "using_cuda_nvcc=true",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
config_setting(
|
||||||
|
name = "using_clang",
|
||||||
|
values = {
|
||||||
|
"define": "using_cuda_clang=true",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Equivalent to using_clang && -c opt.
|
||||||
|
config_setting(
|
||||||
|
name = "using_clang_opt",
|
||||||
|
values = {
|
||||||
|
"define": "using_cuda_clang=true",
|
||||||
|
"compilation_mode": "opt",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
config_setting(
|
||||||
|
name = "darwin",
|
||||||
|
values = {"cpu": "darwin"},
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cuda_headers",
|
||||||
|
hdrs = glob([
|
||||||
|
"**/*.h",
|
||||||
|
]),
|
||||||
|
includes = [
|
||||||
|
".",
|
||||||
|
"include",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cudart_static",
|
||||||
|
srcs = [
|
||||||
|
cuda_static_library_path("cudart"),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkopts = [
|
||||||
|
"-ldl",
|
||||||
|
"-lpthread",
|
||||||
|
] + select({
|
||||||
|
"@//tensorflow:darwin": [],
|
||||||
|
"//conditions:default": ["-lrt"],
|
||||||
|
}),
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cudart",
|
||||||
|
srcs = [
|
||||||
|
cuda_library_path("cudart"),
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
cuda_library_path("cudart"),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkstatic = 1,
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cublas",
|
||||||
|
srcs = [
|
||||||
|
cuda_library_path("cublas"),
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
cuda_library_path("cublas"),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkstatic = 1,
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cudnn",
|
||||||
|
srcs = [
|
||||||
|
cudnn_library_path(),
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
cudnn_library_path(),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkstatic = 1,
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cufft",
|
||||||
|
srcs = [
|
||||||
|
cuda_library_path("cufft"),
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
cuda_library_path("cufft"),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkstatic = 1,
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "curand",
|
||||||
|
srcs = [
|
||||||
|
cuda_library_path("curand"),
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
cuda_library_path("curand"),
|
||||||
|
],
|
||||||
|
includes = ["include/"],
|
||||||
|
linkstatic = 1,
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cuda",
|
||||||
|
deps = [
|
||||||
|
":cuda_headers",
|
||||||
|
":cudart",
|
||||||
|
":cublas",
|
||||||
|
":cudnn",
|
||||||
|
":cufft",
|
||||||
|
":curand",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cupti_headers",
|
||||||
|
hdrs = glob([
|
||||||
|
"**/*.h",
|
||||||
|
]),
|
||||||
|
includes = [
|
||||||
|
".",
|
||||||
|
"extras/CUPTI/include/",
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "cupti_dsos",
|
||||||
|
data = [
|
||||||
|
cupti_library_path(),
|
||||||
|
],
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
)
|
@ -8,7 +8,7 @@ def if_cuda(if_true, if_false = []):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
return select({
|
return select({
|
||||||
"//third_party/gpus/cuda:using_nvcc": if_true,
|
"@local_config_cuda//cuda:using_nvcc": if_true,
|
||||||
"//third_party/gpus/cuda:using_gcudacc": if_true,
|
"@local_config_cuda//cuda:using_gcudacc": if_true,
|
||||||
"//conditions:default": if_false
|
"//conditions:default": if_false
|
||||||
})
|
})
|
24
third_party/gpus/cuda/cuda_config.h.tpl
vendored
Normal file
24
third_party/gpus/cuda/cuda_config.h.tpl
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#ifndef CUDA_CUDA_CONFIG_H_
|
||||||
|
#define CUDA_CUDA_CONFIG_H_
|
||||||
|
|
||||||
|
#define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities}
|
||||||
|
|
||||||
|
#define TF_CUDA_VERSION "%{cuda_version}"
|
||||||
|
#define TF_CUDNN_VERSION "%{cudnn_version}"
|
||||||
|
|
||||||
|
#endif // CUDA_CUDA_CONFIG_H_
|
234
third_party/gpus/cuda/cuda_config.sh
vendored
234
third_party/gpus/cuda/cuda_config.sh
vendored
@ -1,234 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
# ==============================================================================
|
|
||||||
|
|
||||||
|
|
||||||
# A simple script to configure the Cuda tree needed for the TensorFlow GPU
|
|
||||||
# build. We need both Cuda toolkit $TF_CUDA_VERSION and Cudnn $TF_CUDNN_VERSION.
|
|
||||||
# Useage:
|
|
||||||
# * User edit cuda.config to point both Cuda toolkit and Cudnn libraries to their local path
|
|
||||||
# * run cuda_config.sh to generate symbolic links in the source tree to reflect
|
|
||||||
# * the file organizations needed by TensorFlow.
|
|
||||||
|
|
||||||
print_usage() {
|
|
||||||
cat << EOF
|
|
||||||
Usage: $0 [--check]
|
|
||||||
Configure TensorFlow's canonical view of Cuda libraries using cuda.config.
|
|
||||||
Arguments:
|
|
||||||
--check: Only check that the proper Cuda dependencies has already been
|
|
||||||
properly configured in the source tree. It also creates symbolic links to
|
|
||||||
the files in the gen-tree to make bazel happy.
|
|
||||||
EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
CHECK_ONLY=0
|
|
||||||
# Parse the arguments. Add more arguments as the "case" line when needed.
|
|
||||||
while [[ $# -gt 0 ]]; do
|
|
||||||
argument="$1"
|
|
||||||
shift
|
|
||||||
case $argument in
|
|
||||||
--check)
|
|
||||||
CHECK_ONLY=1
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Error: unknown arguments"
|
|
||||||
print_usage
|
|
||||||
exit -1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
source cuda.config || exit -1
|
|
||||||
|
|
||||||
OUTPUTDIR=${OUTPUTDIR:-../../..}
|
|
||||||
CUDA_TOOLKIT_PATH=${CUDA_TOOLKIT_PATH:-/usr/local/cuda}
|
|
||||||
CUDNN_INSTALL_BASEDIR=${CUDNN_INSTALL_PATH:-/usr/local/cuda}
|
|
||||||
|
|
||||||
if [[ -z "$TF_CUDA_VERSION" ]]; then
|
|
||||||
TF_CUDA_EXT=""
|
|
||||||
else
|
|
||||||
TF_CUDA_EXT=".$TF_CUDA_VERSION"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -z "$TF_CUDNN_VERSION" ]]; then
|
|
||||||
TF_CUDNN_EXT=""
|
|
||||||
else
|
|
||||||
TF_CUDNN_EXT=".$TF_CUDNN_VERSION"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# An error message when the Cuda toolkit is not found
|
|
||||||
function CudaError {
|
|
||||||
echo ERROR: $1
|
|
||||||
cat << EOF
|
|
||||||
##############################################################################
|
|
||||||
##############################################################################
|
|
||||||
Cuda $TF_CUDA_VERSION toolkit is missing.
|
|
||||||
1. Download and install the CUDA $TF_CUDA_VERSION toolkit and CUDNN $TF_CUDNN_VERSION library;
|
|
||||||
2. Run configure from the root of the source tree, before rerunning bazel;
|
|
||||||
Please refer to README.md for more details.
|
|
||||||
##############################################################################
|
|
||||||
##############################################################################
|
|
||||||
EOF
|
|
||||||
exit -1
|
|
||||||
}
|
|
||||||
|
|
||||||
# An error message when CUDNN is not found
|
|
||||||
function CudnnError {
|
|
||||||
echo ERROR: $1
|
|
||||||
cat << EOF
|
|
||||||
##############################################################################
|
|
||||||
##############################################################################
|
|
||||||
Cudnn $TF_CUDNN_VERSION is missing.
|
|
||||||
1. Download and install the CUDA $TF_CUDA_VERSION toolkit and CUDNN $TF_CUDNN_VERSION library;
|
|
||||||
2. Run configure from the root of the source tree, before rerunning bazel;
|
|
||||||
Please refer to README.md for more details.
|
|
||||||
##############################################################################
|
|
||||||
##############################################################################
|
|
||||||
EOF
|
|
||||||
exit -1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check that Cuda libraries has already been properly configured in the source tree.
# We still need to create links to the gen-tree to make bazel happy.
# $1: error function to call (CudaError or CudnnError) when the file is absent.
# $2: file path, relative to the current directory.
# Relies on globals: READLINK_CMD, OUTPUTDIR.
function CheckAndLinkToSrcTree {
  ERROR_FUNC=$1
  FILE=$2
  # Quote expansions so paths containing whitespace do not word-split.
  if test ! -e "$FILE"; then
    "$ERROR_FUNC" "$PWD/$FILE cannot be found"
  fi

  # Link the output file to the source tree, avoiding self links if they are
  # the same. This could happen if invoked from the source tree by accident.
  if [ ! "$($READLINK_CMD -f "$PWD")" == "$($READLINK_CMD -f "$OUTPUTDIR/third_party/gpus/cuda")" ]; then
    mkdir -p "$(dirname "$OUTPUTDIR/third_party/gpus/cuda/$FILE")"
    ln -sf "$PWD/$FILE" "$OUTPUTDIR/third_party/gpus/cuda/$FILE"
  fi
}
|
|
||||||
|
|
||||||
# Select the platform-specific CUDA library layout and the readlink tool.
# Linux keeps 64-bit libraries under lib64/ with "libfoo.so.N" names; macOS
# uses lib/ with "libfoo.N.dylib" names and needs GNU readlink (greadlink).
OSNAME=$(uname -s)
case "$OSNAME" in
  Linux)
    CUDA_LIB_PATH="lib64"
    CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib64"
    CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}"
    CUDA_RT_LIB_STATIC_PATH="lib64/libcudart_static.a"
    CUDA_BLAS_LIB_PATH="lib64/libcublas.so${TF_CUDA_EXT}"
    CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}"
    CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}"
    CUDA_FFT_LIB_PATH="lib64/libcufft.so${TF_CUDA_EXT}"
    CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib64/libcupti.so${TF_CUDA_EXT}"
    READLINK_CMD="readlink"
    ;;
  Darwin)
    CUDA_LIB_PATH="lib"
    CUDA_CUPTI_LIB_DIR="extras/CUPTI/lib"
    CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib"
    CUDA_RT_LIB_STATIC_PATH="lib/libcudart_static.a"
    CUDA_BLAS_LIB_PATH="lib/libcublas${TF_CUDA_EXT}.dylib"
    CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}.dylib"
    CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}.dylib"
    CUDA_FFT_LIB_PATH="lib/libcufft${TF_CUDA_EXT}.dylib"
    CUDA_CUPTI_LIB_PATH="extras/CUPTI/lib/libcupti${TF_CUDA_EXT}.dylib"
    READLINK_CMD="greadlink"
    ;;
esac
|
|
||||||
|
|
||||||
# In check-only mode, verify that every required CUDA/cuDNN header and library
# is already present, symlink each into the gen-tree, then exit without
# performing a full configuration.
if [ "$CHECK_ONLY" == "1" ]; then
  CheckAndLinkToSrcTree CudaError include/cuda.h
  CheckAndLinkToSrcTree CudaError include/cublas.h
  CheckAndLinkToSrcTree CudnnError include/cudnn.h
  CheckAndLinkToSrcTree CudaError extras/CUPTI/include/cupti.h
  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_STATIC_PATH
  CheckAndLinkToSrcTree CudaError $CUDA_BLAS_LIB_PATH
  CheckAndLinkToSrcTree CudnnError $CUDA_DNN_LIB_PATH
  CheckAndLinkToSrcTree CudaError $CUDA_RT_LIB_PATH
  CheckAndLinkToSrcTree CudaError $CUDA_FFT_LIB_PATH
  CheckAndLinkToSrcTree CudaError $CUDA_CUPTI_LIB_PATH
  exit 0
fi
|
|
||||||
|
|
||||||
# Actually configure the source tree for TensorFlow's canonical view of Cuda
# libraries.

# The CUDA runtime library is the minimum requirement for a usable toolkit.
if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}; then
  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}"
fi

# CUPTI is required for GPU profiling/tracing support.
if test ! -e ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}; then
  CudaError "cannot find ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_PATH}"
fi

if test ! -d ${CUDNN_INSTALL_BASEDIR}; then
  CudnnError "cannot find dir: ${CUDNN_INSTALL_BASEDIR}"
fi

# Locate cudnn.h: try the install base directory itself, then its include/
# subdirectory, then the system-wide include directory.
if test -e ${CUDNN_INSTALL_BASEDIR}/cudnn.h; then
  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}
elif test -e ${CUDNN_INSTALL_BASEDIR}/include/cudnn.h; then
  CUDNN_HEADER_DIR=${CUDNN_INSTALL_BASEDIR}/include
elif test -e /usr/include/cudnn.h; then
  CUDNN_HEADER_DIR=/usr/include
else
  CudnnError "cannot find cudnn.h under: ${CUDNN_INSTALL_BASEDIR}"
fi

# Locate libcudnn: prefer the platform library subdirectory, then the base
# directory itself (the "alt" layout).
if test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}; then
  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_PATH}
elif test -e ${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}; then
  CUDNN_LIB_INSTALL_PATH=${CUDNN_INSTALL_BASEDIR}/${CUDA_DNN_LIB_ALT_PATH}
else
  CudnnError "cannot find ${CUDA_DNN_LIB_PATH} or ${CUDA_DNN_LIB_ALT_PATH} under: ${CUDNN_INSTALL_BASEDIR}"
fi
|
|
||||||
|
|
||||||
# Helper function to build symbolic links for all files under a directory.
# $1: source prefix, $2: destination prefix, $3: source directory.
# The destination directory is derived by rewriting the source prefix into the
# destination prefix. Exported below so the child bash spawned by xargs in
# LinkAllFiles can invoke it.
function LinkOneDir {
  SRC_PREFIX=$1
  DST_PREFIX=$2
  SRC_DIR=$3
  DST_DIR=$(echo $SRC_DIR | sed "s,^$SRC_PREFIX,$DST_PREFIX,")
  mkdir -p $DST_DIR
  # Only regular files at this level; subdirectories are handled by separate
  # LinkOneDir invocations from LinkAllFiles.
  FILE_LIST=$(find -L $SRC_DIR -maxdepth 1 -type f)
  if test "$FILE_LIST" != ""; then
    # NOTE(review): relies on word splitting of $FILE_LIST, so file names
    # containing whitespace are not supported — presumably fine for CUDA
    # installs; confirm before generalizing.
    ln -sf $FILE_LIST $DST_DIR/ || exit -1
  fi
}
export -f LinkOneDir
|
|
||||||
|
|
||||||
# Build links for all files under the directory, including subdirectories.
# $1: source directory, $2: destination directory.
function LinkAllFiles {
  SRC_DIR=$1
  DST_DIR=$2
  # Run LinkOneDir for every directory below SRC_DIR in a child bash;
  # xargs cannot call shell functions directly, hence the 'bash -c' wrapper
  # (LinkOneDir is exported above).
  find -L $SRC_DIR -type d | xargs -I {} bash -c "LinkOneDir $SRC_DIR $DST_DIR {}" || exit -1
}
|
|
||||||
|
|
||||||
# Set up the symbolic links for cuda toolkit. We link at individual file level,
# not at the directory level.
# This is because the external library may have different file layout from our desired structure.
mkdir -p $OUTPUTDIR/third_party/gpus/cuda
echo "Setting up Cuda include"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/include $OUTPUTDIR/third_party/gpus/cuda/include || exit -1
echo "Setting up Cuda ${CUDA_LIB_PATH}"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_LIB_PATH} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_LIB_PATH} || exit -1
echo "Setting up Cuda bin"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/bin $OUTPUTDIR/third_party/gpus/cuda/bin || exit -1
echo "Setting up Cuda nvvm"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/nvvm $OUTPUTDIR/third_party/gpus/cuda/nvvm || exit -1
echo "Setting up CUPTI include"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/extras/CUPTI/include $OUTPUTDIR/third_party/gpus/cuda/extras/CUPTI/include || exit -1
echo "Setting up CUPTI lib64"
LinkAllFiles ${CUDA_TOOLKIT_PATH}/${CUDA_CUPTI_LIB_DIR} $OUTPUTDIR/third_party/gpus/cuda/${CUDA_CUPTI_LIB_DIR} || exit -1

# Set up symbolic link for cudnn; its header and library may live outside the
# CUDA toolkit directory (CUDNN_HEADER_DIR/CUDNN_LIB_INSTALL_PATH were located
# above).
ln -sf $CUDNN_HEADER_DIR/cudnn.h $OUTPUTDIR/third_party/gpus/cuda/include/cudnn.h || exit -1
ln -sf $CUDNN_LIB_INSTALL_PATH $OUTPUTDIR/third_party/gpus/cuda/$CUDA_DNN_LIB_PATH || exit -1
|
|
@ -1,6 +1,6 @@
|
|||||||
CUDA_VERSION = ""
|
CUDA_VERSION = "%{cuda_version}"
|
||||||
CUDNN_VERSION = ""
|
CUDNN_VERSION = "%{cudnn_version}"
|
||||||
PLATFORM = ""
|
PLATFORM = "%{platform}"
|
||||||
|
|
||||||
def cuda_sdk_version():
|
def cuda_sdk_version():
|
||||||
return CUDA_VERSION
|
return CUDA_VERSION
|
423
third_party/gpus/cuda_configure.bzl
vendored
Normal file
423
third_party/gpus/cuda_configure.bzl
vendored
Normal file
@ -0,0 +1,423 @@
|
|||||||
|
# -*- Python -*-
|
||||||
|
"""Repository rule for CUDA autoconfiguration.
|
||||||
|
|
||||||
|
`cuda_configure` depends on the following environment variables:
|
||||||
|
|
||||||
|
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
|
||||||
|
* `CC`: The GCC host compiler path
|
||||||
|
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
|
||||||
|
`/usr/local/cuda`.
|
||||||
|
* `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
|
||||||
|
use the system default.
|
||||||
|
* `CUDNN_VERSION`: The version of the cuDNN library.
|
||||||
|
* `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
|
||||||
|
`/usr/local/cuda`.
|
||||||
|
* `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
|
||||||
|
`3.5,5.2`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Defaults used when the corresponding environment variables are unset.
# Empty version strings mean "use whatever the system provides" (unversioned
# library names).
_DEFAULT_CUDA_VERSION = ""
_DEFAULT_CUDNN_VERSION = ""
_DEFAULT_CUDA_TOOLKIT_PATH = "/usr/local/cuda"
_DEFAULT_CUDNN_INSTALL_PATH = "/usr/local/cuda"
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(dzc): Once these functions have been factored out of Bazel's
|
||||||
|
# cc_configure.bzl, load them from @bazel_tools instead.
|
||||||
|
# BEGIN cc_configure common functions.
|
||||||
|
def find_cc(repository_ctx):
|
||||||
|
"""Find the C++ compiler."""
|
||||||
|
cc_name = "gcc"
|
||||||
|
if "CC" in repository_ctx.os.environ:
|
||||||
|
cc_name = repository_ctx.os.environ["CC"].strip()
|
||||||
|
if not cc_name:
|
||||||
|
cc_name = "gcc"
|
||||||
|
if cc_name.startswith("/"):
|
||||||
|
# Absolute path, maybe we should make this suported by our which function.
|
||||||
|
return cc_name
|
||||||
|
cc = repository_ctx.which(cc_name)
|
||||||
|
if cc == None:
|
||||||
|
fail(
|
||||||
|
"Cannot find gcc, either correct your path or set the CC" +
|
||||||
|
" environment variable")
|
||||||
|
return cc
|
||||||
|
|
||||||
|
|
||||||
|
_INC_DIR_MARKER_BEGIN = "#include <...>"
|
||||||
|
|
||||||
|
|
||||||
|
# OSX add " (framework directory)" at the end of line, strip it.
|
||||||
|
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
|
||||||
|
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
|
||||||
|
def _cxx_inc_convert(path):
|
||||||
|
"""Convert path returned by cc -E xc++ in a complete path."""
|
||||||
|
path = path.strip()
|
||||||
|
if path.endswith(_OSX_FRAMEWORK_SUFFIX):
|
||||||
|
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def get_cxx_inc_directories(repository_ctx, cc):
  """Compute the list of default C++ include directories."""
  # Ask the compiler for its verbose preprocessor report; the search-path
  # list appears on stderr after the '#include <...>' marker line, one
  # directory per indented line.
  result = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"])
  index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
  if index1 == -1:
    return []
  # Advance past the marker line itself.
  index1 = result.stderr.find("\n", index1)
  if index1 == -1:
    return []
  # The last line starting with a space marks the final directory entry.
  index2 = result.stderr.rfind("\n ")
  if index2 == -1 or index2 < index1:
    return []
  index2 = result.stderr.find("\n", index2 + 1)
  if index2 == -1:
    inc_dirs = result.stderr[index1 + 1:]
  else:
    inc_dirs = result.stderr[index1 + 1:index2].strip()

  # Normalize each reported line (strips the OSX framework suffix).
  return [repository_ctx.path(_cxx_inc_convert(p))
          for p in inc_dirs.split("\n")]
|
||||||
|
|
||||||
|
# END cc_configure common functions (see TODO above).
|
||||||
|
|
||||||
|
|
||||||
|
def _enable_cuda(repository_ctx):
|
||||||
|
if "TF_NEED_CUDA" in repository_ctx.os.environ:
|
||||||
|
enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip()
|
||||||
|
return enable_cuda == "1"
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _cuda_toolkit_path(repository_ctx):
  """Finds the cuda toolkit directory.

  Uses CUDA_TOOLKIT_PATH from the environment when set, otherwise the
  default install location; fails if the resulting path does not exist.
  """
  env = repository_ctx.os.environ
  if "CUDA_TOOLKIT_PATH" in env:
    toolkit_path = env["CUDA_TOOLKIT_PATH"].strip()
  else:
    toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
  if not repository_ctx.path(toolkit_path).exists:
    fail("Cannot find cuda toolkit path.")
  return toolkit_path
|
||||||
|
|
||||||
|
|
||||||
|
def _cudnn_install_basedir(repository_ctx):
  """Finds the cudnn install directory.

  Uses CUDNN_INSTALL_PATH from the environment when set, otherwise the
  default install location; fails if the resulting path does not exist.
  """
  env = repository_ctx.os.environ
  if "CUDNN_INSTALL_PATH" in env:
    install_path = env["CUDNN_INSTALL_PATH"].strip()
  else:
    install_path = _DEFAULT_CUDNN_INSTALL_PATH
  if not repository_ctx.path(install_path).exists:
    fail("Cannot find cudnn install path.")
  return install_path
|
||||||
|
|
||||||
|
|
||||||
|
def _cuda_version(repository_ctx):
|
||||||
|
"""Detects the cuda version."""
|
||||||
|
if "CUDA_VERSION" in repository_ctx.os.environ:
|
||||||
|
return repository_ctx.os.environ["CUDA_VERSION"].strip()
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _cudnn_version(repository_ctx):
|
||||||
|
"""Detects the cudnn version."""
|
||||||
|
if "CUDNN_VERSION" in repository_ctx.os.environ:
|
||||||
|
return repository_ctx.os.environ["CUDNN_VERSION"].strip()
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_capabilities(repository_ctx):
|
||||||
|
"""Returns a list of strings representing cuda compute capabilities."""
|
||||||
|
if "CUDA_COMPUTE_CAPABILITIES" not in repository_ctx.os.environ:
|
||||||
|
return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
|
||||||
|
capabilities_str = repository_ctx.os.environ["CUDA_COMPUTE_CAPABILITIES"]
|
||||||
|
capabilities = capabilities_str.split(",")
|
||||||
|
for capability in capabilities:
|
||||||
|
# Workaround for Skylark's lack of support for regex. This check should
|
||||||
|
# be equivalent to checking:
|
||||||
|
# if re.match("[0-9]+.[0-9]+", capability) == None:
|
||||||
|
parts = capability.split(".")
|
||||||
|
if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
|
||||||
|
fail("Invalid compute capability: %s" % capability)
|
||||||
|
return capabilities
|
||||||
|
|
||||||
|
|
||||||
|
def _cpu_value(repository_ctx):
|
||||||
|
result = repository_ctx.execute(["uname", "-s"])
|
||||||
|
return result.stdout.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _cuda_symlink_files(cpu_value, cuda_version, cudnn_version):
  """Returns a struct containing platform-specific paths.

  Args:
    cpu_value: The string representing the host OS.
    cuda_version: The cuda version as returned by _cuda_version
    cudnn_version: The cudnn version as returned by _cudnn_version
  """
  # Versioned library suffixes; empty versions yield unversioned names.
  cuda_ext = ".%s" % cuda_version if cuda_version else ""
  cudnn_ext = ".%s" % cudnn_version if cudnn_version else ""
  if cpu_value == "Linux":
    # Linux layout: 64-bit libraries under lib64/, "libfoo.so.N" naming.
    return struct(
        cuda_lib_path = "lib64",
        cuda_rt_lib = "lib64/libcudart.so%s" % cuda_ext,
        cuda_rt_lib_static = "lib64/libcudart_static.a",
        cuda_blas_lib = "lib64/libcublas.so%s" % cuda_ext,
        cuda_dnn_lib = "lib64/libcudnn.so%s" % cudnn_ext,
        # "alt" path: libcudnn directly in the install base directory.
        cuda_dnn_lib_alt = "libcudnn.so%s" % cudnn_ext,
        cuda_rand_lib = "lib64/libcurand.so%s" % cuda_ext,
        cuda_fft_lib = "lib64/libcufft.so%s" % cuda_ext,
        cuda_cupti_lib = "extras/CUPTI/lib64/libcupti.so%s" % cuda_ext)
  elif cpu_value == "Darwin":
    # macOS layout: libraries under lib/, "libfoo.N.dylib" naming.
    return struct(
        cuda_lib_path = "lib",
        cuda_rt_lib = "lib/libcudart%s.dylib" % cuda_ext,
        cuda_rt_lib_static = "lib/libcudart_static.a",
        cuda_blas_lib = "lib/libcublas%s.dylib" % cuda_ext,
        cuda_dnn_lib = "lib/libcudnn%s.dylib" % cudnn_ext,
        cuda_dnn_lib_alt = "libcudnn%s.dylib" % cudnn_ext,
        cuda_rand_lib = "lib/libcurand%s.dylib" % cuda_ext,
        cuda_fft_lib = "lib/libcufft%s.dylib" % cuda_ext,
        cuda_cupti_lib = "extras/CUPTI/lib/libcupti%s.dylib" % cuda_ext)
  else:
    fail("Not supported CPU value %s" % cpu_value)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_lib(repository_ctx, cuda_toolkit_path, cuda_lib):
|
||||||
|
"""Checks if cuda_lib exists under cuda_toolkit_path or fail if it doesn't.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repository_ctx: The repository context.
|
||||||
|
cuda_toolkit_path: The cuda toolkit directory containing the cuda libraries.
|
||||||
|
cuda_lib: The library to look for under cuda_toolkit_path.
|
||||||
|
"""
|
||||||
|
lib_path = cuda_toolkit_path + "/" + cuda_lib
|
||||||
|
if not repository_ctx.path(lib_path).exists:
|
||||||
|
fail("Cannot find %s" % lib_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_dir(repository_ctx, directory):
|
||||||
|
"""Checks whether the directory exists and fail if it does not.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repository_ctx: The repository context.
|
||||||
|
directory: The directory to check the existence of.
|
||||||
|
"""
|
||||||
|
if not repository_ctx.path(directory).exists:
|
||||||
|
fail("Cannot find dir: %s" % directory)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
|
||||||
|
"""Returns the path to the directory containing cudnn.h
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repository_ctx: The repository context.
|
||||||
|
cudnn_install_basedir: The cudnn install directory as returned by
|
||||||
|
_cudnn_install_basedir.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The path of the directory containing the cudnn header.
|
||||||
|
"""
|
||||||
|
if repository_ctx.path(cudnn_install_basedir + "/cudnn.h").exists:
|
||||||
|
return cudnn_install_basedir
|
||||||
|
if repository_ctx.path(cudnn_install_basedir + "/include/cudnn.h").exists:
|
||||||
|
return cudnn_install_basedir + "/include"
|
||||||
|
if repository_ctx.path("/usr/include/cudnn.h").exists:
|
||||||
|
return "/usr/include"
|
||||||
|
fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
|
||||||
|
|
||||||
|
|
||||||
|
def _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir, symlink_files):
|
||||||
|
"""Returns the path to the directory containing libcudnn
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repository_ctx: The repository context.
|
||||||
|
cudnn_install_basedir: The cudnn install dir as returned by
|
||||||
|
_cudnn_install_basedir.
|
||||||
|
symlink_files: The symlink files as returned by _cuda_symlink_files.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The path of the directory containing the cudnn libraries.
|
||||||
|
"""
|
||||||
|
lib_dir = cudnn_install_basedir + "/" + symlink_files.cuda_dnn_lib
|
||||||
|
if repository_ctx.path(lib_dir).exists:
|
||||||
|
return lib_dir
|
||||||
|
alt_lib_dir = cudnn_install_basedir + "/" + symlink_files.cuda_dnn_lib_alt
|
||||||
|
if repository_ctx.path(alt_lib_dir).exists:
|
||||||
|
return alt_lib_dir
|
||||||
|
|
||||||
|
fail("Cannot find %s or %s under %s" %
|
||||||
|
(symlink_files.cuda_dnn_lib, symlink_files.cuda_dnn_lib_alt,
|
||||||
|
cudnn_install_basedir))
|
||||||
|
|
||||||
|
|
||||||
|
def _tpl(repository_ctx, tpl, substitutions={}, out=None):
  """Instantiates a //third_party/gpus template into the repository.

  When out is not given, the destination path is derived from the template
  label by replacing ':' with '/'.
  """
  dest = out if out else tpl.replace(":", "/")
  repository_ctx.template(
      dest,
      Label("//third_party/gpus/%s.tpl" % tpl),
      substitutions)
|
||||||
|
|
||||||
|
|
||||||
|
def _file(repository_ctx, label):
  """Copies a //third_party/gpus template verbatim (no substitutions)."""
  dest = label.replace(":", "/")
  repository_ctx.template(
      dest,
      Label("//third_party/gpus/%s.tpl" % label),
      {})
|
||||||
|
|
||||||
|
|
||||||
|
def _create_dummy_repository(repository_ctx):
  # Used when TF_NEED_CUDA is off: generates a repository whose BUILD files,
  # stub headers and empty library files let non-CUDA builds proceed.
  cpu_value = _cpu_value(repository_ctx)
  symlink_files = _cuda_symlink_files(cpu_value, _DEFAULT_CUDA_VERSION,
                                      _DEFAULT_CUDNN_VERSION)

  # Set up BUILD file for cuda/.
  _file(repository_ctx, "cuda:BUILD")
  _file(repository_ctx, "cuda:build_defs.bzl")
  _tpl(repository_ctx, "cuda:platform.bzl",
       {
           "%{cuda_version}": _DEFAULT_CUDA_VERSION,
           "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
           "%{platform}": cpu_value,
       })

  # Create dummy files for the CUDA toolkit since they are still required by
  # tensorflow/core/platform/default/build_config:cuda.
  repository_ctx.file("cuda/include/cuda.h", "")
  repository_ctx.file("cuda/include/cublas.h", "")
  repository_ctx.file("cuda/include/cudnn.h", "")
  repository_ctx.file("cuda/extras/CUPTI/include/cupti.h", "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_rt_lib, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_rt_lib_static, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_blas_lib, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_dnn_lib, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_rand_lib, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_fft_lib, "")
  repository_ctx.file("cuda/%s" % symlink_files.cuda_cupti_lib, "")

  # Set up cuda_config.h, which is used by
  # tensorflow/stream_executor/dso_loader.cc.
  _tpl(repository_ctx, "cuda:cuda_config.h",
       {
           "%{cuda_version}": _DEFAULT_CUDA_VERSION,
           "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
           "%{cuda_compute_capabilities}": ",".join([
               "CudaVersion(\"%s\")" % c
               for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES]),
       })
|
||||||
|
|
||||||
|
|
||||||
|
def _symlink_dir(repository_ctx, src_dir, dest_dir):
|
||||||
|
"""Symlinks all the files in a directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
repository_ctx: The repository context.
|
||||||
|
src_dir: The source directory.
|
||||||
|
dest_dir: The destination directory to create the symlinks in.
|
||||||
|
"""
|
||||||
|
files = repository_ctx.path(src_dir).readdir()
|
||||||
|
for src_file in files:
|
||||||
|
repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_cuda_repository(repository_ctx):
  """Creates the repository containing files set up to build with CUDA."""
  cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
  cuda_version = _cuda_version(repository_ctx)
  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
  cudnn_version = _cudnn_version(repository_ctx)
  compute_capabilities = _compute_capabilities(repository_ctx)

  cpu_value = _cpu_value(repository_ctx)
  symlink_files = _cuda_symlink_files(cpu_value, cuda_version, cudnn_version)
  # The runtime and CUPTI libraries are the minimum required toolkit pieces.
  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_rt_lib)
  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_cupti_lib)
  _check_dir(repository_ctx, cudnn_install_basedir)

  cudnn_header_dir = _find_cudnn_header_dir(repository_ctx,
                                            cudnn_install_basedir)
  cudnn_lib_path = _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir,
                                        symlink_files)

  # Set up symbolic links for the cuda toolkit. We link at the individual file
  # level not at the directory level. This is because the external library may
  # have a different file layout from our desired structure.
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/include", "cuda/include")
  _symlink_dir(repository_ctx,
               cuda_toolkit_path + "/" + symlink_files.cuda_lib_path,
               "cuda/" + symlink_files.cuda_lib_path)
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/bin", "cuda/bin")
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/nvvm", "cuda/nvvm")
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/extras/CUPTI/include",
               "cuda/extras/CUPTI/include")
  repository_ctx.symlink(cuda_toolkit_path + "/" + symlink_files.cuda_cupti_lib,
                         "cuda/" + symlink_files.cuda_cupti_lib)

  # Set up the symbolic links for cudnn if cudnn was not installed to
  # CUDA_TOOLKIT_PATH.
  if not repository_ctx.path("cuda/include/cudnn.h").exists:
    repository_ctx.symlink(cudnn_header_dir + "/cudnn.h",
                           "cuda/include/cudnn.h")
  if not repository_ctx.path("cuda/" + symlink_files.cuda_dnn_lib).exists:
    repository_ctx.symlink(cudnn_lib_path, "cuda/" + symlink_files.cuda_dnn_lib)

  # Set up BUILD file for cuda/
  _file(repository_ctx, "cuda:BUILD")
  _file(repository_ctx, "cuda:build_defs.bzl")
  _tpl(repository_ctx, "cuda:platform.bzl",
       {
           "%{cuda_version}": cuda_version,
           "%{cudnn_version}": cudnn_version,
           "%{platform}": cpu_value,
       })

  # Set up crosstool/
  _file(repository_ctx, "crosstool:BUILD")
  _tpl(repository_ctx, "crosstool:CROSSTOOL",
       {
           "%{cuda_version}": ("-%s" % cuda_version) if cuda_version else "",
       })
  # find_cc may shell out (PATH lookup); resolve the compiler once and reuse
  # it for both substitutions instead of calling find_cc twice.
  cc = str(find_cc(repository_ctx))
  _tpl(repository_ctx,
       "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
       {
           "%{cpu_compiler}": cc,
           "%{gcc_host_compiler_path}": cc,
           "%{cuda_compute_capabilities}": ", ".join(
               ["\"%s\"" % c for c in compute_capabilities]),
       })

  # Set up cuda_config.h, which is used by
  # tensorflow/stream_executor/dso_loader.cc.
  _tpl(repository_ctx, "cuda:cuda_config.h",
       {
           "%{cuda_version}": cuda_version,
           "%{cudnn_version}": cudnn_version,
           "%{cuda_compute_capabilities}": ",".join(
               ["CudaVersion(\"%s\")" % c for c in compute_capabilities]),
       })
|
||||||
|
|
||||||
|
|
||||||
|
def _cuda_autoconf_impl(repository_ctx):
  """Implementation of the cuda_autoconf repository rule."""
  if _enable_cuda(repository_ctx):
    _create_cuda_repository(repository_ctx)
  else:
    # CUDA disabled: generate the stub repository so non-GPU builds work.
    _create_dummy_repository(repository_ctx)
|
||||||
|
|
||||||
|
|
||||||
|
cuda_configure = repository_rule(
    implementation = _cuda_autoconf_impl,
    # local = True: re-run on every fetch, since the result depends on the
    # machine's environment variables and filesystem.
    local = True,
)
"""Detects and configures the local CUDA toolchain.

Add the following to your WORKSPACE FILE:

```python
cuda_configure(name = "local_config_cuda")
```

Args:
  name: A unique name for this workspace rule.
"""
|
@ -1,4 +1,4 @@
|
|||||||
build:cuda --crosstool_top=//third_party/gpus/crosstool
|
build:cuda --crosstool_top=@local_config_cuda//crosstool
|
||||||
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
||||||
|
|
||||||
build --force_python=py$PYTHON_MAJOR_VERSION
|
build --force_python=py$PYTHON_MAJOR_VERSION
|
||||||
|
Loading…
Reference in New Issue
Block a user