Merge changes from github.
Change: 131310818
This commit is contained in:
parent
1fa09b5560
commit
2c598e874e
73
configure
vendored
73
configure
vendored
@ -80,6 +80,7 @@ while [ "$TF_NEED_CUDA" == "" ]; do
|
||||
esac
|
||||
done
|
||||
|
||||
export TF_NEED_CUDA
|
||||
if [ "$TF_NEED_CUDA" == "0" ]; then
|
||||
echo "Configuration finished"
|
||||
exit
|
||||
@ -97,6 +98,7 @@ while true; do
|
||||
fi
|
||||
fi
|
||||
if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
|
||||
export CC=$GCC_HOST_COMPILER_PATH
|
||||
break
|
||||
fi
|
||||
echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
|
||||
@ -107,7 +109,6 @@ while true; do
|
||||
# Retry
|
||||
done
|
||||
|
||||
|
||||
# Find out where the CUDA toolkit is installed
|
||||
OSNAME=`uname -s`
|
||||
|
||||
@ -140,6 +141,8 @@ while true; do
|
||||
fi
|
||||
|
||||
if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
|
||||
export CUDA_TOOLKIT_PATH
|
||||
export CUDA_VERSION=$TF_CUDA_VERSION
|
||||
break
|
||||
fi
|
||||
echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
|
||||
@ -200,13 +203,16 @@ while true; do
|
||||
fi
|
||||
|
||||
if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
|
||||
export CUDNN_VERSION=$TF_CUDNN_VERSION
|
||||
export CUDNN_INSTALL_PATH
|
||||
break
|
||||
fi
|
||||
|
||||
if [ "$OSNAME" == "Linux" ]; then
|
||||
CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
|
||||
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
|
||||
CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
||||
export CUDNN_VERSION=$TF_CUDNN_VERSION
|
||||
export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
@ -225,42 +231,11 @@ while true; do
|
||||
CUDNN_INSTALL_PATH=""
|
||||
done
|
||||
|
||||
cat > third_party/gpus/cuda/cuda.config <<EOF
|
||||
# CUDA_TOOLKIT_PATH refers to the CUDA toolkit.
|
||||
CUDA_TOOLKIT_PATH="$CUDA_TOOLKIT_PATH"
|
||||
# CUDNN_INSTALL_PATH refers to the cuDNN toolkit. The cuDNN header and library
|
||||
# files can be either in this directory, or under include/ and lib64/
|
||||
# directories separately.
|
||||
CUDNN_INSTALL_PATH="$CUDNN_INSTALL_PATH"
|
||||
|
||||
# The Cuda SDK version that should be used in this build (empty to use libcudart.so symlink)
|
||||
TF_CUDA_VERSION=$TF_CUDA_VERSION
|
||||
|
||||
# The Cudnn version that should be used in this build
|
||||
TF_CUDNN_VERSION=$TF_CUDNN_VERSION
|
||||
EOF
|
||||
|
||||
# Configure the gcc host compiler to use
|
||||
export WARNING=$DO_NOT_SUBMIT_WARNING
|
||||
perl -pi -e "s,CPU_COMPILER = \('.*'\),# \$ENV{WARNING}\nCPU_COMPILER = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
||||
perl -pi -e "s,GCC_HOST_COMPILER_PATH = \('.*'\),# \$ENV{WARNING}\nGCC_HOST_COMPILER_PATH = ('$GCC_HOST_COMPILER_PATH'),s" third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
||||
|
||||
# Configure the platform name.
|
||||
perl -pi -e "s,PLATFORM = \".*\",PLATFORM = \"$OSNAME\",s" third_party/gpus/cuda/platform.bzl
|
||||
|
||||
# Configure the Cuda toolkit version to work with.
|
||||
perl -pi -e "s,(GetCudaVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDA_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
|
||||
perl -pi -e "s,CUDA_VERSION = \"[0-9\.]*\",CUDA_VERSION = \"$TF_CUDA_VERSION\",s" third_party/gpus/cuda/platform.bzl
|
||||
|
||||
# Configure the Cudnn version to work with.
|
||||
perl -pi -e "s,(GetCudnnVersion.*return )\"[0-9\.]*\",\1\"$TF_CUDNN_VERSION\",s" tensorflow/stream_executor/dso_loader.cc
|
||||
perl -pi -e "s,CUDNN_VERSION = \"[0-9\.]*\",CUDNN_VERSION = \"$TF_CUDNN_VERSION\",s" third_party/gpus/cuda/platform.bzl
|
||||
|
||||
|
||||
# Configure the compute capabilities that TensorFlow builds for.
|
||||
# Since Cuda toolkit is not backward-compatible, this is not guaranteed to work.
|
||||
while true; do
|
||||
fromuser=""
|
||||
default_cuda_compute_capabilities="3.5,5.2"
|
||||
if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
||||
cat << EOF
|
||||
Please specify a list of comma-separated Cuda compute capabilities you want to build with.
|
||||
@ -270,6 +245,9 @@ EOF
|
||||
read -p "[Default is: \"3.5,5.2\"]: " TF_CUDA_COMPUTE_CAPABILITIES
|
||||
fromuser=1
|
||||
fi
|
||||
if [ -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
||||
TF_CUDA_COMPUTE_CAPABILITIES=$default_cuda_compute_capabilities
|
||||
fi
|
||||
# Check whether all capabilities from the input are valid
|
||||
COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES//,/ }
|
||||
ALL_VALID=1
|
||||
@ -285,34 +263,13 @@ EOF
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
export CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES
|
||||
break
|
||||
fi
|
||||
TF_CUDA_COMPUTE_CAPABILITIES=""
|
||||
done
|
||||
|
||||
if [ ! -z "$TF_CUDA_COMPUTE_CAPABILITIES" ]; then
|
||||
export WARNING=$DO_NOT_SUBMIT_WARNING
|
||||
function CudaGenCodeOpts() {
|
||||
OUTPUT=""
|
||||
for CAPABILITY in $@; do
|
||||
OUTPUT=${OUTPUT}" \"${CAPABILITY}\", "
|
||||
done
|
||||
echo $OUTPUT
|
||||
}
|
||||
export CUDA_GEN_CODES_OPTS=$(CudaGenCodeOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
|
||||
perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\[).*?(\]),\n\1# $ENV{WARNING}\n\1\2$ENV{CUDA_GEN_CODES_OPTS}\3,s' third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc
|
||||
function CudaVersionOpts() {
|
||||
OUTPUT=""
|
||||
for CAPABILITY in $@; do
|
||||
OUTPUT=$OUTPUT"CudaVersion(\"${CAPABILITY}\"), "
|
||||
done
|
||||
echo $OUTPUT
|
||||
}
|
||||
export CUDA_VERSION_OPTS=$(CudaVersionOpts ${TF_CUDA_COMPUTE_CAPABILITIES//,/ })
|
||||
perl -pi -0 -e 's,\n( *)([^\n]*supported_cuda_compute_capabilities\s*=\s*\{).*?(\}),\n\1// $ENV{WARNING}\n\1\2$ENV{CUDA_VERSION_OPTS}\3,s' tensorflow/core/common_runtime/gpu/gpu_device.cc
|
||||
fi
|
||||
|
||||
# Invoke the cuda_config.sh and set up the TensorFlow's canonical view of the Cuda libraries
|
||||
(cd third_party/gpus/cuda; ./cuda_config.sh;) || exit -1
|
||||
bazel clean --expunge
|
||||
bazel fetch //...
|
||||
|
||||
echo "Configuration finished"
|
||||
|
@ -785,10 +785,8 @@ struct CudaVersion {
|
||||
int minor_part = -1;
|
||||
};
|
||||
|
||||
// "configure" uses the specific name to substitute the following string.
|
||||
// If you change it, make sure you modify "configure" as well.
|
||||
std::vector<CudaVersion> supported_cuda_compute_capabilities = {
|
||||
CudaVersion("3.5"), CudaVersion("5.2")};
|
||||
TF_CUDA_CAPABILITIES,};
|
||||
|
||||
std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
|
||||
auto cuda_caps = supported_cuda_compute_capabilities;
|
||||
|
@ -31,7 +31,7 @@ limitations under the License.
|
||||
#endif
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
#include "tensorflow/core/platform/stream_executor.h"
|
||||
#include "tensorflow/core/util/stream_executor_util.h"
|
||||
#endif // GOOGLE_CUDA
|
||||
|
@ -25,7 +25,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/fill_functor.h"
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
#include "tensorflow/core/platform/stream_executor.h"
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
|
@ -4680,6 +4680,42 @@ op {
|
||||
summary: "Decode a PNG-encoded image to a uint8 or uint16 tensor."
|
||||
description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the PNG-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the PNG-encoded image is transformed to match the requested number\nof color channels."
|
||||
}
|
||||
op {
|
||||
name: "DecodeGif"
|
||||
input_arg {
|
||||
name: "contents"
|
||||
description: "0-D. The GIF-encoded image."
|
||||
type: DT_STRING
|
||||
}
|
||||
output_arg {
|
||||
name: "image"
|
||||
description: "3-D with shape `[height, width, channels]`."
|
||||
type_attr: "dtype"
|
||||
}
|
||||
attr {
|
||||
name: "channels"
|
||||
type: "int"
|
||||
default_value {
|
||||
i: 0
|
||||
}
|
||||
description: "Number of color channels for the decoded image."
|
||||
}
|
||||
attr {
|
||||
name: "dtype"
|
||||
type: "type"
|
||||
default_value {
|
||||
type: DT_UINT8
|
||||
}
|
||||
allowed_values {
|
||||
list {
|
||||
type: DT_UINT8
|
||||
type: DT_UINT16
|
||||
}
|
||||
}
|
||||
}
|
||||
summary: "Decode a GIF-encoded image to a uint8 or uint16 tensor."
|
||||
description: "The attr `channels` indicates the desired number of color channels for the\ndecoded image.\n\nAccepted values are:\n\n* 0: Use the number of channels in the GIF-encoded image.\n* 1: output a grayscale image.\n* 3: output an RGB image.\n* 4: output an RGBA image.\n\nIf needed, the GIF-encoded image is transformed to match the requested number\nof color channels."
|
||||
}
|
||||
op {
|
||||
name: "DecodeRaw"
|
||||
input_arg {
|
||||
|
@ -9,7 +9,7 @@ exports_files(["LICENSE"])
|
||||
|
||||
load("//tensorflow:tensorflow.bzl", "tf_copts")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cuda_library")
|
||||
load("//third_party/gpus/cuda:platform.bzl", "cuda_library_path")
|
||||
load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
|
||||
|
||||
cc_library(
|
||||
name = "gtest",
|
||||
@ -32,7 +32,7 @@ tf_cuda_library(
|
||||
deps = [
|
||||
"//tensorflow/stream_executor",
|
||||
] + select({
|
||||
"//third_party/gpus/cuda:darwin": ["IOKit"],
|
||||
"@local_config_cuda//cuda:darwin": ["IOKit"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
)
|
||||
@ -91,20 +91,20 @@ filegroup(
|
||||
cc_library(
|
||||
name = "cuda",
|
||||
data = [
|
||||
"//third_party/gpus/cuda:{}".format(cuda_library_path("cudart")),
|
||||
"@local_config_cuda//cuda:{}".format(cuda_library_path("cudart")),
|
||||
],
|
||||
linkopts = select({
|
||||
"//third_party/gpus/cuda:darwin": [
|
||||
"-Wl,-rpath,third_party/gpus/cuda/lib",
|
||||
"-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib",
|
||||
"@local_config_cuda//cuda:darwin": [
|
||||
"-Wl,-rpath,../local_config_cuda/cuda/lib",
|
||||
"-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib",
|
||||
],
|
||||
"//conditions:default": [
|
||||
"-Wl,-rpath,third_party/gpus/cuda/lib64",
|
||||
"-Wl,-rpath,third_party/gpus/cuda/extras/CUPTI/lib64",
|
||||
"-Wl,-rpath,../local_config_cuda/cuda/lib64",
|
||||
"-Wl,-rpath,../local_config_cuda/cuda/extras/CUPTI/lib64",
|
||||
],
|
||||
}),
|
||||
deps = [
|
||||
"//third_party/gpus/cuda:cudart",
|
||||
"@local_config_cuda//cuda:cudart",
|
||||
],
|
||||
)
|
||||
|
||||
|
@ -15,9 +15,9 @@ tf_cuda_library(
|
||||
copts = tf_copts(),
|
||||
cuda_deps = [
|
||||
"//tensorflow/core:stream_executor",
|
||||
"//third_party/gpus/cuda:cuda_headers",
|
||||
"//third_party/gpus/cuda:cupti_headers",
|
||||
"@local_config_cuda//cuda:cuda_headers",
|
||||
"@local_config_cuda//cuda:cupti_headers",
|
||||
],
|
||||
data = ["//third_party/gpus/cuda:cupti_dsos"],
|
||||
data = ["@local_config_cuda//cuda:cupti_dsos"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
@ -21,7 +21,7 @@ limitations under the License.
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "third_party/gpus/cuda/extras/CUPTI/include/cupti.h"
|
||||
#include "cuda/extras/CUPTI/include/cupti.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
|
@ -16,7 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/util/port.h"
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
#endif
|
||||
|
||||
namespace tensorflow {
|
||||
|
@ -184,9 +184,8 @@ applies gradients.
|
||||
|
||||
### Gating Gradients
|
||||
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradient` argument
|
||||
that controls the degree of parallelism during the application of the
|
||||
gradients.
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
|
||||
that controls the degree of parallelism during the application of the gradients.
|
||||
|
||||
The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.
|
||||
|
||||
|
@ -204,9 +204,8 @@ applies gradients.
|
||||
|
||||
### Gating Gradients
|
||||
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradient` argument
|
||||
that controls the degree of parallelism during the application of the
|
||||
gradients.
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradients` argument
|
||||
that controls the degree of parallelism during the application of the gradients.
|
||||
|
||||
The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.
|
||||
|
||||
|
@ -7,11 +7,10 @@ github source.
|
||||
|
||||
The TensorFlow Python API supports Python 2.7 and Python 3.3+.
|
||||
|
||||
The GPU version (Linux only) works best with Cuda Toolkit 7.5 and
|
||||
cuDNN v4. other versions are supported (Cuda toolkit >= 7.0 and
|
||||
cuDNN 6.5(v2), 7.0(v3), v5) only when installing from sources.
|
||||
Please see [Cuda installation](#optional-install-cuda-gpus-on-linux)
|
||||
for details.
|
||||
The GPU version (Linux & Mac OS X only) works best with Cuda Toolkit 7.5 and
|
||||
cuDNN v4. Other versions are supported (Cuda toolkit >= 7.0 and cuDNN 6.5(v2),
|
||||
7.0(v3), v5) only when installing from sources. Please see
|
||||
[Cuda installation](#optional-install-cuda-gpus-on-linux) for details.
|
||||
|
||||
## Overview
|
||||
|
||||
|
@ -48,6 +48,18 @@ def ctc_loss(inputs, labels, sequence_length,
|
||||
<= sequence_length(b) for all b.
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
This function performs the softmax operation for you, so inputs should
|
||||
be e.g. linear projections of outputs by an LSTM.
|
||||
|
||||
The `inputs` Tensor's innermost dimension size, `num_classes`, represents
|
||||
`num_labels + 1` classes, where num_labels is the number of true labels, and
|
||||
the largest value `(num_classes - 1)` is reserved for the blank label.
|
||||
|
||||
For example, for a vocabulary containing 3 labels `[a, b, c]`,
|
||||
`num_classes = 4` and the labels indexing is `{a: 0, b: 1, c: 2, blank: 3}`.
|
||||
|
||||
Regarding the arguments `preprocess_collapse_repeated` and
|
||||
`ctc_merge_repeated`:
|
||||
|
||||
@ -87,7 +99,9 @@ def ctc_loss(inputs, labels, sequence_length,
|
||||
`[max_time x batch_size x num_classes]`. The logits.
|
||||
labels: An `int32` `SparseTensor`.
|
||||
`labels.indices[i, :] == [b, t]` means `labels.values[i]` stores
|
||||
the id for (batch b, time t). See `core/ops/ctc_ops.cc` for more details.
|
||||
the id for (batch b, time t).
|
||||
`labels.values[i]` must take on values in `[0, num_labels)`.
|
||||
See `core/ops/ctc_ops.cc` for more details.
|
||||
sequence_length: 1-D `int32` vector, size `[batch_size]`.
|
||||
The sequence lengths.
|
||||
preprocess_collapse_repeated: Boolean. Default: False.
|
||||
|
@ -1036,8 +1036,7 @@ def report_uninitialized_variables(var_list=None,
|
||||
|
||||
Returns:
|
||||
A 1-D tensor containing names of the uninitialized variables, or an empty
|
||||
1-D
|
||||
tensor if there are no variables or no uninitialized variables.
|
||||
1-D tensor if there are no variables or no uninitialized variables.
|
||||
"""
|
||||
if var_list is None:
|
||||
var_list = all_variables() + local_variables()
|
||||
|
@ -89,9 +89,9 @@ class Optimizer(object):
|
||||
|
||||
### Gating Gradients
|
||||
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradient` argument
|
||||
that controls the degree of parallelism during the application of the
|
||||
gradients.
|
||||
Both `minimize()` and `compute_gradients()` accept a `gate_gradients`
|
||||
argument that controls the degree of parallelism during the application of
|
||||
the gradients.
|
||||
|
||||
The possible values are: `GATE_NONE`, `GATE_OP`, and `GATE_GRAPH`.
|
||||
|
||||
|
@ -27,9 +27,10 @@ cc_library(
|
||||
]),
|
||||
data = [
|
||||
"//tensorflow/core:cuda",
|
||||
"//third_party/gpus/cuda:cublas",
|
||||
"//third_party/gpus/cuda:cudnn",
|
||||
"//third_party/gpus/cuda:cufft",
|
||||
"@local_config_cuda//cuda:cublas",
|
||||
"@local_config_cuda//cuda:cudnn",
|
||||
"@local_config_cuda//cuda:cufft",
|
||||
"@local_config_cuda//cuda:curand",
|
||||
],
|
||||
linkopts = [
|
||||
"-ldl",
|
||||
@ -37,7 +38,7 @@ cc_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//tensorflow/core:lib",
|
||||
"//third_party/gpus/cuda:cuda_headers",
|
||||
"@local_config_cuda//cuda:cuda_headers",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
@ -18,8 +18,8 @@ limitations under the License.
|
||||
// cuda.h). This ensures that Eigen's Half.h does not attempt to make its own
|
||||
// __half typedef if CUDA has already defined one (and conversely, that we do
|
||||
// not include <cuda_fp16.h> after Half.h has made its typedef).
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "third_party/gpus/cuda/include/cublas_v2.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
#include "cuda/include/cublas_v2.h"
|
||||
|
||||
#if CUDA_VERSION >= 7050
|
||||
#define EIGEN_HAS_CUDA_FP16
|
||||
|
@ -39,7 +39,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/stream.h"
|
||||
#include "tensorflow/stream_executor/stream_executor_pimpl.h"
|
||||
// clang-format off
|
||||
#include "third_party/gpus/cuda/include/cudnn.h"
|
||||
#include "cuda/include/cudnn.h"
|
||||
// clang-format on
|
||||
|
||||
namespace {
|
||||
|
@ -25,7 +25,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/lib/status.h"
|
||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
|
@ -23,7 +23,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/fft.h"
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "tensorflow/stream_executor/plugin_registry.h"
|
||||
#include "third_party/gpus/cuda/include/cufft.h"
|
||||
#include "cuda/include/cufft.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
|
@ -24,8 +24,8 @@ limitations under the License.
|
||||
#include <stddef.h>
|
||||
#include <complex>
|
||||
|
||||
#include "third_party/gpus/cuda/include/cuComplex.h"
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuComplex.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
|
@ -28,7 +28,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/lib/casts.h"
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "tensorflow/stream_executor/platform/logging.h"
|
||||
#include "third_party/gpus/cuda/include/cuda.h"
|
||||
#include "cuda/include/cuda.h"
|
||||
|
||||
#ifdef PLATFORMS_GPUS_CUDA_DYNAMIC_LIBCUDA_DYNAMIC_LIBCUDA_H_
|
||||
#error \
|
||||
|
@ -28,7 +28,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/lib/status.h"
|
||||
#include "tensorflow/stream_executor/platform/logging.h"
|
||||
#include "tensorflow/stream_executor/rng.h"
|
||||
#include "third_party/gpus/cuda/include/curand.h"
|
||||
#include "cuda/include/curand.h"
|
||||
|
||||
// Formats curandStatus_t to output prettified values into a log stream.
|
||||
std::ostream &operator<<(std::ostream &in, const curandStatus_t &status) {
|
||||
|
@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// TODO(jhen): Replace hardcoded, platform specific path strings in GetXXXPath()
|
||||
// with a function in e.g. cuda.h.
|
||||
|
||||
#include "tensorflow/stream_executor/dso_loader.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
@ -32,19 +35,17 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/lib/stringprintf.h"
|
||||
#include "tensorflow/stream_executor/platform/logging.h"
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include "tensorflow/stream_executor/lib/str_util.h"
|
||||
|
||||
namespace perftools {
|
||||
namespace gputools {
|
||||
namespace internal {
|
||||
|
||||
// TensorFlow OSS configure uses the following lines to configure versions. For
|
||||
// any modifications of the format, please make sure the script still works.
|
||||
string GetCudaVersion() { return ""; }
|
||||
string GetCudnnVersion() { return ""; }
|
||||
string GetCudaVersion() { return TF_CUDA_VERSION; }
|
||||
string GetCudnnVersion() { return TF_CUDNN_VERSION; }
|
||||
|
||||
/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cublas", GetCudaVersion()),
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||
"cublas", GetCudaVersion()),
|
||||
GetCudaLibraryDirPath()),
|
||||
dso_handle);
|
||||
}
|
||||
@ -53,33 +54,36 @@ string GetCudnnVersion() { return ""; }
|
||||
// libcudnn is versioned differently than the other libraries and may have a
|
||||
// different version number than other CUDA libraries. See b/22397368 for
|
||||
// some details about the complications surrounding this.
|
||||
return GetDsoHandle(
|
||||
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cudnn", GetCudnnVersion()),
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||
"cudnn", GetCudnnVersion()),
|
||||
GetCudaLibraryDirPath()),
|
||||
dso_handle);
|
||||
}
|
||||
|
||||
/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cufft", GetCudaVersion()),
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||
"cufft", GetCudaVersion()),
|
||||
GetCudaLibraryDirPath()),
|
||||
dso_handle);
|
||||
}
|
||||
|
||||
/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("curand", GetCudaVersion()),
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||
"curand", GetCudaVersion()),
|
||||
GetCudaLibraryDirPath()),
|
||||
dso_handle);
|
||||
}
|
||||
|
||||
/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
|
||||
return GetDsoHandle(
|
||||
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cuda", "1"),
|
||||
GetCudaDriverLibraryPath()),
|
||||
dso_handle);
|
||||
}
|
||||
|
||||
/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
|
||||
return GetDsoHandle(
|
||||
FindDsoPath(tensorflow::internal::FormatLibraryFileName("cupti", GetCudaVersion()),
|
||||
return GetDsoHandle(FindDsoPath(tensorflow::internal::FormatLibraryFileName(
|
||||
"cupti", GetCudaVersion()),
|
||||
GetCudaCuptiLibraryPath()),
|
||||
dso_handle);
|
||||
}
|
||||
@ -89,11 +93,9 @@ string GetCudnnVersion() { return ""; }
|
||||
GetRpaths()->push_back(path.ToString());
|
||||
}
|
||||
|
||||
|
||||
/* static */ port::Status DsoLoader::GetDsoHandle(port::StringPiece path,
|
||||
void** dso_handle,
|
||||
LoadKind load_kind) {
|
||||
|
||||
int dynload_flags =
|
||||
RTLD_LAZY | (load_kind == LoadKind::kLocal ? RTLD_LOCAL : RTLD_GLOBAL);
|
||||
string path_string = path.ToString();
|
||||
@ -138,9 +140,9 @@ string GetCudnnVersion() { return ""; }
|
||||
static std::vector<string>* CreatePrimordialRpaths() {
|
||||
auto rpaths = new std::vector<string>;
|
||||
#if defined(__APPLE__)
|
||||
rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib");
|
||||
rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib");
|
||||
#else
|
||||
rpaths->push_back("driver/driver_sh.runfiles/org_tensorflow/third_party/gpus/cuda/lib64");
|
||||
rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64");
|
||||
#endif
|
||||
return rpaths;
|
||||
}
|
||||
@ -165,7 +167,6 @@ static std::vector<string>* CreatePrimordialRpaths() {
|
||||
|
||||
/* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
|
||||
port::StringPiece runfiles_relpath) {
|
||||
|
||||
// Keep a record of the paths we attempted so we can dump out meaningful
|
||||
// diagnostics if no path is found.
|
||||
std::vector<string> attempted;
|
||||
@ -191,29 +192,28 @@ static std::vector<string>* CreatePrimordialRpaths() {
|
||||
|
||||
/* static */ string DsoLoader::GetCudaLibraryDirPath() {
|
||||
#if defined(__APPLE__)
|
||||
return "third_party/gpus/cuda/lib";
|
||||
return "external/local_config_cuda/cuda/lib";
|
||||
#else
|
||||
return "third_party/gpus/cuda/lib64";
|
||||
return "external/local_config_cuda/cuda/lib64";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* static */ string DsoLoader::GetCudaDriverLibraryPath() {
|
||||
#if defined(__APPLE__)
|
||||
return "third_party/gpus/cuda/driver/lib";
|
||||
return "external/local_config_cuda/cuda/driver/lib";
|
||||
#else
|
||||
return "third_party/gpus/cuda/driver/lib64";
|
||||
return "external/local_config_cuda/cuda/driver/lib64";
|
||||
#endif
|
||||
}
|
||||
|
||||
/* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
|
||||
#if defined(__APPLE__)
|
||||
return "third_party/gpus/cuda/extras/CUPTI/lib";
|
||||
return "external/local_config_cuda/cuda/extras/CUPTI/lib";
|
||||
#else
|
||||
return "third_party/gpus/cuda/extras/CUPTI/lib64";
|
||||
return "external/local_config_cuda/cuda/extras/CUPTI/lib64";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// -- CachedDsoLoader
|
||||
|
||||
/* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include "tensorflow/stream_executor/platform/port.h"
|
||||
#include <vector>
|
||||
|
||||
#include "cuda/cuda_config.h"
|
||||
#include "tensorflow/stream_executor/lib/status.h"
|
||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||
#include "tensorflow/stream_executor/lib/stringpiece.h"
|
||||
|
@ -32,6 +32,7 @@ import os
|
||||
import re
|
||||
|
||||
from six import BytesIO
|
||||
from six import StringIO
|
||||
from six.moves import BaseHTTPServer
|
||||
from six.moves import urllib
|
||||
from six.moves import xrange # pylint: disable=redefined-builtin
|
||||
@ -276,7 +277,7 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
values = self._multiplexer.Scalars(run, tag)
|
||||
|
||||
if query_params.get('format') == _OutputFormat.CSV:
|
||||
string_io = BytesIO()
|
||||
string_io = StringIO()
|
||||
writer = csv.writer(string_io)
|
||||
writer.writerow(['Wall time', 'Step', 'Value'])
|
||||
writer.writerows(values)
|
||||
@ -353,7 +354,7 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
run = query_params.get('run')
|
||||
compressed_histograms = self._multiplexer.CompressedHistograms(run, tag)
|
||||
if query_params.get('format') == _OutputFormat.CSV:
|
||||
string_io = BytesIO()
|
||||
string_io = StringIO()
|
||||
writer = csv.writer(string_io)
|
||||
|
||||
# Build the headers; we have two columns for timing and two columns for
|
||||
|
@ -32,7 +32,7 @@ load(
|
||||
"tf_cuda_tests_tags",
|
||||
)
|
||||
load(
|
||||
"//third_party/gpus/cuda:build_defs.bzl",
|
||||
"@local_config_cuda//cuda:build_defs.bzl",
|
||||
"if_cuda",
|
||||
)
|
||||
|
||||
@ -324,11 +324,11 @@ def tf_cc_tests(tests, deps, linkstatic=0, tags=[], size="medium", args=None,
|
||||
tf_cc_test(t, deps, linkstatic, tags=tags, size=size, args=args,
|
||||
linkopts=linkopts)
|
||||
|
||||
def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium", args=None):
|
||||
def tf_cc_tests_gpu(tests, deps, linkstatic=0, tags=[], size="medium",
|
||||
args=None):
|
||||
tf_cc_tests(tests, deps, linkstatic, tags=tags, size=size, args=args)
|
||||
|
||||
|
||||
|
||||
def tf_cuda_cc_tests(tests, deps, tags=[], size="medium", linkstatic=0,
|
||||
args=None, linkopts=[]):
|
||||
for t in tests:
|
||||
@ -345,29 +345,29 @@ def _cuda_copts():
|
||||
common_cuda_opts = ["-x", "cuda", "-DGOOGLE_CUDA=1"]
|
||||
return select({
|
||||
"//conditions:default": [],
|
||||
"//third_party/gpus/cuda:using_nvcc": (
|
||||
"@local_config_cuda//cuda:using_nvcc": (
|
||||
common_cuda_opts +
|
||||
[
|
||||
"-nvcc_options=relaxed-constexpr",
|
||||
"-nvcc_options=ftz=true",
|
||||
]
|
||||
),
|
||||
"//third_party/gpus/cuda:using_gcudacc": (
|
||||
"@local_config_cuda//cuda:using_gcudacc": (
|
||||
common_cuda_opts +
|
||||
["--gcudacc_flag=-ftz=true"]
|
||||
),
|
||||
"//third_party/gpus/cuda:using_clang": (
|
||||
"@local_config_cuda//cuda:using_clang": (
|
||||
common_cuda_opts +
|
||||
[
|
||||
"-fcuda-flush-denormals-to-zero",
|
||||
"--cuda-path=third_party/gpus/cuda",
|
||||
"--cuda-path=external/local_config_cuda/cuda",
|
||||
"--cuda-gpu-arch=sm_35",
|
||||
]
|
||||
),
|
||||
}) + select({
|
||||
# Pass -O3 when building CUDA code with clang; some important
|
||||
# optimizations are not enabled at O2.
|
||||
"//third_party/gpus/cuda:using_clang_opt": ["-O3"],
|
||||
"@local_config_cuda//cuda:using_clang_opt": ["-O3"],
|
||||
"//conditions:default": [],
|
||||
})
|
||||
|
||||
@ -438,7 +438,8 @@ def tf_kernel_library(name, prefix=None, srcs=None, gpu_srcs=None, hdrs=None,
|
||||
* srcs = ["cwise_op_abs.cc", ..., "cwise_op_tanh.cc"],
|
||||
* hdrs = ["cwise_ops.h", "cwise_ops_common.h"],
|
||||
* gpu_srcs = ["cwise_op_gpu_abs.cu.cc", ..., "cwise_op_gpu_tanh.cu.cc",
|
||||
"cwise_ops.h", "cwise_ops_common.h", "cwise_ops_gpu_common.cu.h"]
|
||||
"cwise_ops.h", "cwise_ops_common.h",
|
||||
"cwise_ops_gpu_common.cu.h"]
|
||||
* "cwise_ops_test.cc" is excluded
|
||||
"""
|
||||
if not srcs:
|
||||
@ -642,7 +643,7 @@ check_deps = rule(
|
||||
def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[]):
|
||||
cuda_deps = [
|
||||
"//tensorflow/core:stream_executor_headers_lib",
|
||||
"//third_party/gpus/cuda:cudart_static",
|
||||
"@local_config_cuda//cuda:cudart_static",
|
||||
]
|
||||
deps = deps + tf_custom_op_library_additional_deps()
|
||||
if gpu_srcs:
|
||||
@ -692,7 +693,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
|
||||
module_name=module_name,
|
||||
py_module_name=name)
|
||||
extra_linkopts = select({
|
||||
"//third_party/gpus/cuda:darwin": [
|
||||
"@local_config_cuda//cuda:darwin": [
|
||||
"-Wl,-exported_symbols_list",
|
||||
"//tensorflow:tf_exported_symbols.lds"
|
||||
],
|
||||
@ -701,7 +702,7 @@ def tf_py_wrap_cc(name, srcs, swig_includes=[], deps=[], copts=[], **kwargs):
|
||||
"//tensorflow:tf_version_script.lds"
|
||||
]})
|
||||
extra_deps += select({
|
||||
"//third_party/gpus/cuda:darwin": [
|
||||
"@local_config_cuda//cuda:darwin": [
|
||||
"//tensorflow:tf_exported_symbols.lds"
|
||||
],
|
||||
"//conditions:default": [
|
||||
@ -775,13 +776,14 @@ def py_tests(name,
|
||||
data=data,
|
||||
additional_deps=additional_deps)
|
||||
|
||||
def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[], shard_count=1, tags=[], prefix=""):
|
||||
def cuda_py_tests(name, srcs, size="medium", additional_deps=[], data=[],
|
||||
shard_count=1, tags=[], prefix=""):
|
||||
test_tags = tags + tf_cuda_tests_tags()
|
||||
py_tests(name=name, size=size, srcs=srcs, additional_deps=additional_deps,
|
||||
data=data, tags=test_tags, shard_count=shard_count,prefix=prefix)
|
||||
|
||||
# Creates a genrule named <name> for running tools/proto_text's generator to make
|
||||
# the proto_text functions, for the protos passed in <srcs>.
|
||||
# Creates a genrule named <name> for running tools/proto_text's generator to
|
||||
# make the proto_text functions, for the protos passed in <srcs>.
|
||||
#
|
||||
# Return a struct with fields (hdrs, srcs) containing the names of the
|
||||
# generated files.
|
||||
|
@ -22,5 +22,6 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64
|
||||
|
||||
# Configure the build for our CUDA configuration.
|
||||
ENV CUDA_TOOLKIT_PATH /usr/local/cuda
|
||||
ENV CUDNN_INSTALL_PATH /usr/local/cuda
|
||||
ENV CUDNN_INSTALL_PATH /usr/lib/x86_64-linux-gnu
|
||||
ENV TF_NEED_CUDA 1
|
||||
ENV CUDA_COMPUTE_CAPABILITIES 3.0,5.2
|
||||
|
@ -35,7 +35,7 @@ For example:
|
||||
|
||||
export TF_DIST_GCLOUD_PROJECT="tensorflow-testing"
|
||||
export TF_DIST_GCLOUD_COMPUTE_ZONE="us-central1-f"
|
||||
export CONTAINER_CLUSTER="test-cluster-1"
|
||||
export TF_DIST_CONTAINER_CLUSTER="test-cluster-1"
|
||||
export TF_DIST_GCLOUD_KEY_FILE_DIR="/tmp/gcloud-secrets"
|
||||
./remote_test.sh
|
||||
|
||||
|
@ -1,9 +1,12 @@
|
||||
# TensorFlow external dependencies that can be loaded in WORKSPACE files.
|
||||
|
||||
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
|
||||
|
||||
# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
|
||||
# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
|
||||
# local_repository rule (e.g. "@tf").
|
||||
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||
cuda_configure(name = "local_config_cuda")
|
||||
|
||||
# These lines need to be changed when updating Eigen. They are parsed from
|
||||
# this file by the cmake and make builds to determine the eigen version and hash.
|
||||
|
0
third_party/gpus/BUILD
vendored
Normal file
0
third_party/gpus/BUILD
vendored
Normal file
42
third_party/gpus/crosstool/BUILD.tpl
vendored
Normal file
42
third_party/gpus/crosstool/BUILD.tpl
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
licenses(["restricted"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
filegroup(
|
||||
name = "crosstool",
|
||||
srcs = ["CROSSTOOL"],
|
||||
output_licenses = ["unencumbered"],
|
||||
)
|
||||
|
||||
cc_toolchain(
|
||||
name = "cc-compiler-local",
|
||||
all_files = ":empty",
|
||||
compiler_files = ":empty",
|
||||
cpu = "local",
|
||||
dwp_files = ":empty",
|
||||
dynamic_runtime_libs = [":empty"],
|
||||
linker_files = ":empty",
|
||||
objcopy_files = ":empty",
|
||||
static_runtime_libs = [":empty"],
|
||||
strip_files = ":empty",
|
||||
supports_param_files = 0,
|
||||
)
|
||||
|
||||
cc_toolchain(
|
||||
name = "cc-compiler-darwin",
|
||||
all_files = ":empty",
|
||||
compiler_files = ":empty",
|
||||
cpu = "darwin",
|
||||
dwp_files = ":empty",
|
||||
dynamic_runtime_libs = [":empty"],
|
||||
linker_files = ":empty",
|
||||
objcopy_files = ":empty",
|
||||
static_runtime_libs = [":empty"],
|
||||
strip_files = ":empty",
|
||||
supports_param_files = 0,
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "empty",
|
||||
srcs = [],
|
||||
)
|
254
third_party/gpus/crosstool/CROSSTOOL.tpl
vendored
Normal file
254
third_party/gpus/crosstool/CROSSTOOL.tpl
vendored
Normal file
@ -0,0 +1,254 @@
|
||||
major_version: "local"
|
||||
minor_version: ""
|
||||
default_target_cpu: "same_as_host"
|
||||
|
||||
default_toolchain {
|
||||
cpu: "k8"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "piii"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "arm"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "darwin"
|
||||
toolchain_identifier: "local_darwin"
|
||||
}
|
||||
default_toolchain {
|
||||
cpu: "ppc"
|
||||
toolchain_identifier: "local_linux"
|
||||
}
|
||||
|
||||
toolchain {
|
||||
abi_version: "local"
|
||||
abi_libc_version: "local"
|
||||
builtin_sysroot: ""
|
||||
compiler: "compiler"
|
||||
host_system_name: "local"
|
||||
needsPic: true
|
||||
supports_gold_linker: false
|
||||
supports_incremental_linker: false
|
||||
supports_fission: false
|
||||
supports_interface_shared_objects: false
|
||||
supports_normalizing_ar: false
|
||||
supports_start_end_lib: false
|
||||
supports_thin_archives: false
|
||||
target_libc: "local"
|
||||
target_cpu: "local"
|
||||
target_system_name: "local"
|
||||
toolchain_identifier: "local_linux"
|
||||
|
||||
tool_path { name: "ar" path: "/usr/bin/ar" }
|
||||
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||
# As part of the TensorFlow release, we place some cuda-related compilation
|
||||
# files in @local_config_cuda//crosstool/clang/bin, and this relative
|
||||
# path, combined with the rest of our Bazel configuration causes our
|
||||
# compilation to use those files.
|
||||
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
||||
# Use "-std=c++11" for nvcc. For consistency, force both the host compiler
|
||||
# and the device compiler to use "-std=c++11".
|
||||
cxx_flag: "-std=c++11"
|
||||
linker_flag: "-lstdc++"
|
||||
linker_flag: "-B/usr/bin/"
|
||||
|
||||
# TODO(bazel-team): In theory, the path here ought to exactly match the path
|
||||
# used by gcc. That works because bazel currently doesn't track files at
|
||||
# absolute locations and has no remote execution, yet. However, this will need
|
||||
# to be fixed, maybe with auto-detection?
|
||||
cxx_builtin_include_directory: "/usr/lib/gcc/"
|
||||
cxx_builtin_include_directory: "/usr/local/include"
|
||||
cxx_builtin_include_directory: "/usr/include"
|
||||
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||
|
||||
# C(++) compiles invoke the compiler (as that is the one knowing where
|
||||
# to find libraries), but we provide LD so other rules can invoke the linker.
|
||||
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||
|
||||
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||
objcopy_embed_flag: "-I"
|
||||
objcopy_embed_flag: "binary"
|
||||
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||
|
||||
# Anticipated future default.
|
||||
unfiltered_cxx_flag: "-no-canonical-prefixes"
|
||||
|
||||
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||
# compiler symbols.
|
||||
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
|
||||
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
|
||||
|
||||
# Security hardening on by default.
|
||||
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||
# We need to undef it before redefining it as some distributions now have
|
||||
# it enabled by default.
|
||||
compiler_flag: "-U_FORTIFY_SOURCE"
|
||||
compiler_flag: "-D_FORTIFY_SOURCE=1"
|
||||
compiler_flag: "-fstack-protector"
|
||||
compiler_flag: "-fPIE"
|
||||
linker_flag: "-pie"
|
||||
linker_flag: "-Wl,-z,relro,-z,now"
|
||||
|
||||
# Enable coloring even if there's no attached terminal. Bazel removes the
|
||||
# escape sequences if --nocolor is specified. This isn't supported by gcc
|
||||
# on Ubuntu 14.04.
|
||||
# compiler_flag: "-fcolor-diagnostics"
|
||||
|
||||
# All warnings are enabled. Maybe enable -Werror as well?
|
||||
compiler_flag: "-Wall"
|
||||
# Enable a few more warnings that aren't part of -Wall.
|
||||
compiler_flag: "-Wunused-but-set-parameter"
|
||||
# But disable some that are problematic.
|
||||
compiler_flag: "-Wno-free-nonheap-object" # has false positives
|
||||
|
||||
# Keep stack frames for debugging, even in opt mode.
|
||||
compiler_flag: "-fno-omit-frame-pointer"
|
||||
|
||||
# Anticipated future default.
|
||||
linker_flag: "-no-canonical-prefixes"
|
||||
unfiltered_cxx_flag: "-fno-canonical-system-headers"
|
||||
# Have gcc return the exit code from ld.
|
||||
linker_flag: "-pass-exit-codes"
|
||||
# Stamp the binary with a unique identifier.
|
||||
linker_flag: "-Wl,--build-id=md5"
|
||||
linker_flag: "-Wl,--hash-style=gnu"
|
||||
# Gold linker only? Can we enable this by default?
|
||||
# linker_flag: "-Wl,--warn-execstack"
|
||||
# linker_flag: "-Wl,--detect-odr-violations"
|
||||
|
||||
# Include directory for cuda headers.
|
||||
cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
|
||||
|
||||
compilation_mode_flags {
|
||||
mode: DBG
|
||||
# Enable debug symbols.
|
||||
compiler_flag: "-g"
|
||||
}
|
||||
compilation_mode_flags {
|
||||
mode: OPT
|
||||
|
||||
# No debug symbols.
|
||||
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
|
||||
# even generally? However, that can't happen here, as it requires special
|
||||
# handling in Bazel.
|
||||
compiler_flag: "-g0"
|
||||
|
||||
# Conservative choice for -O
|
||||
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||
# Profile first and / or use FDO if you need better performance than this.
|
||||
compiler_flag: "-O2"
|
||||
|
||||
# Disable assertions
|
||||
compiler_flag: "-DNDEBUG"
|
||||
|
||||
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||
compiler_flag: "-ffunction-sections"
|
||||
compiler_flag: "-fdata-sections"
|
||||
linker_flag: "-Wl,--gc-sections"
|
||||
}
|
||||
linking_mode_flags { mode: DYNAMIC }
|
||||
}
|
||||
|
||||
toolchain {
|
||||
abi_version: "local"
|
||||
abi_libc_version: "local"
|
||||
builtin_sysroot: ""
|
||||
compiler: "compiler"
|
||||
host_system_name: "local"
|
||||
needsPic: true
|
||||
target_libc: "macosx"
|
||||
target_cpu: "darwin"
|
||||
target_system_name: "local"
|
||||
toolchain_identifier: "local_darwin"
|
||||
|
||||
tool_path { name: "ar" path: "/usr/bin/libtool" }
|
||||
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
||||
cxx_flag: "-std=c++11"
|
||||
ar_flag: "-static"
|
||||
ar_flag: "-s"
|
||||
ar_flag: "-o"
|
||||
linker_flag: "-lc++"
|
||||
linker_flag: "-undefined"
|
||||
linker_flag: "dynamic_lookup"
|
||||
# TODO(ulfjack): This is wrong on so many levels. Figure out a way to auto-detect the proper
|
||||
# setting from the local compiler, and also how to make incremental builds correct.
|
||||
cxx_builtin_include_directory: "/"
|
||||
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||
objcopy_embed_flag: "-I"
|
||||
objcopy_embed_flag: "binary"
|
||||
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||
|
||||
# Anticipated future default.
|
||||
unfiltered_cxx_flag: "-no-canonical-prefixes"
|
||||
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||
# compiler symbols.
|
||||
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
|
||||
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
|
||||
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
|
||||
|
||||
# Security hardening on by default.
|
||||
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||
compiler_flag: "-D_FORTIFY_SOURCE=1"
|
||||
compiler_flag: "-fstack-protector"
|
||||
|
||||
# Enable coloring even if there's no attached terminal. Bazel removes the
|
||||
# escape sequences if --nocolor is specified.
|
||||
compiler_flag: "-fcolor-diagnostics"
|
||||
|
||||
# All warnings are enabled. Maybe enable -Werror as well?
|
||||
compiler_flag: "-Wall"
|
||||
# Enable a few more warnings that aren't part of -Wall.
|
||||
compiler_flag: "-Wthread-safety"
|
||||
compiler_flag: "-Wself-assign"
|
||||
|
||||
# Keep stack frames for debugging, even in opt mode.
|
||||
compiler_flag: "-fno-omit-frame-pointer"
|
||||
|
||||
# Anticipated future default.
|
||||
linker_flag: "-no-canonical-prefixes"
|
||||
|
||||
# Include directory for cuda headers.
|
||||
cxx_builtin_include_directory: "/usr/local/cuda%{cuda_version}/include"
|
||||
|
||||
compilation_mode_flags {
|
||||
mode: DBG
|
||||
# Enable debug symbols.
|
||||
compiler_flag: "-g"
|
||||
}
|
||||
compilation_mode_flags {
|
||||
mode: OPT
|
||||
# No debug symbols.
|
||||
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or even generally?
|
||||
# However, that can't happen here, as it requires special handling in Bazel.
|
||||
compiler_flag: "-g0"
|
||||
|
||||
# Conservative choice for -O
|
||||
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||
# Profile first and / or use FDO if you need better performance than this.
|
||||
compiler_flag: "-O2"
|
||||
|
||||
# Disable assertions
|
||||
compiler_flag: "-DNDEBUG"
|
||||
|
||||
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||
compiler_flag: "-ffunction-sections"
|
||||
compiler_flag: "-fdata-sections"
|
||||
}
|
||||
}
|
315
third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
vendored
Executable file
315
third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
vendored
Executable file
@ -0,0 +1,315 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
|
||||
"""Crosstool wrapper for compiling CUDA programs.
|
||||
|
||||
SYNOPSIS:
|
||||
crosstool_wrapper_is_not_gcc [options passed in by cc_library()
|
||||
or cc_binary() rule]
|
||||
|
||||
DESCRIPTION:
|
||||
This script is expected to be called by the cc_library() or cc_binary() bazel
|
||||
rules. When the option "-x cuda" is present in the list of arguments passed
|
||||
to this script, it invokes the nvcc CUDA compiler. Most arguments are passed
|
||||
as is as a string to --compiler-options of nvcc. When "-x cuda" is not
|
||||
present, this wrapper invokes the configured CPU compiler (CPU_COMPILER)
|
||||
with the input arguments, minus the wrapper-specific flags.
|
||||
|
||||
NOTES:
|
||||
Changes to the contents of this file must be propagated from
|
||||
//third_party/gpus/crosstool/crosstool_wrapper_is_not_gcc to
|
||||
//third_party/gpus/crosstool/v*/*/clang/bin/crosstool_wrapper_is_not_gcc
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
__author__ = 'keveman@google.com (Manjunath Kudlur)'
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
import pipes
|
||||
|
||||
# Template values set by cuda_autoconf.
|
||||
CPU_COMPILER = ('%{cpu_compiler}')
|
||||
GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
|
||||
|
||||
CURRENT_DIR = os.path.dirname(sys.argv[0])
|
||||
NVCC_PATH = CURRENT_DIR + '/../../../cuda/bin/nvcc'
|
||||
LLVM_HOST_COMPILER_PATH = ('/usr/bin/gcc')
|
||||
PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
|
||||
|
||||
def Log(s):
  """Print a message to stdout, tagged with the 'gpus/crosstool: ' prefix.

  Args:
    s: The value to log; it is rendered with str.format().
  """
  message = 'gpus/crosstool: {0}'.format(s)
  print(message)
|
||||
|
||||
|
||||
def GetOptionValue(argv, option):
  """Collect every value supplied for a single-dash option in argv.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    option: The option name to look up, given without its leading '-'.

  Returns:
    A flat list of the values found. Values may follow one occurrence
    of the option (-opt val1 val2) or be spread over several occurrences
    (-opt val1 -opt val2). An empty list is returned when the option is
    absent or carries no values.
  """
  option_parser = ArgumentParser()
  # nargs='*' + action='append' yields one inner list per occurrence.
  option_parser.add_argument('-' + option, nargs='*', action='append')
  parsed, _ = option_parser.parse_known_args(argv)

  if not parsed:
    return []
  occurrences = vars(parsed)[option]
  if not occurrences:
    return []

  # Flatten the per-occurrence lists into a single list, in order.
  return [value for values in occurrences for value in values]
|
||||
|
||||
|
||||
def GetHostCompilerOptions(argv):
  """Build the host-compiler option string from selected flags in argv.

  Only -isystem, -iquote, -g and --sysroot are picked up; everything else
  in argv is ignored here.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    A string (each fragment has a leading space) suitable for use as the
    value of nvcc's --compiler-options. Empty when none of the flags
    are present.
  """
  host_parser = ArgumentParser()
  host_parser.add_argument('-isystem', nargs='*', action='append')
  host_parser.add_argument('-iquote', nargs='*', action='append')
  host_parser.add_argument('--sysroot', nargs=1)
  host_parser.add_argument('-g', nargs='*', action='append')
  parsed, _ = host_parser.parse_known_args(argv)

  def _Flatten(groups):
    # One inner list per flag occurrence -> single flat list.
    return [value for group in groups for value in group]

  # Fragments are emitted in a fixed order: isystem, iquote, g, sysroot.
  fragments = []
  if parsed.isystem:
    fragments.append(' -isystem ' + ' -isystem '.join(_Flatten(parsed.isystem)))
  if parsed.iquote:
    fragments.append(' -iquote ' + ' -iquote '.join(_Flatten(parsed.iquote)))
  if parsed.g:
    fragments.append(' -g' + ' -g'.join(_Flatten(parsed.g)))
  if parsed.sysroot:
    fragments.append(' --sysroot ' + parsed.sysroot[0])

  return ''.join(fragments)
|
||||
|
||||
def GetNvccOptions(argv):
  """Turn the -nvcc_options values found in argv into nvcc flags.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    A space-separated string in which every collected value is prefixed
    with '--', ready to be passed to nvcc. Empty when argv carries no
    -nvcc_options values.
  """
  nvcc_parser = ArgumentParser()
  nvcc_parser.add_argument('-nvcc_options', nargs='*', action='append')
  parsed, _ = nvcc_parser.parse_known_args(argv)

  if not parsed.nvcc_options:
    return ''

  # Each occurrence of the flag contributes its own list of values.
  return ' '.join('--' + name
                  for group in parsed.nvcc_options
                  for name in group)
|
||||
|
||||
|
||||
def StripAndTransformNvccOptions(argv):
  """Strips the -nvcc_options values from argv and transforms define-macros.

  Every -nvcc_options occurrence is removed from the argument list. For each
  stripped value of the form '<flag>=<value>' whose flag part contains
  'define-macro', a '-D<value>' argument is appended to the result.

  Args:
    argv: A list of strings, possibly the argv passed to main().

  Returns:
    A list of strings that can be passed directly to gcudacc.
  """
  parser = ArgumentParser()
  # Use 'append' (as GetNvccOptions does) so that repeated -nvcc_options
  # flags are all honoured; with 'store' only the last occurrence survived
  # and earlier define-macro values were silently lost.
  parser.add_argument('-nvcc_options', nargs='*', action='append')
  args, leftover = parser.parse_known_args(argv)

  for values in args.nvcc_options or []:
    for option in values:
      (flag, _, value) = option.partition('=')
      if 'define-macro' in flag:
        leftover.append('-D' + value)
  return leftover
|
||||
|
||||
|
||||
def InvokeGcudacc(argv, gcudacc_version, gcudacc_flags, log=False):
  """Assemble a gcudacc command line from argv and run it through the shell.

  Args:
    argv: A list of strings, possibly the argv passed to main().
    gcudacc_version: The version of gcudacc; this is a subdirectory name under
      the gcudacc bin/ directory.
    gcudacc_flags: A list of lists of extra arguments passed just for gcudacc
      (one inner list per --gcudacc_flag occurrence).
    log: True if the assembled command should be logged before running.

  Returns:
    The return value of os.system() for the assembled gcudacc command.
  """
  # NOTE(review): GCUDACC_PATH_BASE is not among the module-level constants
  # visible at the top of this file -- confirm where it is defined.
  binary = os.path.join(GCUDACC_PATH_BASE, gcudacc_version, 'gcudacc.par')
  host_compiler_arg = ' --google_host_compiler={0} '.format(
      LLVM_HOST_COMPILER_PATH)
  extra_flags = ' '.join(sum(gcudacc_flags, []))
  # Everything after ' -- ' is argv with -nvcc_options stripped and
  # define-macro values rewritten as -D arguments.
  passthrough = ' '.join(StripAndTransformNvccOptions(argv))

  gcudacc_cmd = binary + host_compiler_arg + extra_flags + ' -- ' + passthrough
  if log:
    Log(gcudacc_cmd)
  return os.system(gcudacc_cmd)
|
||||
|
||||
|
||||
def InvokeNvcc(argv, log=False):
|
||||
"""Call nvcc with arguments assembled from argv.
|
||||
|
||||
Args:
|
||||
argv: A list of strings, possibly the argv passed to main().
|
||||
log: True if logging is requested.
|
||||
|
||||
Returns:
|
||||
The return value of calling os.system('nvcc ' + args)
|
||||
"""
|
||||
|
||||
host_compiler_options = GetHostCompilerOptions(argv)
|
||||
nvcc_compiler_options = GetNvccOptions(argv)
|
||||
opt_option = GetOptionValue(argv, 'O')
|
||||
m_options = GetOptionValue(argv, 'm')
|
||||
m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
|
||||
include_options = GetOptionValue(argv, 'I')
|
||||
out_file = GetOptionValue(argv, 'o')
|
||||
depfiles = GetOptionValue(argv, 'MF')
|
||||
defines = GetOptionValue(argv, 'D')
|
||||
defines = ''.join([' -D' + define for define in defines])
|
||||
undefines = GetOptionValue(argv, 'U')
|
||||
undefines = ''.join([' -U' + define for define in undefines])
|
||||
std_options = GetOptionValue(argv, 'std')
|
||||
# currently only c++11 is supported by Cuda 7.0 std argument
|
||||
nvcc_allowed_std_options = ["c++11"]
|
||||
std_options = ''.join([' -std=' + define
|
||||
for define in std_options if define in nvcc_allowed_std_options])
|
||||
|
||||
# The list of source files get passed after the -c option. I don't know of
|
||||
# any other reliable way to just get the list of source files to be compiled.
|
||||
src_files = GetOptionValue(argv, 'c')
|
||||
|
||||
if len(src_files) == 0:
|
||||
return 1
|
||||
if len(out_file) != 1:
|
||||
return 1
|
||||
|
||||
opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
|
||||
else ' -g -G')
|
||||
|
||||
includes = (' -I ' + ' -I '.join(include_options)
|
||||
if len(include_options) > 0
|
||||
else '')
|
||||
|
||||
# Unfortunately, there are other options that have -c prefix too.
|
||||
# So allowing only those look like C/C++ files.
|
||||
src_files = [f for f in src_files if
|
||||
re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)]
|
||||
srcs = ' '.join(src_files)
|
||||
out = ' -o ' + out_file[0]
|
||||
|
||||
supported_cuda_compute_capabilities = [ %{cuda_compute_capabilities} ]
|
||||
nvccopts = ''
|
||||
for capability in supported_cuda_compute_capabilities:
|
||||
capability = capability.replace('.', '')
|
||||
nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s,compute_%s\" ' % (
|
||||
capability, capability, capability)
|
||||
nvccopts += ' ' + nvcc_compiler_options
|
||||
nvccopts += undefines
|
||||
nvccopts += defines
|
||||
nvccopts += std_options
|
||||
nvccopts += m_options
|
||||
|
||||
if depfiles:
|
||||
# Generate the dependency file
|
||||
depfile = depfiles[0]
|
||||
cmd = (NVCC_PATH + ' ' + nvccopts +
|
||||
' --compiler-options "' + host_compiler_options + '"' +
|
||||
' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
|
||||
' -I .' +
|
||||
' -x cu ' + includes + ' ' + srcs + ' -M -o ' + depfile)
|
||||
if log: Log(cmd)
|
||||
exit_status = os.system(cmd)
|
||||
if exit_status != 0:
|
||||
return exit_status
|
||||
|
||||
cmd = (NVCC_PATH + ' ' + nvccopts +
|
||||
' --compiler-options "' + host_compiler_options + ' -fPIC"' +
|
||||
' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
|
||||
' -I .' +
|
||||
' -x cu ' + opt + includes + ' -c ' + srcs + out)
|
||||
|
||||
# TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
|
||||
# Need to investigate and fix.
|
||||
cmd = 'PATH=' + PREFIX_DIR + ' ' + cmd
|
||||
if log: Log(cmd)
|
||||
return os.system(cmd)
|
||||
|
||||
|
||||
def main():
  """Dispatch one compiler invocation to nvcc, gcudacc, or the CPU compiler.

  Arguments are read from sys.argv. When '-x cuda' is present the remaining
  arguments are shell-quoted and handed to InvokeGcudacc (if --use_gcudacc
  was given) or InvokeNvcc. Otherwise the original arguments, minus this
  wrapper's own flags, are passed to CPU_COMPILER.

  Returns:
    The exit status of whichever compiler was invoked.
  """
  flag_parser = ArgumentParser()
  flag_parser.add_argument('-x', nargs=1)
  flag_parser.add_argument('--cuda_log', action='store_true')
  flag_parser.add_argument('--use_gcudacc', action='store_true')
  flag_parser.add_argument('--gcudacc_version', action='store', default='v8')
  flag_parser.add_argument('--gcudacc_flag', nargs='*', action='append',
                           default=[])
  args, leftover = flag_parser.parse_known_args(sys.argv[1:])

  is_cuda_compile = bool(args.x) and args.x[0] == 'cuda'
  if is_cuda_compile:
    if args.cuda_log:
      Log('-x cuda')
    # The CUDA paths build a shell command string, so quote each argument.
    leftover = [pipes.quote(s) for s in leftover]
    if not args.use_gcudacc:
      if args.cuda_log:
        Log('using nvcc')
      return InvokeNvcc(leftover, log=args.cuda_log)
    if args.cuda_log:
      Log('using gcudacc')
    return InvokeGcudacc(argv=leftover,
                         gcudacc_version=args.gcudacc_version,
                         gcudacc_flags=args.gcudacc_flag,
                         log=args.cuda_log)

  # Strip our flags before passing through to the CPU compiler for files which
  # are not -x cuda. We can't just pass 'leftover' because it also strips -x.
  # We not only want to pass -x to the CPU compiler, but also keep it in its
  # relative location in the argv list (the compiler is actually sensitive to
  # this).
  wrapper_only_prefixes = ('--cuda_log',
                           '--use_gcudacc',
                           '--gcudacc_version',
                           '--gcudacc_flag')
  cpu_compiler_flags = []
  for flag in sys.argv[1:]:
    if flag.startswith(wrapper_only_prefixes):
      continue
    cpu_compiler_flags.append(flag)

  if args.use_gcudacc:
    # This macro is defined for TUs that are not marked with "-x cuda" but are
    # built as part of a -config=cuda --use_gcudacc compilation. They are
    # compiled with the default CPU compiler. Since the objects built from
    # these TUs are later linked with objects that come from gcudacc, some
    # parts of the code need to be marked for these special cases. For example,
    # some types have to be defined similarly for gcudacc-compiled TUs and
    # default CPU compiler-compiled TUs linked with them, but differently when
    # nvcc is used.
    # TODO(eliben): rename to a more descriptive name.
    cpu_compiler_flags.append('-D__GCUDACC_HOST__')

  return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
224
third_party/gpus/cuda/BUILD
vendored
224
third_party/gpus/cuda/BUILD
vendored
@ -1,224 +0,0 @@
|
||||
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
|
||||
|
||||
load("//third_party/gpus/cuda:build_defs.bzl", "if_cuda")
|
||||
load("platform", "cuda_library_path")
|
||||
load("platform", "cuda_static_library_path")
|
||||
load("platform", "cudnn_library_path")
|
||||
load("platform", "cupti_library_path")
|
||||
load("platform", "readlink_command")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
config_setting(
|
||||
name = "using_gcudacc",
|
||||
values = {
|
||||
"define": "using_cuda_gcudacc=true",
|
||||
},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "using_nvcc",
|
||||
values = {
|
||||
"define": "using_cuda_nvcc=true",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "using_clang",
|
||||
values = {
|
||||
"define": "using_cuda_clang=true",
|
||||
},
|
||||
)
|
||||
|
||||
# Equivalent to using_clang && -c opt.
|
||||
config_setting(
|
||||
name = "using_clang_opt",
|
||||
values = {
|
||||
"define": "using_cuda_clang=true",
|
||||
"compilation_mode": "opt",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "darwin",
|
||||
values = {"cpu": "darwin"},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cuda_headers",
|
||||
hdrs = glob([
|
||||
"**/*.h",
|
||||
]),
|
||||
includes = [
|
||||
".",
|
||||
"include",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudart_static",
|
||||
srcs = [
|
||||
cuda_static_library_path("cudart"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkopts = [
|
||||
"-ldl",
|
||||
"-lpthread",
|
||||
] + select({
|
||||
"//tensorflow:darwin": [],
|
||||
"//conditions:default": ["-lrt"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudart",
|
||||
srcs = [
|
||||
cuda_library_path("cudart"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cudart"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cublas",
|
||||
srcs = [
|
||||
cuda_library_path("cublas"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cublas"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudnn",
|
||||
srcs = [
|
||||
cudnn_library_path(),
|
||||
],
|
||||
data = [
|
||||
cudnn_library_path(),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cufft",
|
||||
srcs = [
|
||||
cuda_library_path("cufft"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cufft"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cuda",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":cublas",
|
||||
":cuda_headers",
|
||||
":cudart",
|
||||
":cudnn",
|
||||
":cufft",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cupti_headers",
|
||||
hdrs = glob([
|
||||
"**/*.h",
|
||||
]),
|
||||
includes = [
|
||||
".",
|
||||
"extras/CUPTI/include/",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cupti_dsos",
|
||||
data = [
|
||||
cupti_library_path(),
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
# TODO(opensource): for now, we have to invoke the cuda_config.sh manually in the source tree.
|
||||
# This rule checks if Cuda libraries in the source tree have been properly configured.
|
||||
# The output list makes bazel run this rule first if the Cuda files are missing.
|
||||
# This gives us an opportunity to check and print a meaningful error message.
|
||||
# But we will need to create the output file list to make bazel happy in a successful run.
|
||||
genrule(
|
||||
name = "cuda_check",
|
||||
srcs = [
|
||||
"cuda.config",
|
||||
"cuda_config.sh",
|
||||
],
|
||||
outs = [
|
||||
"include/cuda.h",
|
||||
"include/cublas.h",
|
||||
"include/cudnn.h",
|
||||
"extras/CUPTI/include/cupti.h",
|
||||
cuda_static_library_path("cudart"),
|
||||
cuda_library_path("cublas"),
|
||||
cudnn_library_path(),
|
||||
cuda_library_path("cudart"),
|
||||
cuda_library_path("cufft"),
|
||||
cupti_library_path(),
|
||||
],
|
||||
cmd = if_cuda(
|
||||
# Under cuda config, create all the symbolic links to the actual cuda files
|
||||
"OUTPUTDIR=`{} -f $(@D)/../../..`; cd `dirname $(location :cuda_config.sh)`; OUTPUTDIR=$$OUTPUTDIR ./cuda_config.sh --check;".format(readlink_command()),
|
||||
|
||||
# Under non-cuda config, create all dummy files to make the build go through
|
||||
";".join([
|
||||
"mkdir -p $(@D)/include",
|
||||
"mkdir -p $(@D)/lib64",
|
||||
"mkdir -p $(@D)/extras/CUPTI/include",
|
||||
"mkdir -p $(@D)/extras/CUPTI/lib64",
|
||||
"touch $(@D)/include/cuda.h",
|
||||
"touch $(@D)/include/cublas.h",
|
||||
"touch $(@D)/include/cudnn.h",
|
||||
"touch $(@D)/extras/CUPTI/include/cupti.h",
|
||||
"touch $(@D)/{}".format(cuda_static_library_path("cudart")),
|
||||
"touch $(@D)/{}".format(cuda_library_path("cublas")),
|
||||
"touch $(@D)/{}".format(cudnn_library_path()),
|
||||
"touch $(@D)/{}".format(cuda_library_path("cudart")),
|
||||
"touch $(@D)/{}".format(cuda_library_path("cufft")),
|
||||
"touch $(@D)/{}".format(cupti_library_path()),
|
||||
]),
|
||||
),
|
||||
local = 1,
|
||||
)
|
||||
|
||||
genrule(
|
||||
name = "cuda_config_check",
|
||||
outs = [
|
||||
"cuda.config",
|
||||
],
|
||||
cmd = if_cuda(
|
||||
# Under cuda config, create the symbolic link to the actual cuda.config
|
||||
"configfile=$(location :cuda.config); ln -sf `{} -f $${{configfile#*/*/*/}}` $(@D)/;".format(readlink_command()),
|
||||
|
||||
# Under non-cuda config, create the dummy file
|
||||
";".join([
|
||||
"touch $(@D)/cuda.config",
|
||||
]),
|
||||
),
|
||||
local = 1,
|
||||
)
|
172
third_party/gpus/cuda/BUILD.tpl
vendored
Normal file
172
third_party/gpus/cuda/BUILD.tpl
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
|
||||
|
||||
load("@local_config_cuda//cuda:platform.bzl", "cuda_library_path")
|
||||
load("@local_config_cuda//cuda:platform.bzl", "cuda_static_library_path")
|
||||
load("@local_config_cuda//cuda:platform.bzl", "cudnn_library_path")
|
||||
load("@local_config_cuda//cuda:platform.bzl", "cupti_library_path")
|
||||
load("@local_config_cuda//cuda:platform.bzl", "readlink_command")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
config_setting(
|
||||
name = "using_gcudacc",
|
||||
values = {
|
||||
"define": "using_cuda_gcudacc=true",
|
||||
},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "using_nvcc",
|
||||
values = {
|
||||
"define": "using_cuda_nvcc=true",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "using_clang",
|
||||
values = {
|
||||
"define": "using_cuda_clang=true",
|
||||
},
|
||||
)
|
||||
|
||||
# Equivalent to using_clang && -c opt.
|
||||
config_setting(
|
||||
name = "using_clang_opt",
|
||||
values = {
|
||||
"define": "using_cuda_clang=true",
|
||||
"compilation_mode": "opt",
|
||||
},
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "darwin",
|
||||
values = {"cpu": "darwin"},
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cuda_headers",
|
||||
hdrs = glob([
|
||||
"**/*.h",
|
||||
]),
|
||||
includes = [
|
||||
".",
|
||||
"include",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudart_static",
|
||||
srcs = [
|
||||
cuda_static_library_path("cudart"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkopts = [
|
||||
"-ldl",
|
||||
"-lpthread",
|
||||
] + select({
|
||||
"@//tensorflow:darwin": [],
|
||||
"//conditions:default": ["-lrt"],
|
||||
}),
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudart",
|
||||
srcs = [
|
||||
cuda_library_path("cudart"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cudart"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cublas",
|
||||
srcs = [
|
||||
cuda_library_path("cublas"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cublas"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cudnn",
|
||||
srcs = [
|
||||
cudnn_library_path(),
|
||||
],
|
||||
data = [
|
||||
cudnn_library_path(),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cufft",
|
||||
srcs = [
|
||||
cuda_library_path("cufft"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("cufft"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "curand",
|
||||
srcs = [
|
||||
cuda_library_path("curand"),
|
||||
],
|
||||
data = [
|
||||
cuda_library_path("curand"),
|
||||
],
|
||||
includes = ["include/"],
|
||||
linkstatic = 1,
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cuda",
|
||||
deps = [
|
||||
":cuda_headers",
|
||||
":cudart",
|
||||
":cublas",
|
||||
":cudnn",
|
||||
":cufft",
|
||||
":curand",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cupti_headers",
|
||||
hdrs = glob([
|
||||
"**/*.h",
|
||||
]),
|
||||
includes = [
|
||||
".",
|
||||
"extras/CUPTI/include/",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "cupti_dsos",
|
||||
data = [
|
||||
cupti_library_path(),
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
14
third_party/gpus/cuda/build_defs.bzl.tpl
vendored
Normal file
14
third_party/gpus/cuda/build_defs.bzl.tpl
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
# Macros for building CUDA code.
|
||||
|
||||
def if_cuda(if_true, if_false = []):
  """Chooses between two values depending on the CUDA build configuration.

  Args:
    if_true: Value used when either the nvcc or the gcudacc config setting
      of @local_config_cuda matches.
    if_false: Value used in every other configuration. Defaults to [].

  Returns:
    A select() mapping both CUDA config settings to if_true and the default
    condition to if_false.
  """
  cuda_conditions = [
      "@local_config_cuda//cuda:using_nvcc",
      "@local_config_cuda//cuda:using_gcudacc",
  ]
  branches = {condition: if_true for condition in cuda_conditions}
  branches["//conditions:default"] = if_false
  return select(branches)
|
24
third_party/gpus/cuda/cuda_config.h.tpl
vendored
Normal file
24
third_party/gpus/cuda/cuda_config.h.tpl
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef CUDA_CUDA_CONFIG_H_
|
||||
#define CUDA_CUDA_CONFIG_H_
|
||||
|
||||
#define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities}
|
||||
|
||||
#define TF_CUDA_VERSION "%{cuda_version}"
|
||||
#define TF_CUDNN_VERSION "%{cudnn_version}"
|
||||
|
||||
#endif // CUDA_CUDA_CONFIG_H_
|
57
third_party/gpus/cuda/platform.bzl.tpl
vendored
Normal file
57
third_party/gpus/cuda/platform.bzl.tpl
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
CUDA_VERSION = "%{cuda_version}"
|
||||
CUDNN_VERSION = "%{cudnn_version}"
|
||||
PLATFORM = "%{platform}"
|
||||
|
||||
def cuda_sdk_version():
|
||||
return CUDA_VERSION
|
||||
|
||||
def cudnn_sdk_version():
|
||||
return CUDNN_VERSION
|
||||
|
||||
def cuda_library_path(name, version = cuda_sdk_version()):
  """Returns the relative path of a CUDA shared library.

  Args:
    name: Library name without the "lib" prefix, e.g. "cublas".
    version: Version suffix; defaults to the configured CUDA version. When
      empty, the unversioned file name is returned.

  Returns:
    A path under lib/ (*.dylib) when PLATFORM is "Darwin", otherwise a path
    under lib64/ (*.so).
  """
  if PLATFORM == "Darwin":
    if version:
      return "lib/lib{}.{}.dylib".format(name, version)
    return "lib/lib{}.dylib".format(name)
  if version:
    return "lib64/lib{}.so.{}".format(name, version)
  return "lib64/lib{}.so".format(name)
|
||||
|
||||
def cuda_static_library_path(name):
  """Returns the relative path of the static archive lib<name>_static.a.

  The archive lives under lib/ when PLATFORM is "Darwin" and under lib64/
  on every other platform.
  """
  template = ("lib/lib{}_static.a" if PLATFORM == "Darwin"
              else "lib64/lib{}_static.a")
  return template.format(name)
|
||||
|
||||
def cudnn_library_path(version = cudnn_sdk_version()):
  """Returns the relative path of the cuDNN shared library.

  Args:
    version: Version suffix; defaults to the configured cuDNN version. When
      empty, the unversioned file name is returned.

  Returns:
    A path under lib/ (*.dylib) when PLATFORM is "Darwin", otherwise a path
    under lib64/ (*.so).
  """
  if PLATFORM == "Darwin":
    if version:
      return "lib/libcudnn.{}.dylib".format(version)
    return "lib/libcudnn.dylib"
  if version:
    return "lib64/libcudnn.so.{}".format(version)
  return "lib64/libcudnn.so"
|
||||
|
||||
def cupti_library_path(version = cuda_sdk_version()):
  """Returns the relative path of the CUPTI shared library.

  Args:
    version: Version suffix; defaults to the configured CUDA version. When
      empty, the unversioned file name is returned.

  Returns:
    A path under extras/CUPTI/lib/ (*.dylib) when PLATFORM is "Darwin",
    otherwise a path under extras/CUPTI/lib64/ (*.so).
  """
  if PLATFORM == "Darwin":
    if version:
      return "extras/CUPTI/lib/libcupti.{}.dylib".format(version)
    return "extras/CUPTI/lib/libcupti.dylib"
  if version:
    return "extras/CUPTI/lib64/libcupti.so.{}".format(version)
  return "extras/CUPTI/lib64/libcupti.so"
|
||||
|
||||
def readlink_command():
  """Returns the readlink binary name: "greadlink" on Darwin, else "readlink"."""
  return "greadlink" if PLATFORM == "Darwin" else "readlink"
|
423
third_party/gpus/cuda_configure.bzl
vendored
Normal file
423
third_party/gpus/cuda_configure.bzl
vendored
Normal file
@ -0,0 +1,423 @@
|
||||
# -*- Python -*-
|
||||
"""Repository rule for CUDA autoconfiguration.
|
||||
|
||||
`cuda_configure` depends on the following environment variables:
|
||||
|
||||
* `TF_NEED_CUDA`: Whether to enable building with CUDA (enabled when `1`).
|
||||
* `CC`: The GCC host compiler path
|
||||
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
|
||||
`/usr/local/cuda`.
|
||||
* `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
|
||||
use the system default.
|
||||
* `CUDNN_VERSION`: The version of the cuDNN library.
|
||||
* `CUDNN_INSTALL_PATH`: The path to the cuDNN library. Default is
|
||||
`/usr/local/cuda`.
|
||||
* `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
|
||||
`3.5,5.2`.
|
||||
"""
|
||||
|
||||
|
||||
_DEFAULT_CUDA_VERSION = ""
|
||||
_DEFAULT_CUDNN_VERSION = ""
|
||||
_DEFAULT_CUDA_TOOLKIT_PATH = "/usr/local/cuda"
|
||||
_DEFAULT_CUDNN_INSTALL_PATH = "/usr/local/cuda"
|
||||
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
|
||||
|
||||
|
||||
# TODO(dzc): Once these functions have been factored out of Bazel's
|
||||
# cc_configure.bzl, load them from @bazel_tools instead.
|
||||
# BEGIN cc_configure common functions.
|
||||
def find_cc(repository_ctx):
  """Finds the C++ compiler.

  The compiler name is read from the `CC` environment variable with
  surrounding whitespace stripped. When `CC` is unset or blank, "gcc" is used.

  Args:
    repository_ctx: The repository context.

  Returns:
    The `CC` value itself when it starts with "/", otherwise whatever
    repository_ctx.which() resolves the compiler name to. Fails the
    repository rule when the name cannot be resolved.
  """
  cc_name = "gcc"
  if "CC" in repository_ctx.os.environ:
    cc_name = repository_ctx.os.environ["CC"].strip()
    if not cc_name:
      cc_name = "gcc"
  if cc_name.startswith("/"):
    # Absolute path, maybe we should make this supported by our which function.
    return cc_name
  cc = repository_ctx.which(cc_name)
  if cc == None:
    # Name the compiler that was actually searched for; it is only "gcc" when
    # CC was not set to something else.
    fail(
        ("Cannot find %s, either correct your path or set the CC" % cc_name) +
        " environment variable")
  return cc
|
||||
|
||||
|
||||
_INC_DIR_MARKER_BEGIN = "#include <...>"
|
||||
|
||||
|
||||
# On OS X the compiler appends " (framework directory)" to some include
# paths; that suffix has to be removed to obtain the real directory.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)

def _cxx_inc_convert(path):
  """Turns one include line printed by `cc -E -xc++` into a clean path.

  Args:
    path: A single line of the compiler's include search list.

  Returns:
    The line with surrounding whitespace removed and, if present, the
    trailing OS X framework-directory marker dropped.
  """
  cleaned = path.strip()
  if not cleaned.endswith(_OSX_FRAMEWORK_SUFFIX):
    return cleaned
  return cleaned[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
|
||||
|
||||
|
||||
def get_cxx_inc_directories(repository_ctx, cc):
  """Computes the default C++ include directories of a compiler.

  Runs `cc -E -xc++ - -v` and extracts the search list from its stderr.

  Args:
    repository_ctx: The repository context.
    cc: The compiler to query.

  Returns:
    A list of repository_ctx.path objects, one per line of the search list,
    or [] when the list cannot be located in the compiler output.
  """
  stderr = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"]).stderr

  # The list starts on the line after the "#include <...>" marker.
  marker_pos = stderr.find(_INC_DIR_MARKER_BEGIN)
  if marker_pos == -1:
    return []
  list_start = stderr.find("\n", marker_pos)
  if list_start == -1:
    return []

  # The last line that begins with a space is taken as the final entry
  # (presumably search-list entries are the only space-indented lines).
  last_entry = stderr.rfind("\n ")
  if last_entry == -1 or last_entry < list_start:
    return []

  list_end = stderr.find("\n", last_entry + 1)
  if list_end == -1:
    # The final entry runs to the end of the output.
    listing = stderr[list_start + 1:]
  else:
    listing = stderr[list_start + 1:list_end].strip()

  include_dirs = []
  for line in listing.split("\n"):
    include_dirs.append(repository_ctx.path(_cxx_inc_convert(line)))
  return include_dirs
|
||||
|
||||
# END cc_configure common functions (see TODO above).
|
||||
|
||||
|
||||
def _enable_cuda(repository_ctx):
  """Returns True only when the TF_NEED_CUDA environment variable is "1".

  Surrounding whitespace in the value is ignored; an unset variable means
  CUDA is disabled.
  """
  return repository_ctx.os.environ.get("TF_NEED_CUDA", "").strip() == "1"
|
||||
|
||||
|
||||
def _cuda_toolkit_path(repository_ctx):
  """Finds the cuda toolkit directory.

  Uses the CUDA_TOOLKIT_PATH environment variable (whitespace stripped) when
  it is set, otherwise _DEFAULT_CUDA_TOOLKIT_PATH.

  Args:
    repository_ctx: The repository context.

  Returns:
    The toolkit path as a string. Fails the repository rule when the path
    does not exist.
  """
  cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH
  if "CUDA_TOOLKIT_PATH" in repository_ctx.os.environ:
    cuda_toolkit_path = repository_ctx.os.environ["CUDA_TOOLKIT_PATH"].strip()
  if not repository_ctx.path(cuda_toolkit_path).exists:
    # Report the path that was checked so a bad setting can be identified.
    fail("Cannot find cuda toolkit path: %s" % cuda_toolkit_path)
  return cuda_toolkit_path
|
||||
|
||||
|
||||
def _cudnn_install_basedir(repository_ctx):
  """Finds the cudnn install directory.

  Uses the CUDNN_INSTALL_PATH environment variable (whitespace stripped)
  when it is set, otherwise _DEFAULT_CUDNN_INSTALL_PATH.

  Args:
    repository_ctx: The repository context.

  Returns:
    The install path as a string. Fails the repository rule when the path
    does not exist.
  """
  cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH
  if "CUDNN_INSTALL_PATH" in repository_ctx.os.environ:
    cudnn_install_path = repository_ctx.os.environ["CUDNN_INSTALL_PATH"].strip()
  if not repository_ctx.path(cudnn_install_path).exists:
    # Report the path that was checked so a bad setting can be identified.
    fail("Cannot find cudnn install path: %s" % cudnn_install_path)
  return cudnn_install_path
|
||||
|
||||
|
||||
def _cuda_version(repository_ctx):
  """Reads the cuda version from the CUDA_VERSION environment variable.

  Returns:
    The value with surrounding whitespace stripped, or "" when the variable
    is not set.
  """
  return repository_ctx.os.environ.get("CUDA_VERSION", "").strip()
|
||||
|
||||
|
||||
def _cudnn_version(repository_ctx):
  """Reads the cudnn version from the CUDNN_VERSION environment variable.

  Returns:
    The value with surrounding whitespace stripped, or "" when the variable
    is not set.
  """
  return repository_ctx.os.environ.get("CUDNN_VERSION", "").strip()
|
||||
|
||||
|
||||
def _compute_capabilities(repository_ctx):
  """Returns a list of strings representing cuda compute capabilities.

  The list is parsed from the comma-separated CUDA_COMPUTE_CAPABILITIES
  environment variable. Whitespace around the whole value and around each
  entry is ignored, matching how the other environment variables are read.
  When the variable is unset or blank, _DEFAULT_CUDA_COMPUTE_CAPABILITIES is
  returned.

  Args:
    repository_ctx: The repository context.

  Returns:
    A list of "<major>.<minor>" strings. Fails the repository rule when an
    entry does not have that form.
  """
  environ = repository_ctx.os.environ
  if "CUDA_COMPUTE_CAPABILITIES" not in environ:
    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
  capabilities_str = environ["CUDA_COMPUTE_CAPABILITIES"].strip()
  if not capabilities_str:
    # A blank value is treated like an unset variable.
    return _DEFAULT_CUDA_COMPUTE_CAPABILITIES
  capabilities = [c.strip() for c in capabilities_str.split(",")]
  for capability in capabilities:
    # Workaround for Skylark's lack of support for regex. This check should
    # be equivalent to checking:
    #     if re.match("[0-9]+\.[0-9]+$", capability) == None:
    parts = capability.split(".")
    if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit():
      fail("Invalid compute capability: %s" % capability)
  return capabilities
|
||||
|
||||
|
||||
def _cpu_value(repository_ctx):
  """Returns the stdout of `uname -s` with surrounding whitespace stripped."""
  return repository_ctx.execute(["uname", "-s"]).stdout.strip()
|
||||
|
||||
|
||||
def _cuda_symlink_files(cpu_value, cuda_version, cudnn_version):
  """Builds the platform-specific relative paths of the CUDA libraries.

  Args:
    cpu_value: The string representing the host OS ("Linux" or "Darwin").
    cuda_version: The cuda version as returned by _cuda_version.
    cudnn_version: The cudnn version as returned by _cudnn_version.

  Returns:
    A struct whose fields hold the library directory name and the relative
    paths of the individual CUDA, cuDNN and CUPTI libraries. Fails for any
    other cpu_value.
  """
  if cpu_value != "Linux" and cpu_value != "Darwin":
    fail("Not supported CPU value %s" % cpu_value)

  # An empty version yields an unversioned file name.
  cuda_ext = ""
  if cuda_version:
    cuda_ext = ".%s" % cuda_version
  cudnn_ext = ""
  if cudnn_version:
    cudnn_ext = ".%s" % cudnn_version

  if cpu_value == "Darwin":
    return struct(
        cuda_lib_path = "lib",
        cuda_rt_lib = "lib/libcudart%s.dylib" % cuda_ext,
        cuda_rt_lib_static = "lib/libcudart_static.a",
        cuda_blas_lib = "lib/libcublas%s.dylib" % cuda_ext,
        cuda_dnn_lib = "lib/libcudnn%s.dylib" % cudnn_ext,
        cuda_dnn_lib_alt = "libcudnn%s.dylib" % cudnn_ext,
        cuda_rand_lib = "lib/libcurand%s.dylib" % cuda_ext,
        cuda_fft_lib = "lib/libcufft%s.dylib" % cuda_ext,
        cuda_cupti_lib = "extras/CUPTI/lib/libcupti%s.dylib" % cuda_ext)

  return struct(
      cuda_lib_path = "lib64",
      cuda_rt_lib = "lib64/libcudart.so%s" % cuda_ext,
      cuda_rt_lib_static = "lib64/libcudart_static.a",
      cuda_blas_lib = "lib64/libcublas.so%s" % cuda_ext,
      cuda_dnn_lib = "lib64/libcudnn.so%s" % cudnn_ext,
      cuda_dnn_lib_alt = "libcudnn.so%s" % cudnn_ext,
      cuda_rand_lib = "lib64/libcurand.so%s" % cuda_ext,
      cuda_fft_lib = "lib64/libcufft.so%s" % cuda_ext,
      cuda_cupti_lib = "extras/CUPTI/lib64/libcupti.so%s" % cuda_ext)
|
||||
|
||||
|
||||
def _check_lib(repository_ctx, cuda_toolkit_path, cuda_lib):
  """Fails the repository rule unless cuda_lib exists in the toolkit.

  Args:
    repository_ctx: The repository context.
    cuda_toolkit_path: The cuda toolkit directory containing the cuda libraries.
    cuda_lib: Path of the library, relative to cuda_toolkit_path.
  """
  candidate = cuda_toolkit_path + "/" + cuda_lib
  if repository_ctx.path(candidate).exists:
    return
  fail("Cannot find %s" % candidate)
|
||||
|
||||
|
||||
def _check_dir(repository_ctx, directory):
  """Fails the repository rule unless the given path exists.

  Args:
    repository_ctx: The repository context.
    directory: The directory whose existence is required.
  """
  if repository_ctx.path(directory).exists:
    return
  fail("Cannot find dir: %s" % directory)
|
||||
|
||||
|
||||
def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir):
  """Locates the directory that contains cudnn.h.

  Args:
    repository_ctx: The repository context.
    cudnn_install_basedir: The cudnn install directory as returned by
      _cudnn_install_basedir.

  Returns:
    The first of the install directory, its include/ subdirectory and
    /usr/include that contains cudnn.h. Fails when none of them does.
  """
  search_dirs = [
      cudnn_install_basedir,
      cudnn_install_basedir + "/include",
      "/usr/include",
  ]
  for header_dir in search_dirs:
    if repository_ctx.path(header_dir + "/cudnn.h").exists:
      return header_dir
  fail("Cannot find cudnn.h under %s" % cudnn_install_basedir)
|
||||
|
||||
|
||||
def _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir, symlink_files):
  """Locates the cudnn library file under the cudnn install directory.

  Args:
    repository_ctx: The repository context.
    cudnn_install_basedir: The cudnn install dir as returned by
      _cudnn_install_basedir.
    symlink_files: The symlink files as returned by _cuda_symlink_files.

  Returns:
    The full path of the library file: the install directory joined with
    symlink_files.cuda_dnn_lib if that exists, otherwise joined with
    symlink_files.cuda_dnn_lib_alt. Fails when neither file exists.
  """
  relative_candidates = [
      symlink_files.cuda_dnn_lib,
      symlink_files.cuda_dnn_lib_alt,
  ]
  for relative in relative_candidates:
    candidate = cudnn_install_basedir + "/" + relative
    if repository_ctx.path(candidate).exists:
      return candidate

  fail("Cannot find %s or %s under %s" %
       (symlink_files.cuda_dnn_lib, symlink_files.cuda_dnn_lib_alt,
        cudnn_install_basedir))
|
||||
|
||||
|
||||
def _tpl(repository_ctx, tpl, substitutions={}, out=None):
  """Expands the template //third_party/gpus/<tpl>.tpl into the repository.

  Args:
    repository_ctx: The repository context.
    tpl: Template name in "package:file" form, without the ".tpl" suffix.
    substitutions: Dict of placeholder to replacement passed to the template.
    out: Output path; when not given, tpl with ":" replaced by "/" is used.
  """
  destination = out if out else tpl.replace(":", "/")
  template_label = Label("//third_party/gpus/%s.tpl" % tpl)
  repository_ctx.template(destination, template_label, substitutions)


def _file(repository_ctx, label):
  """Copies //third_party/gpus/<label>.tpl into the repository unchanged.

  The output path is label with ":" replaced by "/"; no substitutions are
  applied.
  """
  _tpl(repository_ctx, label, {})
|
||||
|
||||
|
||||
def _create_dummy_repository(repository_ctx):
  """Creates a placeholder repository for builds without CUDA.

  The BUILD and .bzl files are generated with the default (empty) versions,
  and empty stand-ins are written for the CUDA headers and libraries.
  """
  cpu_value = _cpu_value(repository_ctx)
  symlink_files = _cuda_symlink_files(cpu_value, _DEFAULT_CUDA_VERSION,
                                      _DEFAULT_CUDNN_VERSION)

  # Set up BUILD file for cuda/.
  _file(repository_ctx, "cuda:BUILD")
  _file(repository_ctx, "cuda:build_defs.bzl")
  platform_substitutions = {
      "%{cuda_version}": _DEFAULT_CUDA_VERSION,
      "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
      "%{platform}": cpu_value,
  }
  _tpl(repository_ctx, "cuda:platform.bzl", platform_substitutions)

  # Create dummy files for the CUDA toolkit since they are still required by
  # tensorflow/core/platform/default/build_config:cuda.
  dummy_headers = [
      "cuda/include/cuda.h",
      "cuda/include/cublas.h",
      "cuda/include/cudnn.h",
      "cuda/extras/CUPTI/include/cupti.h",
  ]
  for header in dummy_headers:
    repository_ctx.file(header, "")

  dummy_libs = [
      symlink_files.cuda_rt_lib,
      symlink_files.cuda_rt_lib_static,
      symlink_files.cuda_blas_lib,
      symlink_files.cuda_dnn_lib,
      symlink_files.cuda_rand_lib,
      symlink_files.cuda_fft_lib,
      symlink_files.cuda_cupti_lib,
  ]
  for lib in dummy_libs:
    repository_ctx.file("cuda/%s" % lib, "")

  # Set up cuda_config.h, which is used by
  # tensorflow/stream_executor/dso_loader.cc.
  default_capabilities = []
  for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES:
    default_capabilities.append("CudaVersion(\"%s\")" % c)
  _tpl(repository_ctx, "cuda:cuda_config.h",
       {
           "%{cuda_version}": _DEFAULT_CUDA_VERSION,
           "%{cudnn_version}": _DEFAULT_CUDNN_VERSION,
           "%{cuda_compute_capabilities}": ",".join(default_capabilities),
       })
|
||||
|
||||
|
||||
def _symlink_dir(repository_ctx, src_dir, dest_dir):
  """Creates one symlink in dest_dir for every entry of src_dir.

  Args:
    repository_ctx: The repository context.
    src_dir: The directory whose entries are linked.
    dest_dir: The directory the symlinks are created in; each link keeps the
      basename of its source entry.
  """
  for entry in repository_ctx.path(src_dir).readdir():
    link_path = dest_dir + "/" + entry.basename
    repository_ctx.symlink(entry, link_path)
|
||||
|
||||
|
||||
def _create_cuda_repository(repository_ctx):
  """Creates the repository containing files set up to build with CUDA.

  Resolves the CUDA and cuDNN locations and versions from the environment,
  verifies that the required libraries exist, symlinks the toolkit files
  into cuda/, and generates the cuda/ and crosstool/ BUILD, .bzl, CROSSTOOL,
  wrapper-script and cuda_config.h files from their templates.

  Args:
    repository_ctx: The repository context.
  """
  cuda_toolkit_path = _cuda_toolkit_path(repository_ctx)
  cuda_version = _cuda_version(repository_ctx)
  cudnn_install_basedir = _cudnn_install_basedir(repository_ctx)
  cudnn_version = _cudnn_version(repository_ctx)
  compute_capabilities = _compute_capabilities(repository_ctx)

  cpu_value = _cpu_value(repository_ctx)
  symlink_files = _cuda_symlink_files(cpu_value, cuda_version, cudnn_version)
  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_rt_lib)
  _check_lib(repository_ctx, cuda_toolkit_path, symlink_files.cuda_cupti_lib)
  _check_dir(repository_ctx, cudnn_install_basedir)

  cudnn_header_dir = _find_cudnn_header_dir(repository_ctx,
                                            cudnn_install_basedir)
  cudnn_lib_path = _find_cudnn_lib_path(repository_ctx, cudnn_install_basedir,
                                        symlink_files)

  # Set up symbolic links for the cuda toolkit. We link at the individual file
  # level not at the directory level. This is because the external library may
  # have a different file layout from our desired structure.
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/include", "cuda/include")
  _symlink_dir(repository_ctx,
               cuda_toolkit_path + "/" + symlink_files.cuda_lib_path,
               "cuda/" + symlink_files.cuda_lib_path)
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/bin", "cuda/bin")
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/nvvm", "cuda/nvvm")
  _symlink_dir(repository_ctx, cuda_toolkit_path + "/extras/CUPTI/include",
               "cuda/extras/CUPTI/include")
  repository_ctx.symlink(cuda_toolkit_path + "/" + symlink_files.cuda_cupti_lib,
                         "cuda/" + symlink_files.cuda_cupti_lib)

  # Set up the symbolic links for cudnn if cudnn was not installed to
  # CUDA_TOOLKIT_PATH.
  if not repository_ctx.path("cuda/include/cudnn.h").exists:
    repository_ctx.symlink(cudnn_header_dir + "/cudnn.h",
                           "cuda/include/cudnn.h")
  if not repository_ctx.path("cuda/" + symlink_files.cuda_dnn_lib).exists:
    repository_ctx.symlink(cudnn_lib_path, "cuda/" + symlink_files.cuda_dnn_lib)

  # Set up BUILD file for cuda/
  _file(repository_ctx, "cuda:BUILD")
  _file(repository_ctx, "cuda:build_defs.bzl")
  _tpl(repository_ctx, "cuda:platform.bzl",
       {
           "%{cuda_version}": cuda_version,
           "%{cudnn_version}": cudnn_version,
           "%{platform}": cpu_value,
       })

  # Set up crosstool/
  _file(repository_ctx, "crosstool:BUILD")
  _tpl(repository_ctx, "crosstool:CROSSTOOL",
       {
           "%{cuda_version}": ("-%s" % cuda_version) if cuda_version else "",
       })

  # Resolve the host compiler once; both wrapper placeholders take the same
  # value.
  host_compiler = str(find_cc(repository_ctx))
  _tpl(repository_ctx,
       "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
       {
           "%{cpu_compiler}": host_compiler,
           "%{gcc_host_compiler_path}": host_compiler,
           "%{cuda_compute_capabilities}": ", ".join(
               ["\"%s\"" % c for c in compute_capabilities]),
       })

  # Set up cuda_config.h, which is used by
  # tensorflow/stream_executor/dso_loader.cc.
  _tpl(repository_ctx, "cuda:cuda_config.h",
       {
           "%{cuda_version}": cuda_version,
           "%{cudnn_version}": cudnn_version,
           "%{cuda_compute_capabilities}": ",".join(
               ["CudaVersion(\"%s\")" % c for c in compute_capabilities]),
       })
|
||||
|
||||
|
||||
def _cuda_autoconf_impl(repository_ctx):
  """Implementation of the cuda_autoconf repository rule.

  Builds the real CUDA repository when CUDA is enabled and a placeholder
  repository otherwise.
  """
  if _enable_cuda(repository_ctx):
    _create_cuda_repository(repository_ctx)
  else:
    _create_dummy_repository(repository_ctx)
|
||||
|
||||
|
||||
cuda_configure = repository_rule(
|
||||
implementation = _cuda_autoconf_impl,
|
||||
local = True,
|
||||
)
|
||||
"""Detects and configures the local CUDA toolchain.
|
||||
|
||||
Add the following to your WORKSPACE FILE:
|
||||
|
||||
```python
|
||||
cuda_configure(name = "local_config_cuda")
|
||||
```
|
||||
|
||||
Args:
|
||||
name: A unique name for this workspace rule.
|
||||
"""
|
@ -1,4 +1,4 @@
|
||||
build:cuda --crosstool_top=//third_party/gpus/crosstool
|
||||
build:cuda --crosstool_top=@local_config_cuda//crosstool
|
||||
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
||||
|
||||
build --force_python=py$PYTHON_MAJOR_VERSION
|
||||
|
Loading…
Reference in New Issue
Block a user