Add cuda_clang build configuration that allows to use clang as a CUDA compiler.
Change: 151705528
This commit is contained in:
parent
6e5f92ffc7
commit
8d393ea2fa
65
configure
vendored
65
configure
vendored
@ -38,7 +38,7 @@ function is_windows() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
function bazel_clean_and_fetch() {
|
function bazel_fetch() {
|
||||||
if [ -z "$TF_BAZEL_TARGETS" ]; then
|
if [ -z "$TF_BAZEL_TARGETS" ]; then
|
||||||
bazel fetch "//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
|
bazel fetch "//tensorflow/... -//tensorflow/contrib/nccl/... -//tensorflow/examples/android/..."
|
||||||
else
|
else
|
||||||
@ -279,18 +279,40 @@ while [ "$TF_NEED_CUDA" == "" ]; do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
sed_hyphen_i -e "/--action_env TF_NEED_CUDA/d" .bazelrc
|
||||||
|
sed_hyphen_i -e "/--action_env CUD/d" .bazelrc
|
||||||
|
sed_hyphen_i -e "/--action_env GCC_HOST/d" .bazelrc
|
||||||
|
sed_hyphen_i -e "/--action_env TF_CUD/d" .bazelrc
|
||||||
|
sed_hyphen_i -e "/--action_env CLANG_CUDA/d" .bazelrc
|
||||||
|
|
||||||
export TF_NEED_CUDA
|
export TF_NEED_CUDA
|
||||||
|
echo "build --action_env TF_NEED_CUDA=$TF_NEED_CUDA" >>.bazelrc
|
||||||
|
|
||||||
export TF_NEED_OPENCL
|
export TF_NEED_OPENCL
|
||||||
|
|
||||||
if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then
|
if [[ "$TF_NEED_CUDA" == "0" ]] && [[ "$TF_NEED_OPENCL" == "0" ]]; then
|
||||||
echo "Configuration finished"
|
echo "Configuration finished"
|
||||||
bazel_clean_and_fetch
|
bazel_fetch
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$TF_NEED_CUDA" == "1" ]; then
|
if [ "$TF_NEED_CUDA" == "1" ]; then
|
||||||
|
while [[ "$TF_CUDA_CLANG" == "" ]]; do
|
||||||
|
read -p "Do you want to use clang as CUDA compiler? [y/N] " INPUT
|
||||||
|
case $INPUT in
|
||||||
|
[Yy]* ) echo "Clang will be used as CUDA compiler"; TF_CUDA_CLANG=1;;
|
||||||
|
[Nn]* ) echo "nvcc will be used as CUDA compiler"; TF_CUDA_CLANG=0;;
|
||||||
|
"" ) echo "nvcc will be used as CUDA compiler"; TF_CUDA_CLANG=0;;
|
||||||
|
* ) echo "Invalid selection: " $INPUT;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
export TF_CUDA_CLANG
|
||||||
|
echo "build --action_env TF_CUDA_CLANG=$TF_CUDA_CLANG" >>.bazelrc
|
||||||
|
|
||||||
# Set up which gcc nvcc should use as the host compiler
|
# Set up which gcc nvcc should use as the host compiler
|
||||||
# No need to set this on Windows
|
# No need to set this on Windows
|
||||||
while ! is_windows && true; do
|
while [[ "$TF_CUDA_CLANG" != "1" ]] && ! is_windows && true; do
|
||||||
fromuser=""
|
fromuser=""
|
||||||
if [ -z "$GCC_HOST_COMPILER_PATH" ]; then
|
if [ -z "$GCC_HOST_COMPILER_PATH" ]; then
|
||||||
default_gcc_host_compiler_path=$(which gcc || true)
|
default_gcc_host_compiler_path=$(which gcc || true)
|
||||||
@ -302,6 +324,7 @@ while ! is_windows && true; do
|
|||||||
fi
|
fi
|
||||||
if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
|
if [ -e "$GCC_HOST_COMPILER_PATH" ]; then
|
||||||
export GCC_HOST_COMPILER_PATH
|
export GCC_HOST_COMPILER_PATH
|
||||||
|
echo "build --action_env GCC_HOST_COMPILER_PATH=\"$GCC_HOST_COMPILER_PATH\"" >>.bazelrc
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
|
echo "Invalid gcc path. ${GCC_HOST_COMPILER_PATH} cannot be found" 1>&2
|
||||||
@ -312,6 +335,30 @@ while ! is_windows && true; do
|
|||||||
# Retry
|
# Retry
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Set up which clang we should use as the cuda / host compiler.
|
||||||
|
while [[ "$TF_CUDA_CLANG" == "1" ]] && true; do
|
||||||
|
fromuser=""
|
||||||
|
if [ -z "$CLANG_CUDA_COMPILER_PATH" ]; then
|
||||||
|
default_clang_host_compiler_path=$(which clang || true)
|
||||||
|
read -p "Please specify which clang should be used as device and host compiler. [Default is $default_clang_host_compiler_path]: " CLANG_CUDA_COMPILER_PATH
|
||||||
|
fromuser="1"
|
||||||
|
if [ -z "$CLANG_CUDA_COMPILER_PATH" ]; then
|
||||||
|
CLANG_CUDA_COMPILER_PATH="$default_clang_host_compiler_path"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [ -e "$CLANG_CUDA_COMPILER_PATH" ]; then
|
||||||
|
export CLANG_CUDA_COMPILER_PATH
|
||||||
|
echo "build --action_env CLANG_CUDA_COMPILER_PATH=\"$CLANG_CUDA_COMPILER_PATH\"" >>.bazelrc
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
echo "Invalid clang path. ${CLANG_CUDA_COMPILER_PATH} cannot be found" 1>&2
|
||||||
|
if [ -z "$fromuser" ]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
CLANG_CUDA_COMPILER_PATH=""
|
||||||
|
# Retry
|
||||||
|
done
|
||||||
|
|
||||||
# Find out where the CUDA toolkit is installed
|
# Find out where the CUDA toolkit is installed
|
||||||
while true; do
|
while true; do
|
||||||
# Configure the Cuda SDK version to use.
|
# Configure the Cuda SDK version to use.
|
||||||
@ -352,7 +399,10 @@ while true; do
|
|||||||
|
|
||||||
if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
|
if [ -e "${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH}" ]; then
|
||||||
export CUDA_TOOLKIT_PATH
|
export CUDA_TOOLKIT_PATH
|
||||||
|
echo "build --action_env CUDA_TOOLKIT_PATH=\"$CUDA_TOOLKIT_PATH\"" >>.bazelrc
|
||||||
|
|
||||||
export TF_CUDA_VERSION
|
export TF_CUDA_VERSION
|
||||||
|
echo "build --action_env TF_CUDA_VERSION=$TF_CUDA_VERSION" >>.bazelrc
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
|
echo "Invalid path to CUDA $TF_CUDA_VERSION toolkit. ${CUDA_TOOLKIT_PATH}/${CUDA_RT_LIB_PATH} cannot be found"
|
||||||
@ -404,7 +454,10 @@ while true; do
|
|||||||
|
|
||||||
if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
|
if [ -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_ALT_PATH}" -o -e "$CUDNN_INSTALL_PATH/${CUDA_DNN_LIB_PATH}" ]; then
|
||||||
export TF_CUDNN_VERSION
|
export TF_CUDNN_VERSION
|
||||||
|
echo "build --action_env TF_CUDNN_VERSION=$TF_CUDNN_VERSION" >>.bazelrc
|
||||||
|
|
||||||
export CUDNN_INSTALL_PATH
|
export CUDNN_INSTALL_PATH
|
||||||
|
echo "build --action_env CUDNN_INSTALL_PATH=\"$CUDNN_INSTALL_PATH\"" >>.bazelrc
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -417,7 +470,10 @@ while true; do
|
|||||||
CUDNN_PATH_FROM_LDCONFIG="$($LDCONFIG_BIN -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
|
CUDNN_PATH_FROM_LDCONFIG="$($LDCONFIG_BIN -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')"
|
||||||
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
|
if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then
|
||||||
export TF_CUDNN_VERSION
|
export TF_CUDNN_VERSION
|
||||||
|
echo "build --action_env TF_CUDNN_VERSION=$TF_CUDNN_VERSION" >>.bazelrc
|
||||||
|
|
||||||
export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
export CUDNN_INSTALL_PATH="$(dirname ${CUDNN_PATH_FROM_LDCONFIG})"
|
||||||
|
echo "build --action_env CUDNN_INSTALL_PATH=\"$CUDNN_INSTALL_PATH\"" >>.bazelrc
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
@ -469,6 +525,7 @@ EOF
|
|||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
export TF_CUDA_COMPUTE_CAPABILITIES
|
export TF_CUDA_COMPUTE_CAPABILITIES
|
||||||
|
echo "build --action_env TF_CUDA_COMPUTE_CAPABILITIES=$TF_CUDA_COMPUTE_CAPABILITIES" >>.bazelrc
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
TF_CUDA_COMPUTE_CAPABILITIES=""
|
TF_CUDA_COMPUTE_CAPABILITIES=""
|
||||||
@ -572,6 +629,6 @@ done
|
|||||||
# end of if "$TF_NEED_OPENCL" == "1"
|
# end of if "$TF_NEED_OPENCL" == "1"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bazel_clean_and_fetch
|
bazel_fetch
|
||||||
|
|
||||||
echo "Configuration finished"
|
echo "Configuration finished"
|
||||||
|
@ -50,17 +50,54 @@ def _temp_workaround_http_archive_impl(repo_ctx):
|
|||||||
}, False)
|
}, False)
|
||||||
repo_ctx.download_and_extract(repo_ctx.attr.urls, "", repo_ctx.attr.sha256,
|
repo_ctx.download_and_extract(repo_ctx.attr.urls, "", repo_ctx.attr.sha256,
|
||||||
"", repo_ctx.attr.strip_prefix)
|
"", repo_ctx.attr.strip_prefix)
|
||||||
|
if repo_ctx.attr.patch_file != None:
|
||||||
|
_apply_patch(repo_ctx, repo_ctx.attr.patch_file)
|
||||||
|
|
||||||
temp_workaround_http_archive = repository_rule(
|
temp_workaround_http_archive = repository_rule(
|
||||||
implementation=_temp_workaround_http_archive_impl,
|
implementation=_temp_workaround_http_archive_impl,
|
||||||
attrs = {
|
attrs = {
|
||||||
"build_file": attr.label(),
|
"build_file": attr.label(),
|
||||||
"repository": attr.string(),
|
"repository": attr.string(),
|
||||||
|
"patch_file": attr.label(default = None),
|
||||||
"urls": attr.string_list(default = []),
|
"urls": attr.string_list(default = []),
|
||||||
"sha256": attr.string(default = ""),
|
"sha256": attr.string(default = ""),
|
||||||
"strip_prefix": attr.string(default = ""),
|
"strip_prefix": attr.string(default = ""),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Executes specified command with arguments and calls 'fail' if it exited with non-zero code
|
||||||
|
def _execute_and_check_ret_code(repo_ctx, cmd_and_args):
|
||||||
|
result = repo_ctx.execute(cmd_and_args)
|
||||||
|
if result.return_code != 0:
|
||||||
|
fail(("Non-zero return code({1}) when executing '{0}':\n" +
|
||||||
|
"Stdout: {2}\n" +
|
||||||
|
"Stderr: {3}").format(" ".join(cmd_and_args),
|
||||||
|
result.return_code, result.stdout, result.stderr))
|
||||||
|
|
||||||
|
# Apply a patch_file to the repository root directory
|
||||||
|
# Runs 'patch -p1'
|
||||||
|
def _apply_patch(repo_ctx, patch_file):
|
||||||
|
_execute_and_check_ret_code(repo_ctx, ["patch", "-p1",
|
||||||
|
"-d", repo_ctx.path("."),
|
||||||
|
"-i", repo_ctx.path(patch_file)])
|
||||||
|
|
||||||
|
# Download the repository and apply a patch to its root
|
||||||
|
def _patched_http_archive_impl(repo_ctx):
|
||||||
|
repo_ctx.download_and_extract(repo_ctx.attr.urls,
|
||||||
|
sha256 = repo_ctx.attr.sha256,
|
||||||
|
stripPrefix = repo_ctx.attr.strip_prefix)
|
||||||
|
_apply_patch(repo_ctx, repo_ctx.attr.patch_file)
|
||||||
|
|
||||||
|
patched_http_archive = repository_rule(
|
||||||
|
implementation = _patched_http_archive_impl,
|
||||||
|
attrs = {
|
||||||
|
"patch_file": attr.label(),
|
||||||
|
"build_file": attr.label(),
|
||||||
|
"repository": attr.string(),
|
||||||
|
"urls": attr.string_list(default = []),
|
||||||
|
"sha256": attr.string(default = ""),
|
||||||
|
"strip_prefix": attr.string(default = ""),
|
||||||
|
})
|
||||||
|
|
||||||
# If TensorFlow is linked as a submodule.
|
# If TensorFlow is linked as a submodule.
|
||||||
# path_prefix and tf_repo_name are no longer used.
|
# path_prefix and tf_repo_name are no longer used.
|
||||||
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
||||||
@ -78,11 +115,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
|||||||
native.new_http_archive(
|
native.new_http_archive(
|
||||||
name = "eigen_archive",
|
name = "eigen_archive",
|
||||||
urls = [
|
urls = [
|
||||||
"http://bazel-mirror.storage.googleapis.com/bitbucket.org/eigen/eigen/get/9c6361787292.tar.gz",
|
"http://bazel-mirror.storage.googleapis.com/bitbucket.org/eigen/eigen/get/deff8b280204.tar.gz",
|
||||||
"https://bitbucket.org/eigen/eigen/get/9c6361787292.tar.gz",
|
"https://bitbucket.org/eigen/eigen/get/deff8b280204.tar.gz",
|
||||||
],
|
],
|
||||||
sha256 = "e6ec2502a5d82dd5df0b9b16e7697f5fccb81c322d0be8e3492969eecb66badd",
|
sha256 = "a39834683eb5bdb9a7434f0ab3621d2cbc3b07e8002db6de101e45ec536723eb",
|
||||||
strip_prefix = "eigen-eigen-9c6361787292",
|
strip_prefix = "eigen-eigen-deff8b280204",
|
||||||
build_file = str(Label("//third_party:eigen.BUILD")),
|
build_file = str(Label("//third_party:eigen.BUILD")),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -255,7 +292,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
|||||||
actual = "@six_archive//:six",
|
actual = "@six_archive//:six",
|
||||||
)
|
)
|
||||||
|
|
||||||
native.http_archive(
|
patched_http_archive(
|
||||||
name = "protobuf",
|
name = "protobuf",
|
||||||
urls = [
|
urls = [
|
||||||
"http://bazel-mirror.storage.googleapis.com/github.com/google/protobuf/archive/2b7430d96aeff2bb624c8d52182ff5e4b9f7f18a.tar.gz",
|
"http://bazel-mirror.storage.googleapis.com/github.com/google/protobuf/archive/2b7430d96aeff2bb624c8d52182ff5e4b9f7f18a.tar.gz",
|
||||||
@ -263,6 +300,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
|||||||
],
|
],
|
||||||
sha256 = "e5d3d4e227a0f7afb8745df049bbd4d55474b158ca5aaa2a0e31099af24be1d0",
|
sha256 = "e5d3d4e227a0f7afb8745df049bbd4d55474b158ca5aaa2a0e31099af24be1d0",
|
||||||
strip_prefix = "protobuf-2b7430d96aeff2bb624c8d52182ff5e4b9f7f18a",
|
strip_prefix = "protobuf-2b7430d96aeff2bb624c8d52182ff5e4b9f7f18a",
|
||||||
|
# TODO: remove patching when tensorflow stops linking same protos into
|
||||||
|
# multiple shared libraries loaded in runtime by python.
|
||||||
|
# This patch fixes a runtime crash when tensorflow is compiled
|
||||||
|
# with clang -O2 on Linux (see https://github.com/tensorflow/tensorflow/issues/8394)
|
||||||
|
patch_file = str(Label("//third_party/protobuf:add_noinlines.patch")),
|
||||||
)
|
)
|
||||||
|
|
||||||
native.new_http_archive(
|
native.new_http_archive(
|
||||||
@ -452,7 +494,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
|
|||||||
],
|
],
|
||||||
sha256 = "6787f0eed88d52ee8e32956fa4947d92c139da469f1d8e311c307f27d641118e",
|
sha256 = "6787f0eed88d52ee8e32956fa4947d92c139da469f1d8e311c307f27d641118e",
|
||||||
strip_prefix = "nccl-024d1e267845f2ed06f3e2e42476d50f04a00ee6",
|
strip_prefix = "nccl-024d1e267845f2ed06f3e2e42476d50f04a00ee6",
|
||||||
build_file = str(Label("//third_party:nccl.BUILD")),
|
build_file = str(Label("//third_party/nccl:nccl.BUILD")),
|
||||||
|
# TODO: Remove patching after the fix is merged into nccl(see https://github.com/NVIDIA/nccl/pull/78)
|
||||||
|
patch_file = str(Label("//third_party/nccl:fix_clang_compilation.patch")),
|
||||||
repository = tf_repo_name,
|
repository = tf_repo_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
292
third_party/gpus/crosstool/CROSSTOOL_clang.tpl
vendored
Normal file
292
third_party/gpus/crosstool/CROSSTOOL_clang.tpl
vendored
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
major_version: "local"
|
||||||
|
minor_version: ""
|
||||||
|
default_target_cpu: "same_as_host"
|
||||||
|
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "k8"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "piii"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "arm"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "darwin"
|
||||||
|
toolchain_identifier: "local_darwin"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "ppc"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
|
||||||
|
toolchain {
|
||||||
|
abi_version: "local"
|
||||||
|
abi_libc_version: "local"
|
||||||
|
compiler: "compiler"
|
||||||
|
host_system_name: "local"
|
||||||
|
needsPic: true
|
||||||
|
target_libc: "local"
|
||||||
|
target_cpu: "local"
|
||||||
|
target_system_name: "local"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "c++11"
|
||||||
|
flag_set {
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
flag: "-std=c++11"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "stdlib"
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-executable"
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
flag_group {
|
||||||
|
flag: "-lstdc++"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "determinism"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||||
|
# compiler symbols.
|
||||||
|
flag: "-Wno-builtin-macro-redefined"
|
||||||
|
flag: "-D__DATE__=\"redacted\""
|
||||||
|
flag: "-D__TIMESTAMP__=\"redacted\""
|
||||||
|
flag: "-D__TIME__=\"redacted\""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "alwayslink"
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
action: "c++-link-executable"
|
||||||
|
flag_group {
|
||||||
|
flag: "-Wl,-no-as-needed"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# This feature will be enabled for builds that support pic by bazel.
|
||||||
|
feature {
|
||||||
|
name: "pic"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
expand_if_all_available: "pic"
|
||||||
|
flag: "-fPIC"
|
||||||
|
}
|
||||||
|
flag_group {
|
||||||
|
expand_if_none_available: "pic"
|
||||||
|
flag: "-fPIE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Security hardening on by default.
|
||||||
|
feature {
|
||||||
|
name: "hardening"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||||
|
# We need to undef it before redefining it as some distributions now
|
||||||
|
# have it enabled by default.
|
||||||
|
flag: "-U_FORTIFY_SOURCE"
|
||||||
|
flag: "-D_FORTIFY_SOURCE=1"
|
||||||
|
flag: "-fstack-protector"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
flag_group {
|
||||||
|
flag: "-Wl,-z,relro,-z,now"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-executable"
|
||||||
|
flag_group {
|
||||||
|
flag: "-pie"
|
||||||
|
flag: "-Wl,-z,relro,-z,now"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "warnings"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
# All warnings are enabled. Maybe enable -Werror as well?
|
||||||
|
flag: "-Wall"
|
||||||
|
# Some parts of the codebase set -Werror and hit this warning, so
|
||||||
|
# switch it off for now.
|
||||||
|
flag: "-Wno-invalid-partial-specialization"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Keep stack frames for debugging, even in opt mode.
|
||||||
|
feature {
|
||||||
|
name: "frame-pointer"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
flag: "-fno-omit-frame-pointer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "build-id"
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-executable"
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
flag_group {
|
||||||
|
# Stamp the binary with a unique identifier.
|
||||||
|
flag: "-Wl,--build-id=md5"
|
||||||
|
flag: "-Wl,--hash-style=gnu"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "no-canonical-prefixes"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
action: "c++-link-executable"
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
flag_group {
|
||||||
|
flag:"-no-canonical-prefixes"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "disable-assertions"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
flag: "-DNDEBUG"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "linker-bin-path"
|
||||||
|
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-executable"
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
flag_group {
|
||||||
|
flag: "-B/usr/bin/"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "common"
|
||||||
|
implies: "stdlib"
|
||||||
|
implies: "c++11"
|
||||||
|
implies: "determinism"
|
||||||
|
implies: "alwayslink"
|
||||||
|
implies: "hardening"
|
||||||
|
implies: "warnings"
|
||||||
|
implies: "frame-pointer"
|
||||||
|
implies: "build-id"
|
||||||
|
implies: "no-canonical-prefixes"
|
||||||
|
implies: "linker-bin-path"
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "opt"
|
||||||
|
implies: "common"
|
||||||
|
implies: "disable-assertions"
|
||||||
|
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
# No debug symbols.
|
||||||
|
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt
|
||||||
|
# or even generally? However, that can't happen here, as it requires
|
||||||
|
# special handling in Bazel.
|
||||||
|
flag: "-g0"
|
||||||
|
|
||||||
|
# Conservative choice for -O
|
||||||
|
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||||
|
# Profile first and / or use FDO if you need better performance than this.
|
||||||
|
flag: "-O2"
|
||||||
|
|
||||||
|
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||||
|
flag: "-ffunction-sections"
|
||||||
|
flag: "-fdata-sections"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flag_set {
|
||||||
|
action: "c++-link-dynamic-library"
|
||||||
|
action: "c++-link-executable"
|
||||||
|
flag_group {
|
||||||
|
flag: "-Wl,--gc-sections"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "fastbuild"
|
||||||
|
implies: "common"
|
||||||
|
}
|
||||||
|
|
||||||
|
feature {
|
||||||
|
name: "dbg"
|
||||||
|
implies: "common"
|
||||||
|
flag_set {
|
||||||
|
action: "c-compile"
|
||||||
|
action: "c++-compile"
|
||||||
|
flag_group {
|
||||||
|
flag: "-g"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Set clang as a C/C++ compiler.
|
||||||
|
tool_path { name: "gcc" path: "%{clang_path}" }
|
||||||
|
|
||||||
|
# Use the default system toolchain for everything else.
|
||||||
|
tool_path { name: "ar" path: "/usr/bin/ar" }
|
||||||
|
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||||
|
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||||
|
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||||
|
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||||
|
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||||
|
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||||
|
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||||
|
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||||
|
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||||
|
|
||||||
|
# Enabled dynamic linking.
|
||||||
|
linking_mode_flags { mode: DYNAMIC }
|
||||||
|
|
||||||
|
%{host_compiler_includes}
|
||||||
|
}
|
249
third_party/gpus/crosstool/CROSSTOOL_nvcc.tpl
vendored
Normal file
249
third_party/gpus/crosstool/CROSSTOOL_nvcc.tpl
vendored
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
major_version: "local"
|
||||||
|
minor_version: ""
|
||||||
|
default_target_cpu: "same_as_host"
|
||||||
|
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "k8"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "piii"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "arm"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "darwin"
|
||||||
|
toolchain_identifier: "local_darwin"
|
||||||
|
}
|
||||||
|
default_toolchain {
|
||||||
|
cpu: "ppc"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
}
|
||||||
|
|
||||||
|
toolchain {
|
||||||
|
abi_version: "local"
|
||||||
|
abi_libc_version: "local"
|
||||||
|
builtin_sysroot: ""
|
||||||
|
compiler: "compiler"
|
||||||
|
host_system_name: "local"
|
||||||
|
needsPic: true
|
||||||
|
supports_gold_linker: false
|
||||||
|
supports_incremental_linker: false
|
||||||
|
supports_fission: false
|
||||||
|
supports_interface_shared_objects: false
|
||||||
|
supports_normalizing_ar: false
|
||||||
|
supports_start_end_lib: false
|
||||||
|
supports_thin_archives: false
|
||||||
|
target_libc: "local"
|
||||||
|
target_cpu: "local"
|
||||||
|
target_system_name: "local"
|
||||||
|
toolchain_identifier: "local_linux"
|
||||||
|
|
||||||
|
tool_path { name: "ar" path: "/usr/bin/ar" }
|
||||||
|
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||||
|
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||||
|
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||||
|
# As part of the TensorFlow release, we place some cuda-related compilation
|
||||||
|
# files in @local_config_cuda//crosstool/clang/bin, and this relative
|
||||||
|
# path, combined with the rest of our Bazel configuration causes our
|
||||||
|
# compilation to use those files.
|
||||||
|
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
||||||
|
# Use "-std=c++11" for nvcc. For consistency, force both the host compiler
|
||||||
|
# and the device compiler to use "-std=c++11".
|
||||||
|
cxx_flag: "-std=c++11"
|
||||||
|
linker_flag: "-Wl,-no-as-needed"
|
||||||
|
linker_flag: "-lstdc++"
|
||||||
|
linker_flag: "-B/usr/bin/"
|
||||||
|
|
||||||
|
%{host_compiler_includes}
|
||||||
|
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||||
|
|
||||||
|
# C(++) compiles invoke the compiler (as that is the one knowing where
|
||||||
|
# to find libraries), but we provide LD so other rules can invoke the linker.
|
||||||
|
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||||
|
|
||||||
|
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||||
|
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||||
|
objcopy_embed_flag: "-I"
|
||||||
|
objcopy_embed_flag: "binary"
|
||||||
|
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||||
|
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||||
|
|
||||||
|
# Anticipated future default.
|
||||||
|
unfiltered_cxx_flag: "-no-canonical-prefixes"
|
||||||
|
|
||||||
|
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||||
|
# compiler symbols.
|
||||||
|
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
|
||||||
|
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
|
||||||
|
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
|
||||||
|
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
|
||||||
|
|
||||||
|
# Security hardening on by default.
|
||||||
|
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||||
|
# We need to undef it before redefining it as some distributions now have
|
||||||
|
# it enabled by default.
|
||||||
|
compiler_flag: "-U_FORTIFY_SOURCE"
|
||||||
|
compiler_flag: "-D_FORTIFY_SOURCE=1"
|
||||||
|
compiler_flag: "-fstack-protector"
|
||||||
|
compiler_flag: "-fPIE"
|
||||||
|
linker_flag: "-pie"
|
||||||
|
linker_flag: "-Wl,-z,relro,-z,now"
|
||||||
|
|
||||||
|
# Enable coloring even if there's no attached terminal. Bazel removes the
|
||||||
|
# escape sequences if --nocolor is specified. This isn't supported by gcc
|
||||||
|
# on Ubuntu 14.04.
|
||||||
|
# compiler_flag: "-fcolor-diagnostics"
|
||||||
|
|
||||||
|
# All warnings are enabled. Maybe enable -Werror as well?
|
||||||
|
compiler_flag: "-Wall"
|
||||||
|
# Enable a few more warnings that aren't part of -Wall.
|
||||||
|
compiler_flag: "-Wunused-but-set-parameter"
|
||||||
|
# But disable some that are problematic.
|
||||||
|
compiler_flag: "-Wno-free-nonheap-object" # has false positives
|
||||||
|
|
||||||
|
# Keep stack frames for debugging, even in opt mode.
|
||||||
|
compiler_flag: "-fno-omit-frame-pointer"
|
||||||
|
|
||||||
|
# Anticipated future default.
|
||||||
|
linker_flag: "-no-canonical-prefixes"
|
||||||
|
unfiltered_cxx_flag: "-fno-canonical-system-headers"
|
||||||
|
# Have gcc return the exit code from ld.
|
||||||
|
linker_flag: "-pass-exit-codes"
|
||||||
|
# Stamp the binary with a unique identifier.
|
||||||
|
linker_flag: "-Wl,--build-id=md5"
|
||||||
|
linker_flag: "-Wl,--hash-style=gnu"
|
||||||
|
# Gold linker only? Can we enable this by default?
|
||||||
|
# linker_flag: "-Wl,--warn-execstack"
|
||||||
|
# linker_flag: "-Wl,--detect-odr-violations"
|
||||||
|
|
||||||
|
# Include directory for cuda headers.
|
||||||
|
cxx_builtin_include_directory: "%{cuda_include_path}"
|
||||||
|
|
||||||
|
compilation_mode_flags {
|
||||||
|
mode: DBG
|
||||||
|
# Enable debug symbols.
|
||||||
|
compiler_flag: "-g"
|
||||||
|
}
|
||||||
|
compilation_mode_flags {
|
||||||
|
mode: OPT
|
||||||
|
|
||||||
|
# No debug symbols.
|
||||||
|
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or
|
||||||
|
# even generally? However, that can't happen here, as it requires special
|
||||||
|
# handling in Bazel.
|
||||||
|
compiler_flag: "-g0"
|
||||||
|
|
||||||
|
# Conservative choice for -O
|
||||||
|
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||||
|
# Profile first and / or use FDO if you need better performance than this.
|
||||||
|
compiler_flag: "-O2"
|
||||||
|
|
||||||
|
# Disable assertions
|
||||||
|
compiler_flag: "-DNDEBUG"
|
||||||
|
|
||||||
|
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||||
|
compiler_flag: "-ffunction-sections"
|
||||||
|
compiler_flag: "-fdata-sections"
|
||||||
|
linker_flag: "-Wl,--gc-sections"
|
||||||
|
}
|
||||||
|
linking_mode_flags { mode: DYNAMIC }
|
||||||
|
}
|
||||||
|
|
||||||
|
toolchain {
|
||||||
|
abi_version: "local"
|
||||||
|
abi_libc_version: "local"
|
||||||
|
builtin_sysroot: ""
|
||||||
|
compiler: "compiler"
|
||||||
|
host_system_name: "local"
|
||||||
|
needsPic: true
|
||||||
|
target_libc: "macosx"
|
||||||
|
target_cpu: "darwin"
|
||||||
|
target_system_name: "local"
|
||||||
|
toolchain_identifier: "local_darwin"
|
||||||
|
|
||||||
|
tool_path { name: "ar" path: "/usr/bin/libtool" }
|
||||||
|
tool_path { name: "compat-ld" path: "/usr/bin/ld" }
|
||||||
|
tool_path { name: "cpp" path: "/usr/bin/cpp" }
|
||||||
|
tool_path { name: "dwp" path: "/usr/bin/dwp" }
|
||||||
|
tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_is_not_gcc" }
|
||||||
|
cxx_flag: "-std=c++11"
|
||||||
|
ar_flag: "-static"
|
||||||
|
ar_flag: "-s"
|
||||||
|
ar_flag: "-o"
|
||||||
|
linker_flag: "-lc++"
|
||||||
|
linker_flag: "-undefined"
|
||||||
|
linker_flag: "dynamic_lookup"
|
||||||
|
# TODO(ulfjack): This is wrong on so many levels. Figure out a way to auto-detect the proper
|
||||||
|
# setting from the local compiler, and also how to make incremental builds correct.
|
||||||
|
cxx_builtin_include_directory: "/"
|
||||||
|
tool_path { name: "gcov" path: "/usr/bin/gcov" }
|
||||||
|
tool_path { name: "ld" path: "/usr/bin/ld" }
|
||||||
|
tool_path { name: "nm" path: "/usr/bin/nm" }
|
||||||
|
tool_path { name: "objcopy" path: "/usr/bin/objcopy" }
|
||||||
|
objcopy_embed_flag: "-I"
|
||||||
|
objcopy_embed_flag: "binary"
|
||||||
|
tool_path { name: "objdump" path: "/usr/bin/objdump" }
|
||||||
|
tool_path { name: "strip" path: "/usr/bin/strip" }
|
||||||
|
|
||||||
|
# Anticipated future default.
|
||||||
|
unfiltered_cxx_flag: "-no-canonical-prefixes"
|
||||||
|
# Make C++ compilation deterministic. Use linkstamping instead of these
|
||||||
|
# compiler symbols.
|
||||||
|
unfiltered_cxx_flag: "-Wno-builtin-macro-redefined"
|
||||||
|
unfiltered_cxx_flag: "-D__DATE__=\"redacted\""
|
||||||
|
unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\""
|
||||||
|
unfiltered_cxx_flag: "-D__TIME__=\"redacted\""
|
||||||
|
|
||||||
|
# Security hardening on by default.
|
||||||
|
# Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases.
|
||||||
|
compiler_flag: "-D_FORTIFY_SOURCE=1"
|
||||||
|
compiler_flag: "-fstack-protector"
|
||||||
|
|
||||||
|
# Enable coloring even if there's no attached terminal. Bazel removes the
|
||||||
|
# escape sequences if --nocolor is specified.
|
||||||
|
compiler_flag: "-fcolor-diagnostics"
|
||||||
|
|
||||||
|
# All warnings are enabled. Maybe enable -Werror as well?
|
||||||
|
compiler_flag: "-Wall"
|
||||||
|
# Enable a few more warnings that aren't part of -Wall.
|
||||||
|
compiler_flag: "-Wthread-safety"
|
||||||
|
compiler_flag: "-Wself-assign"
|
||||||
|
|
||||||
|
# Keep stack frames for debugging, even in opt mode.
|
||||||
|
compiler_flag: "-fno-omit-frame-pointer"
|
||||||
|
|
||||||
|
# Anticipated future default.
|
||||||
|
linker_flag: "-no-canonical-prefixes"
|
||||||
|
|
||||||
|
# Include directory for cuda headers.
|
||||||
|
cxx_builtin_include_directory: "%{cuda_include_path}"
|
||||||
|
|
||||||
|
compilation_mode_flags {
|
||||||
|
mode: DBG
|
||||||
|
# Enable debug symbols.
|
||||||
|
compiler_flag: "-g"
|
||||||
|
}
|
||||||
|
compilation_mode_flags {
|
||||||
|
mode: OPT
|
||||||
|
# No debug symbols.
|
||||||
|
# Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or even generally?
|
||||||
|
# However, that can't happen here, as it requires special handling in Bazel.
|
||||||
|
compiler_flag: "-g0"
|
||||||
|
|
||||||
|
# Conservative choice for -O
|
||||||
|
# -O3 can increase binary size and even slow down the resulting binaries.
|
||||||
|
# Profile first and / or use FDO if you need better performance than this.
|
||||||
|
compiler_flag: "-O2"
|
||||||
|
|
||||||
|
# Disable assertions
|
||||||
|
compiler_flag: "-DNDEBUG"
|
||||||
|
|
||||||
|
# Removal of unused code and data at link time (can this increase binary size in some cases?).
|
||||||
|
compiler_flag: "-ffunction-sections"
|
||||||
|
compiler_flag: "-fdata-sections"
|
||||||
|
}
|
||||||
|
}
|
4
third_party/gpus/cuda/build_defs.bzl.tpl
vendored
4
third_party/gpus/cuda/build_defs.bzl.tpl
vendored
@ -8,12 +8,14 @@ def if_cuda(if_true, if_false = []):
|
|||||||
"""
|
"""
|
||||||
return select({
|
return select({
|
||||||
"@local_config_cuda//cuda:using_nvcc": if_true,
|
"@local_config_cuda//cuda:using_nvcc": if_true,
|
||||||
|
"@local_config_cuda//cuda:using_clang": if_true,
|
||||||
"//conditions:default": if_false
|
"//conditions:default": if_false
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def cuda_default_copts():
|
def cuda_default_copts():
|
||||||
"""Default options for all CUDA compilations."""
|
"""Default options for all CUDA compilations."""
|
||||||
return if_cuda(["-x", "cuda", "-DGOOGLE_CUDA=1"])
|
return if_cuda(["-x", "cuda", "-DGOOGLE_CUDA=1"] + %{cuda_extra_copts})
|
||||||
|
|
||||||
|
|
||||||
def cuda_is_configured():
|
def cuda_is_configured():
|
||||||
|
108
third_party/gpus/cuda_configure.bzl
vendored
108
third_party/gpus/cuda_configure.bzl
vendored
@ -5,6 +5,9 @@
|
|||||||
|
|
||||||
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
|
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
|
||||||
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
|
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
|
||||||
|
* `TF_CUDA_CLANG`: Wheter to use clang as a cuda compiler.
|
||||||
|
* `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
|
||||||
|
both host and device code compilation if TF_CUDA_CLANG is 1.
|
||||||
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
|
* `CUDA_TOOLKIT_PATH`: The path to the CUDA toolkit. Default is
|
||||||
`/usr/local/cuda`.
|
`/usr/local/cuda`.
|
||||||
* `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
|
* `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
|
||||||
@ -17,6 +20,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
|
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
|
||||||
|
_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
|
||||||
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
|
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
|
||||||
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
|
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
|
||||||
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
|
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
|
||||||
@ -35,19 +39,25 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"]
|
|||||||
# BEGIN cc_configure common functions.
|
# BEGIN cc_configure common functions.
|
||||||
def find_cc(repository_ctx):
|
def find_cc(repository_ctx):
|
||||||
"""Find the C++ compiler."""
|
"""Find the C++ compiler."""
|
||||||
cc_name = "gcc"
|
if _use_cuda_clang(repository_ctx):
|
||||||
if _GCC_HOST_COMPILER_PATH in repository_ctx.os.environ:
|
target_cc_name = "clang"
|
||||||
cc_name = repository_ctx.os.environ[_GCC_HOST_COMPILER_PATH].strip()
|
cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
|
||||||
if not cc_name:
|
else:
|
||||||
cc_name = "gcc"
|
target_cc_name = "gcc"
|
||||||
|
cc_path_envvar = _GCC_HOST_COMPILER_PATH
|
||||||
|
cc_name = target_cc_name
|
||||||
|
|
||||||
|
if cc_path_envvar in repository_ctx.os.environ:
|
||||||
|
cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip()
|
||||||
|
if cc_name_from_env:
|
||||||
|
cc_name = cc_name_from_env
|
||||||
if cc_name.startswith("/"):
|
if cc_name.startswith("/"):
|
||||||
# Absolute path, maybe we should make this suported by our which function.
|
# Absolute path, maybe we should make this suported by our which function.
|
||||||
return cc_name
|
return cc_name
|
||||||
cc = repository_ctx.which(cc_name)
|
cc = repository_ctx.which(cc_name)
|
||||||
if cc == None:
|
if cc == None:
|
||||||
fail(
|
fail(("Cannot find {}, either correct your path or set the {}" +
|
||||||
"Cannot find gcc, either correct your path or set the CC" +
|
" environment variable").format(target_cc_name, cc_path_envvar))
|
||||||
" environment variable")
|
|
||||||
return cc
|
return cc
|
||||||
|
|
||||||
|
|
||||||
@ -64,10 +74,17 @@ def _cxx_inc_convert(path):
|
|||||||
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
|
path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
|
||||||
def get_cxx_inc_directories(repository_ctx, cc):
|
"""Compute the list of default C or C++ include directories."""
|
||||||
"""Compute the list of default C++ include directories."""
|
if lang_is_cpp:
|
||||||
result = repository_ctx.execute([cc, "-E", "-xc++", "-", "-v"])
|
lang = "c++"
|
||||||
|
else:
|
||||||
|
lang = "c"
|
||||||
|
# TODO: We pass -no-canonical-prefixes here to match the compiler flags,
|
||||||
|
# but in cuda_clang CROSSTOOL file that is a `feature` and we should
|
||||||
|
# handle the case when it's disabled and no flag is passed
|
||||||
|
result = repository_ctx.execute([cc, "-no-canonical-prefixes",
|
||||||
|
"-E", "-x" + lang, "-", "-v"])
|
||||||
index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
|
index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
|
||||||
if index1 == -1:
|
if index1 == -1:
|
||||||
return []
|
return []
|
||||||
@ -86,6 +103,19 @@ def get_cxx_inc_directories(repository_ctx, cc):
|
|||||||
return [repository_ctx.path(_cxx_inc_convert(p))
|
return [repository_ctx.path(_cxx_inc_convert(p))
|
||||||
for p in inc_dirs.split("\n")]
|
for p in inc_dirs.split("\n")]
|
||||||
|
|
||||||
|
def get_cxx_inc_directories(repository_ctx, cc):
|
||||||
|
"""Compute the list of default C and C++ include directories."""
|
||||||
|
# For some reason `clang -xc` sometimes returns include paths that are
|
||||||
|
# different from the ones from `clang -xc++`. (Symlink and a dir)
|
||||||
|
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
|
||||||
|
includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
|
||||||
|
includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
|
||||||
|
|
||||||
|
includes_cpp_set = set(includes_cpp)
|
||||||
|
return includes_cpp + [inc for inc in includes_c
|
||||||
|
if inc not in includes_cpp_set]
|
||||||
|
|
||||||
|
|
||||||
def auto_configure_fail(msg):
|
def auto_configure_fail(msg):
|
||||||
"""Output failure message when auto configuration fails."""
|
"""Output failure message when auto configuration fails."""
|
||||||
red = "\033[0;31m"
|
red = "\033[0;31m"
|
||||||
@ -94,7 +124,7 @@ def auto_configure_fail(msg):
|
|||||||
# END cc_configure common functions (see TODO above).
|
# END cc_configure common functions (see TODO above).
|
||||||
|
|
||||||
|
|
||||||
def _gcc_host_compiler_includes(repository_ctx, cc):
|
def _host_compiler_includes(repository_ctx, cc):
|
||||||
"""Generates the cxx_builtin_include_directory entries for gcc inc dirs.
|
"""Generates the cxx_builtin_include_directory entries for gcc inc dirs.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -645,7 +675,8 @@ def _create_dummy_repository(repository_ctx):
|
|||||||
# Set up BUILD file for cuda/.
|
# Set up BUILD file for cuda/.
|
||||||
_tpl(repository_ctx, "cuda:build_defs.bzl",
|
_tpl(repository_ctx, "cuda:build_defs.bzl",
|
||||||
{
|
{
|
||||||
"%{cuda_is_configured}": "False"
|
"%{cuda_is_configured}": "False",
|
||||||
|
"%{cuda_extra_copts}": "[]"
|
||||||
})
|
})
|
||||||
_tpl(repository_ctx, "cuda:BUILD",
|
_tpl(repository_ctx, "cuda:BUILD",
|
||||||
{
|
{
|
||||||
@ -730,6 +761,19 @@ def _symlink_dir(repository_ctx, src_dir, dest_dir):
|
|||||||
for src_file in files:
|
for src_file in files:
|
||||||
repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
|
repository_ctx.symlink(src_file, dest_dir + "/" + src_file.basename)
|
||||||
|
|
||||||
|
def _use_cuda_clang(repository_ctx):
|
||||||
|
if "TF_CUDA_CLANG" in repository_ctx.os.environ:
|
||||||
|
enable_cuda = repository_ctx.os.environ["TF_CUDA_CLANG"].strip()
|
||||||
|
return enable_cuda == "1"
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _compute_cuda_extra_copts(repository_ctx, cuda_config):
|
||||||
|
if _use_cuda_clang(repository_ctx):
|
||||||
|
capability_flags = ["--cuda-gpu-arch=sm_" + cap.replace(".", "") for cap in cuda_config.compute_capabilities]
|
||||||
|
else:
|
||||||
|
# Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc
|
||||||
|
capability_flags = []
|
||||||
|
return str(capability_flags)
|
||||||
|
|
||||||
def _create_cuda_repository(repository_ctx):
|
def _create_cuda_repository(repository_ctx):
|
||||||
"""Creates the repository containing files set up to build with CUDA."""
|
"""Creates the repository containing files set up to build with CUDA."""
|
||||||
@ -761,7 +805,9 @@ def _create_cuda_repository(repository_ctx):
|
|||||||
# Set up BUILD file for cuda/
|
# Set up BUILD file for cuda/
|
||||||
_tpl(repository_ctx, "cuda:build_defs.bzl",
|
_tpl(repository_ctx, "cuda:build_defs.bzl",
|
||||||
{
|
{
|
||||||
"%{cuda_is_configured}": "True"
|
"%{cuda_is_configured}": "True",
|
||||||
|
"%{cuda_extra_copts}": _compute_cuda_extra_copts(repository_ctx, cuda_config),
|
||||||
|
|
||||||
})
|
})
|
||||||
_tpl(repository_ctx, "cuda:BUILD",
|
_tpl(repository_ctx, "cuda:BUILD",
|
||||||
{
|
{
|
||||||
@ -787,21 +833,25 @@ def _create_cuda_repository(repository_ctx):
|
|||||||
# Set up crosstool/
|
# Set up crosstool/
|
||||||
_file(repository_ctx, "crosstool:BUILD")
|
_file(repository_ctx, "crosstool:BUILD")
|
||||||
cc = find_cc(repository_ctx)
|
cc = find_cc(repository_ctx)
|
||||||
gcc_host_compiler_includes = _gcc_host_compiler_includes(repository_ctx, cc)
|
host_compiler_includes = _host_compiler_includes(repository_ctx, cc)
|
||||||
_tpl(repository_ctx, "crosstool:CROSSTOOL",
|
cuda_defines = {
|
||||||
{
|
|
||||||
"%{cuda_include_path}": cuda_config.cuda_toolkit_path + '/include',
|
"%{cuda_include_path}": cuda_config.cuda_toolkit_path + '/include',
|
||||||
"%{gcc_host_compiler_includes}": gcc_host_compiler_includes,
|
"%{host_compiler_includes}": host_compiler_includes,
|
||||||
})
|
}
|
||||||
_tpl(repository_ctx,
|
if _use_cuda_clang(repository_ctx):
|
||||||
"crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
|
cuda_defines["%{clang_path}"] = cc
|
||||||
{
|
_tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL")
|
||||||
"%{cpu_compiler}": str(cc),
|
else:
|
||||||
"%{cuda_version}": cuda_config.cuda_version,
|
_tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL")
|
||||||
"%{gcc_host_compiler_path}": str(cc),
|
_tpl(repository_ctx,
|
||||||
"%{cuda_compute_capabilities}": ", ".join(
|
"crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
|
||||||
["\"%s\"" % c for c in cuda_config.compute_capabilities]),
|
{
|
||||||
})
|
"%{cpu_compiler}": str(cc),
|
||||||
|
"%{cuda_version}": cuda_config.cuda_version,
|
||||||
|
"%{gcc_host_compiler_path}": str(cc),
|
||||||
|
"%{cuda_compute_capabilities}": ", ".join(
|
||||||
|
["\"%s\"" % c for c in cuda_config.compute_capabilities]),
|
||||||
|
})
|
||||||
|
|
||||||
# Set up cuda_config.h, which is used by
|
# Set up cuda_config.h, which is used by
|
||||||
# tensorflow/stream_executor/dso_loader.cc.
|
# tensorflow/stream_executor/dso_loader.cc.
|
||||||
|
0
third_party/nccl/BUILD
vendored
Normal file
0
third_party/nccl/BUILD
vendored
Normal file
85
third_party/nccl/fix_clang_compilation.patch
vendored
Normal file
85
third_party/nccl/fix_clang_compilation.patch
vendored
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
From 8241cd7b6ed1425eeb88fd380090575978e358f4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Biryukov <ibiryukov@google.com>
|
||||||
|
Date: Thu, 16 Mar 2017 12:01:11 +0100
|
||||||
|
Subject: [PATCH 1/1] Fix compilation error when compiling with 'clang -x
|
||||||
|
cuda'.
|
||||||
|
|
||||||
|
Functions vFetch and vStore are not found by ADL with clang,
|
||||||
|
so they need to be declared before usage in ReduceCopy.
|
||||||
|
---
|
||||||
|
src/common_kernel.h | 52 ++++++++++++++++++++++++++--------------------------
|
||||||
|
1 file changed, 26 insertions(+), 26 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/common_kernel.h b/src/common_kernel.h
|
||||||
|
index 28fbc85..cc71f8a 100644
|
||||||
|
--- a/src/common_kernel.h
|
||||||
|
+++ b/src/common_kernel.h
|
||||||
|
@@ -30,6 +30,32 @@
|
||||||
|
#define BAR(type, barid, nthreads) \
|
||||||
|
BAR_EXPAND(type, barid, ROUNDUP(nthreads, WARP_SIZE))
|
||||||
|
|
||||||
|
+template<typename T> inline __device__
|
||||||
|
+T vFetch(const volatile T* ptr) {
|
||||||
|
+ return *ptr;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef CUDA_HAS_HALF
|
||||||
|
+template<> inline __device__
|
||||||
|
+half vFetch<half>(const volatile half* ptr) {
|
||||||
|
+ half r;
|
||||||
|
+ r.x = ptr->x;
|
||||||
|
+ return r;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+template<typename T> inline __device__
|
||||||
|
+void vStore(volatile T* ptr, const T val) {
|
||||||
|
+ *ptr = val;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef CUDA_HAS_HALF
|
||||||
|
+template<> inline __device__
|
||||||
|
+void vStore<half>(volatile half* ptr, const half val) {
|
||||||
|
+ ptr->x = val.x;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
__device__ unsigned int spinct;
|
||||||
|
|
||||||
|
// Spin wait until func evaluates to true
|
||||||
|
@@ -225,32 +251,6 @@ __device__ inline volatile T* AlignUp(volatile T * ptr, size_t align) {
|
||||||
|
return reinterpret_cast<volatile T*>(ALIGNUP(ptrval, align));
|
||||||
|
}
|
||||||
|
|
||||||
|
-template<typename T> inline __device__
|
||||||
|
-T vFetch(const volatile T* ptr) {
|
||||||
|
- return *ptr;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-#ifdef CUDA_HAS_HALF
|
||||||
|
-template<> inline __device__
|
||||||
|
-half vFetch<half>(const volatile half* ptr) {
|
||||||
|
- half r;
|
||||||
|
- r.x = ptr->x;
|
||||||
|
- return r;
|
||||||
|
-}
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
-template<typename T> inline __device__
|
||||||
|
-void vStore(volatile T* ptr, const T val) {
|
||||||
|
- *ptr = val;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-#ifdef CUDA_HAS_HALF
|
||||||
|
-template<> inline __device__
|
||||||
|
-void vStore<half>(volatile half* ptr, const half val) {
|
||||||
|
- ptr->x = val.x;
|
||||||
|
-}
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
// Assumptions:
|
||||||
|
// - there is exactly 1 block
|
||||||
|
// - THREADS is the number of producer threads
|
||||||
|
--
|
||||||
|
2.12.0.367.g23dc2f6d3c-goog
|
||||||
|
|
66
third_party/nccl/nccl.BUILD
vendored
Normal file
66
third_party/nccl/nccl.BUILD
vendored
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# NVIDIA nccl
|
||||||
|
# A package of optimized primitives for collective multi-GPU communication.
|
||||||
|
|
||||||
|
licenses(["notice"]) # BSD
|
||||||
|
|
||||||
|
exports_files(["LICENSE.txt"])
|
||||||
|
|
||||||
|
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")
|
||||||
|
|
||||||
|
SRCS = [
|
||||||
|
"src/all_gather.cu",
|
||||||
|
"src/all_reduce.cu",
|
||||||
|
"src/broadcast.cu",
|
||||||
|
"src/core.cu",
|
||||||
|
"src/libwrap.cu",
|
||||||
|
"src/reduce.cu",
|
||||||
|
"src/reduce_scatter.cu",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Copy .cu to .cu.cc so they can be in srcs of cc_library.
|
||||||
|
[
|
||||||
|
genrule(
|
||||||
|
name = "gen_" + src,
|
||||||
|
srcs = [src],
|
||||||
|
outs = [src + ".cc"],
|
||||||
|
cmd = "cp $(location " + src + ") $(location " + src + ".cc)",
|
||||||
|
)
|
||||||
|
for src in SRCS
|
||||||
|
]
|
||||||
|
|
||||||
|
SRCS_CU_CC = [src + ".cc" for src in SRCS]
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "nccl",
|
||||||
|
srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])),
|
||||||
|
hdrs = if_cuda(["src/nccl.h"]),
|
||||||
|
copts = [
|
||||||
|
"-DCUDA_MAJOR=0",
|
||||||
|
"-DCUDA_MINOR=0",
|
||||||
|
"-DNCCL_MAJOR=0",
|
||||||
|
"-DNCCL_MINOR=0",
|
||||||
|
"-DNCCL_PATCH=0",
|
||||||
|
"-Iexternal/nccl_archive/src",
|
||||||
|
"-O3",
|
||||||
|
] + cuda_default_copts(),
|
||||||
|
linkopts = select({
|
||||||
|
"@%ws%//tensorflow:android": [
|
||||||
|
"-pie",
|
||||||
|
],
|
||||||
|
"@%ws%//tensorflow:darwin": [
|
||||||
|
"-Wl,-framework",
|
||||||
|
"-Wl,CoreFoundation",
|
||||||
|
"-Wl,-framework",
|
||||||
|
"-Wl,Security",
|
||||||
|
],
|
||||||
|
"@%ws%//tensorflow:ios": [],
|
||||||
|
"@%ws%//tensorflow:windows": [
|
||||||
|
"ws2_32.lib",
|
||||||
|
],
|
||||||
|
"//conditions:default": [
|
||||||
|
"-lrt",
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
visibility = ["//visibility:public"],
|
||||||
|
deps = ["@local_config_cuda//cuda:cuda_headers"],
|
||||||
|
)
|
0
third_party/protobuf/BUILD
vendored
Normal file
0
third_party/protobuf/BUILD
vendored
Normal file
30
third_party/protobuf/add_noinlines.patch
vendored
Normal file
30
third_party/protobuf/add_noinlines.patch
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
diff -u -r a/src/google/protobuf/compiler/cpp/cpp_file.cc b/src/google/protobuf/compiler/cpp/cpp_file.cc
|
||||||
|
--- a/src/google/protobuf/compiler/cpp/cpp_file.cc 2017-02-10 23:55:34.000000000 +0100
|
||||||
|
+++ b/src/google/protobuf/compiler/cpp/cpp_file.cc 2017-03-21 13:41:46.931979154 +0100
|
||||||
|
@@ -557,7 +557,7 @@
|
||||||
|
" $metadata$, $enum_descriptors$, $service_descriptors$);\n"
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
- "void protobuf_AssignDescriptorsOnce() {\n"
|
||||||
|
+ "GOOGLE_ATTRIBUTE_NOINLINE void protobuf_AssignDescriptorsOnce() {\n"
|
||||||
|
" static GOOGLE_PROTOBUF_DECLARE_ONCE(once);\n"
|
||||||
|
" ::google::protobuf::GoogleOnceInit(&once, &protobuf_AssignDescriptors);\n"
|
||||||
|
"}\n"
|
||||||
|
@@ -656,7 +656,7 @@
|
||||||
|
printer->Print(
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
- "void InitDefaults() {\n"
|
||||||
|
+ "GOOGLE_ATTRIBUTE_NOINLINE void InitDefaults() {\n"
|
||||||
|
" static GOOGLE_PROTOBUF_DECLARE_ONCE(once);\n"
|
||||||
|
" ::google::protobuf::GoogleOnceInit(&once, &TableStruct::InitDefaultsImpl);\n"
|
||||||
|
"}\n");
|
||||||
|
@@ -737,7 +737,7 @@
|
||||||
|
printer->Print(
|
||||||
|
"}\n"
|
||||||
|
"\n"
|
||||||
|
- "void AddDescriptors() {\n"
|
||||||
|
+ "GOOGLE_ATTRIBUTE_NOINLINE void AddDescriptors() {\n"
|
||||||
|
" static GOOGLE_PROTOBUF_DECLARE_ONCE(once);\n"
|
||||||
|
" ::google::protobuf::GoogleOnceInit(&once, &AddDescriptorsImpl);\n"
|
||||||
|
"}\n");
|
@ -1,6 +1,11 @@
|
|||||||
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
|
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
|
||||||
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
||||||
|
|
||||||
|
build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
|
||||||
|
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true
|
||||||
|
|
||||||
build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
build:win-cuda --define=using_cuda=true --define=using_cuda_nvcc=true
|
||||||
|
|
||||||
build:mkl --define=using_mkl=true
|
build:mkl --define=using_mkl=true
|
||||||
|
|
||||||
build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
|
build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
|
||||||
|
Loading…
Reference in New Issue
Block a user