1414 lines
50 KiB
Python
1414 lines
50 KiB
Python
"""Repository rule for CUDA autoconfiguration.
|
|
|
|
`cuda_configure` depends on the following environment variables:
|
|
|
|
* `TF_NEED_CUDA`: Whether to enable building with CUDA.
|
|
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
|
|
* `TF_CUDA_CLANG`: Whether to use clang as a cuda compiler.
|
|
* `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
|
|
both host and device code compilation if TF_CUDA_CLANG is 1.
|
|
* `TF_SYSROOT`: The sysroot to use when compiling.
|
|
* `TF_DOWNLOAD_CLANG`: Whether to download a recent release of clang
|
|
compiler and use it to build tensorflow. When this option is set
|
|
CLANG_CUDA_COMPILER_PATH is ignored.
|
|
* `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is
|
|
`/usr/local/cuda,usr/`.
|
|
* `CUDA_TOOLKIT_PATH` (deprecated): The path to the CUDA toolkit. Default is
|
|
`/usr/local/cuda`.
|
|
* `TF_CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then
|
|
use the system default.
|
|
* `TF_CUDNN_VERSION`: The version of the cuDNN library.
|
|
* `CUDNN_INSTALL_PATH` (deprecated): The path to the cuDNN library. Default is
|
|
`/usr/local/cuda`.
|
|
* `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
|
|
`3.5,5.2`.
|
|
* `PYTHON_BIN_PATH`: The python binary path
|
|
"""
|
|
|
|
load("//third_party/clang_toolchain:download_clang.bzl", "download_clang")
|
|
load(
|
|
"@bazel_tools//tools/cpp:lib_cc_configure.bzl",
|
|
"escape_string",
|
|
"get_env_var",
|
|
)
|
|
load(
|
|
"@bazel_tools//tools/cpp:windows_cc_configure.bzl",
|
|
"find_msvc_tool",
|
|
"find_vc_path",
|
|
"setup_vc_env_vars",
|
|
)
|
|
load(
|
|
"//third_party/remote_config:common.bzl",
|
|
"config_repo_label",
|
|
"err_out",
|
|
"execute",
|
|
"get_bash_bin",
|
|
"get_cpu_value",
|
|
"get_host_environ",
|
|
"get_python_bin",
|
|
"is_windows",
|
|
"raw_exec",
|
|
"read_dir",
|
|
"realpath",
|
|
"which",
|
|
)
|
|
|
|
# Names of the environment variables this repository rule refers to. Keeping
# them as constants avoids repeating the string literals at every lookup.
# NOTE(review): not every constant is referenced in the visible part of this
# file — confirm before removing any of them.
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
_TF_SYSROOT = "TF_SYSROOT"
_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
_TF_CUDA_VERSION = "TF_CUDA_VERSION"
_TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH"
_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES"
_TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO"
_TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG"
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"
|
|
|
|
def to_list_of_strings(elements):
    """Render a list of strings as the inside of a Starlark list literal.

    For example ["a", "b", "c"] becomes the single string '"a", "b", "c"'.
    The result is meant to be substituted into .bzl/BUILD templates so that
    it is parsed back as a list of strings.

    Args:
      elements: list of string elements

    Returns:
      One string: every element wrapped in double quotes, joined by ", ".
    """
    quoted = []
    for element in elements:
        quoted.append('"' + element + '"')
    return ", ".join(quoted)
|
|
|
|
def verify_build_defines(params):
    """Check that every variable crosstool/BUILD.tpl expects has a substitution.

    Each expected name is looked up in `params` as the key "%{<name>}". If
    any is absent the configuration is aborted through auto_configure_fail.

    Args:
      params: dict of variables that will be passed to the BUILD.tpl template.
    """
    expected = [
        "cxx_builtin_include_directories",
        "extra_no_canonical_prefixes_flags",
        "host_compiler_path",
        "host_compiler_prefix",
        "host_compiler_warnings",
        "linker_bin_path",
        "compiler_deps",
        "msvc_cl_path",
        "msvc_env_include",
        "msvc_env_lib",
        "msvc_env_path",
        "msvc_env_tmp",
        "msvc_lib_path",
        "msvc_link_path",
        "msvc_ml_path",
        "unfiltered_compile_flags",
        "win_compiler_deps",
    ]
    missing = [name for name in expected if ("%{" + name + "}") not in params]
    if not missing:
        return

    auto_configure_fail(
        "BUILD.tpl template is missing these variables: " +
        str(missing) +
        ".\nWe only got: " +
        str(params) +
        ".",
    )
|
|
|
|
def _get_nvcc_tmp_dir_for_windows(repository_ctx):
    """Return the Windows tmp directory for nvcc to generate intermediate source files.

    The base directory is the TMP environment variable (falling back to
    C:\\Windows\\Temp). Its backslashes are doubled and the result is passed
    through escape_string before the fixed sub-directory name is appended.
    """
    tmp_dir = get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp")
    doubled_backslashes = tmp_dir.replace("\\", "\\\\")
    return escape_string(doubled_backslashes) + "\\\\nvcc_inter_files_tmp_dir"
|
|
|
|
def _get_msvc_compiler(repository_ctx):
    """Returns the path of MSVC's cl.exe with backslashes turned into '/'."""
    cl_path = find_msvc_tool(
        repository_ctx,
        find_vc_path(repository_ctx),
        "cl.exe",
    )
    return cl_path.replace("\\", "/")
|
|
|
|
def _get_win_cuda_defines(repository_ctx):
    """Return CROSSTOOL defines for Windows.

    Args:
      repository_ctx: The repository context.

    Returns:
      A dict of template substitutions keyed by the "%{msvc_*}" placeholders.
      On Windows the dict additionally contains
      "%{cxx_builtin_include_directories}"; on every other platform each MSVC
      field is set to the dummy value "msvc_not_used".
    """

    # If we are not on Windows, return fake values for Windows specific fields.
    # This ensures the CROSSTOOL file parser is happy.
    if not is_windows(repository_ctx):
        return {
            "%{msvc_env_tmp}": "msvc_not_used",
            "%{msvc_env_path}": "msvc_not_used",
            "%{msvc_env_include}": "msvc_not_used",
            "%{msvc_env_lib}": "msvc_not_used",
            "%{msvc_cl_path}": "msvc_not_used",
            "%{msvc_ml_path}": "msvc_not_used",
            "%{msvc_link_path}": "msvc_not_used",
            "%{msvc_lib_path}": "msvc_not_used",
        }

    vc_path = find_vc_path(repository_ctx)
    if not vc_path:
        # The two literals are concatenated, so the first one must end with a
        # space to keep the sentences apart in the final message.
        auto_configure_fail(
            "Visual C++ build tools not found on your machine. " +
            "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using",
        )
        return {}

    env = setup_vc_env_vars(repository_ctx, vc_path)
    escaped_paths = escape_string(env["PATH"])
    escaped_include_paths = escape_string(env["INCLUDE"])
    escaped_lib_paths = escape_string(env["LIB"])
    escaped_tmp_dir = escape_string(
        get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
            "\\",
            "\\\\",
        ),
    )

    # NOTE(review): the "cl" slot is filled with the Python interpreter,
    # presumably because compilation goes through the Python wrapper
    # msvc_wrapper_for_nvcc.py — confirm against the crosstool templates.
    msvc_cl_path = get_python_bin(repository_ctx)
    msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
        "\\",
        "/",
    )
    msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
        "\\",
        "/",
    )
    msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace(
        "\\",
        "/",
    )

    # nvcc will generate some temporary source files under %{nvcc_tmp_dir}
    # The generated files are guaranteed to have unique name, so they can share
    # the same tmp directory
    escaped_cxx_include_directories = [
        _get_nvcc_tmp_dir_for_windows(repository_ctx),
    ]

    # INCLUDE is a ';'-separated list; empty entries are dropped.
    for path in escaped_include_paths.split(";"):
        if path:
            escaped_cxx_include_directories.append(path)

    return {
        "%{msvc_env_tmp}": escaped_tmp_dir,
        "%{msvc_env_path}": escaped_paths,
        "%{msvc_env_include}": escaped_include_paths,
        "%{msvc_env_lib}": escaped_lib_paths,
        "%{msvc_cl_path}": msvc_cl_path,
        "%{msvc_ml_path}": msvc_ml_path,
        "%{msvc_link_path}": msvc_link_path,
        "%{msvc_lib_path}": msvc_lib_path,
        "%{cxx_builtin_include_directories}": to_list_of_strings(
            escaped_cxx_include_directories,
        ),
    }
|
|
|
|
# TODO(dzc): Once these functions have been factored out of Bazel's
|
|
# cc_configure.bzl, load them from @bazel_tools instead.
|
|
# BEGIN cc_configure common functions.
|
|
def find_cc(repository_ctx):
    """Find the C++ compiler.

    On Windows the MSVC compiler is returned. Otherwise clang or gcc is
    selected depending on TF_CUDA_CLANG; the matching *_COMPILER_PATH
    environment variable, when set, overrides the default binary name.

    Args:
      repository_ctx: The repository context.

    Returns:
      The compiler path: the value from the environment verbatim when it
      starts with "/", the fixed "extra_tools/bin/clang" when clang is
      downloaded, or whatever `which` resolves the name to.
    """
    if is_windows(repository_ctx):
        return _get_msvc_compiler(repository_ctx)

    use_clang = _use_cuda_clang(repository_ctx)
    if use_clang and _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG):
        return "extra_tools/bin/clang"

    if use_clang:
        target_cc_name = "clang"
        cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
    else:
        target_cc_name = "gcc"
        cc_path_envvar = _GCC_HOST_COMPILER_PATH

    # A non-empty environment value wins over the default compiler name.
    cc_name = get_host_environ(repository_ctx, cc_path_envvar) or target_cc_name
    if cc_name.startswith("/"):
        # Absolute path, maybe we should make this supported by our which function.
        return cc_name

    resolved = which(repository_ctx, cc_name)
    if resolved != None:
        return resolved
    fail(("Cannot find {}, either correct your path or set the {}" +
          " environment variable").format(target_cc_name, cc_path_envvar))
|
|
|
|
# Text that introduces the include search list in the compiler's verbose
# output; _get_cxx_inc_directories_impl searches stderr for it.
_INC_DIR_MARKER_BEGIN = "#include <...>"

# OSX adds " (framework directory)" at the end of the line; it is stripped
# by _cxx_inc_convert.
_OSX_FRAMEWORK_SUFFIX = " (framework directory)"

# Precomputed length of the suffix, used to slice it off.
_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX)
|
|
|
|
def _cxx_inc_convert(path):
    """Clean up one include-path line printed by `cc -E -xc++ -v`.

    Surrounding whitespace is removed, and a trailing
    " (framework directory)" marker is cut off when present.
    """
    cleaned = path.strip()
    if not cleaned.endswith(_OSX_FRAMEWORK_SUFFIX):
        return cleaned
    return cleaned[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip()
|
|
|
|
def _normalize_include_path(repository_ctx, path):
    """Normalizes include paths before writing them to the crosstool.

    A path whose string form starts with the repository's 'crosstool' folder
    is returned relative to that folder; any other path is returned in the
    form produced by repository_ctx.path().
    """
    resolved = str(repository_ctx.path(path))
    crosstool_prefix = str(repository_ctx.path(".").get_child("crosstool"))

    if not resolved.startswith(crosstool_prefix):
        return resolved

    # Drop "$REPO/crosstool" plus the path separator that follows it.
    return resolved[len(crosstool_prefix) + 1:]
|
|
|
|
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
    """Compute the list of default C or C++ include directories.

    Runs `cc -E -x<lang> - -v` (plus `--sysroot` when tf_sysroot is set) and
    extracts the include search list from its stderr.

    Args:
      repository_ctx: The repository context.
      cc: The compiler to query.
      lang_is_cpp: True to query for C++, False for C.
      tf_sysroot: Optional sysroot forwarded to the compiler.

    Returns:
      The normalized include directories, or [] when the search list cannot
      be located in the compiler output.
    """
    lang = "c++" if lang_is_cpp else "c"
    sysroot_args = ["--sysroot", tf_sysroot] if tf_sysroot else []
    result = raw_exec(
        repository_ctx,
        [cc, "-E", "-x" + lang, "-", "-v"] + sysroot_args,
    )
    stderr = err_out(result)

    # The list starts on the line after the "#include <...>" marker.
    marker_pos = stderr.find(_INC_DIR_MARKER_BEGIN)
    if marker_pos == -1:
        return []
    list_start = stderr.find("\n", marker_pos)
    if list_start == -1:
        return []

    # Entries are lines that begin with a space; the last such line closes
    # the list.
    last_entry = stderr.rfind("\n ")
    if last_entry == -1 or last_entry < list_start:
        return []
    list_end = stderr.find("\n", last_entry + 1)

    if list_end == -1:
        listing = stderr[list_start + 1:]
    else:
        listing = stderr[list_start + 1:list_end].strip()

    directories = []
    for entry in listing.split("\n"):
        directories.append(
            _normalize_include_path(repository_ctx, _cxx_inc_convert(entry)),
        )
    return directories
|
|
|
|
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
    """Compute the list of default C and C++ include directories.

    Returns:
      The C++ include directories followed by those C include directories
      that are not already among the C++ ones.
    """

    # For some reason `clang -xc` sometimes returns include paths that are
    # different from the ones from `clang -xc++`. (Symlink and a dir)
    # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
    cpp_dirs = _get_cxx_inc_directories_impl(repository_ctx, cc, True, tf_sysroot)
    c_dirs = _get_cxx_inc_directories_impl(repository_ctx, cc, False, tf_sysroot)

    merged = list(cpp_dirs)
    for inc in c_dirs:
        if inc not in cpp_dirs:
            merged.append(inc)
    return merged
|
|
|
|
def auto_configure_fail(msg):
    """Abort with a CUDA configuration error message.

    The "Cuda Configuration Error:" banner is wrapped in ANSI escape codes
    (red, then reset) and followed by `msg`.

    Args:
      msg: The text describing what went wrong.
    """
    ansi_red = "\033[0;31m"
    ansi_reset = "\033[0m"
    banner = "\n%sCuda Configuration Error:%s %s\n" % (ansi_red, ansi_reset, msg)
    fail(banner)
|
|
|
|
# END cc_configure common functions (see TODO above).
|
|
|
|
def _cuda_include_path(repository_ctx, cuda_config):
    """Returns the include directories of the CUDA toolkit.

    Runs `nvcc -v` and scans its output for the `_TARGET_DIR_` setting to
    find the target-specific include directory.

    Args:
      repository_ctx: The repository context.
      cuda_config: The CUDA config struct; its `cuda_toolkit_path` and
        `cpu_value` fields are read.

    Returns:
      A list of resolved include directories: the target-specific directory
      (only when nvcc reported `_TARGET_DIR_`) followed by
      `<cuda_toolkit_path>/include`.
    """
    nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % (
        cuda_config.cuda_toolkit_path,
        ".exe" if cuda_config.cpu_value == "Windows" else "",
    ))

    # The expected exit code of this command is non-zero. Bazel remote execution
    # only caches commands with zero exit code. So force a zero exit code.
    # The nvcc path is double-quoted so that a toolkit location containing
    # spaces is not split into several words by the shell.
    cmd = "\"%s\" -v /dev/null -o /dev/null ; [ $? -eq 1 ]" % str(nvcc_path)
    result = raw_exec(repository_ctx, [get_bash_bin(repository_ctx), "-c", cmd])

    # If several lines match, the last one wins.
    target_dir = ""
    for one_line in err_out(result).splitlines():
        if one_line.startswith("#$ _TARGET_DIR_="):
            target_dir = (
                cuda_config.cuda_toolkit_path + "/" + one_line.replace(
                    "#$ _TARGET_DIR_=",
                    "",
                ) + "/include"
            )

    inc_entries = []
    if target_dir != "":
        inc_entries.append(realpath(repository_ctx, target_dir))
    inc_entries.append(realpath(repository_ctx, cuda_config.cuda_toolkit_path + "/include"))
    return inc_entries
|
|
|
|
def enable_cuda(repository_ctx):
    """Returns whether to build with CUDA support.

    The value of the TF_NEED_CUDA host environment variable (default False)
    is converted with int(), so the result is an integer.
    """
    need_cuda = get_host_environ(repository_ctx, "TF_NEED_CUDA", False)
    return int(need_cuda)
|
|
|
|
def matches_version(environ_version, detected_version):
    """Checks whether the user-specified version matches the detected version.

    The match is weak: the user may give only the major, or the major and
    minor, component. The versions match when every component the user gave
    equals the corresponding detected component. To illustrate:

        environ_version   detected_version    result
        -----------------------------------------
        5.1.3             5.1.3               True
        5.1               5.1.3               True
        5                 5.1                 True
        5.1.3             5.1                 False
        5.2.3             5.1.3               False

    Args:
      environ_version: The version specified by the user via environment
        variables.
      detected_version: The version autodetected from the CUDA installation on
        the system.

    Returns:
      True if user-specified version matches detected version and False
      otherwise.
    """
    wanted = environ_version.split(".")
    found = detected_version.split(".")

    # The user asked for more precision than was detected: no match.
    if len(found) < len(wanted):
        return False

    # Compare only as many leading components as the user specified.
    return found[:len(wanted)] == wanted
|
|
|
|
# Literal text that precedes the release number in nvcc's version banner.
# NOTE(review): not referenced in the visible part of this file — confirm it
# is still used.
_NVCC_VERSION_PREFIX = "Cuda compilation tools, release "

# Literal text of the cuDNN major-version macro definition.
# NOTE(review): not referenced in the visible part of this file — confirm it
# is still used.
_DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR"
|
|
|
|
def compute_capabilities(repository_ctx):
    """Returns a list of strings representing cuda compute capabilities.

    The comma-separated TF_CUDA_COMPUTE_CAPABILITIES value (default
    "compute_35,compute_52") is split, legacy 'x.y' entries are rewritten to
    'compute_xy', duplicates are removed keeping the first occurrence, and
    every entry is validated.

    Args:
      repository_ctx: the repo rule's context.

    Returns:
      list of cuda architectures to compile for. 'compute_xy' refers to
      both PTX and SASS, 'sm_xy' refers to SASS only.
    """
    raw_value = get_host_environ(
        repository_ctx,
        _TF_CUDA_COMPUTE_CAPABILITIES,
        "compute_35,compute_52",
    )

    # Map old 'x.y' capabilities to 'compute_xy'.
    normalized = []
    for entry in raw_value.split(","):
        pieces = entry.split(".")
        if len(pieces) == 2:
            entry = "compute_%s%s" % (pieces[0], pieces[1])
        normalized.append(entry)

    # Make list unique (dict keys keep the insertion order).
    capabilities = {entry: entry for entry in normalized}.keys()

    # Validate capabilities: a known prefix followed by exactly two digits.
    for capability in capabilities:
        suffix = None
        for prefix in ["compute_", "sm_"]:
            if capability.startswith(prefix):
                suffix = capability[len(prefix):]
        if suffix == None or len(suffix) != 2 or not suffix.isdigit():
            auto_configure_fail("Invalid compute capability: %s" % capability)

    return capabilities
|
|
|
|
def lib_name(base_name, cpu_value, version = None, static = False):
    """Constructs the platform-specific name of a library.

    Args:
      base_name: The name of the library, such as "cudart"
      cpu_value: The name of the host operating system.
      version: The version of the library.
      static: True if the library is static or False if it is a shared object.

    Returns:
      The platform-specific name of the library. On Windows the name is
      always "<base_name>.lib"; static libraries never carry the version.
    """
    version_suffix = "." + version if version else ""

    if cpu_value in ("Linux", "FreeBSD", "Darwin"):
        if static:
            return "lib%s.a" % base_name
        if cpu_value == "Darwin":
            # Darwin puts the version before the extension.
            return "lib%s%s.dylib" % (base_name, version_suffix)
        return "lib%s.so%s" % (base_name, version_suffix)

    if cpu_value == "Windows":
        return "%s.lib" % base_name

    auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
|
|
|
|
def _lib_path(lib, cpu_value, basedir, version, static):
    """Returns `basedir` joined with the platform-specific file name of `lib`."""
    return "%s/%s" % (basedir, lib_name(lib, cpu_value, version, static))
|
|
|
|
def _should_check_soname(version, static):
    """Returns a truthy value only for versioned, non-static libraries.

    When `version` is falsy it is returned unchanged; otherwise the result
    is the boolean `not static`.
    """
    if not version:
        return version
    return not static
|
|
|
|
def _check_cuda_lib_params(lib, cpu_value, basedir, version, static = False):
    """Returns the (library path, should-check-soname) pair for one library."""
    path = _lib_path(lib, cpu_value, basedir, version, static)
    check_soname = _should_check_soname(version, static)
    return (path, check_soname)
|
|
|
|
def _check_cuda_libs(repository_ctx, script_path, libs):
    """Runs the library check script and fails unless it echoes every path.

    The script at `script_path` is read here and embedded, line by line, in
    a `python -c` command that writes it to 'script.py' and then runs it
    with one `"<path>" <check>` argument pair per library.

    Args:
      repository_ctx: The repository context.
      script_path: Path of the check script to embed.
      libs: Iterable of (path, check) pairs.
    """
    python_bin = get_python_bin(repository_ctx)
    script_lines = repository_ctx.read(script_path).splitlines()

    # Assemble the one-line Python program statement by statement.
    statements = [
        "from os import linesep;",
        "f = open('script.py', 'w');",
    ]
    for line in script_lines:
        statements.append("f.write('%s' + linesep);" % line)
    statements.append("f.close();")
    statements.append("from os import system;")
    args = " ".join(['"' + path + '" ' + str(check) for path, check in libs])
    statements.append("system('%s script.py %s');" % (python_bin, args))
    cmd = "".join(statements)

    all_paths = [path for path, _ in libs]
    stdout = execute(repository_ctx, [python_bin, "-c", cmd]).stdout

    # Filter out empty lines from splitting on '\r\n' on Windows
    checked_paths = [line for line in stdout.splitlines() if len(line) > 0]
    if all_paths != checked_paths:
        auto_configure_fail("Error with installed CUDA libs. Expected '%s'. Actual '%s'." % (all_paths, checked_paths))
|
|
|
|
def _find_libs(repository_ctx, check_cuda_libs_script, cuda_config):
    """Returns the CUDA and cuDNN libraries on the system.

    Also verifies, through the check script, that the libraries exist.

    Args:
      repository_ctx: The repository context.
      check_cuda_libs_script: The path to a script verifying that the cuda
        libraries exist on the system.
      cuda_config: The CUDA config as returned by _get_cuda_config

    Returns:
      Dict mapping each library name to its expected file path.
    """
    cpu_value = cuda_config.cpu_value
    stub_dir = "" if is_windows(repository_ctx) else "/stubs"

    # One row per library:
    # (name, key of its directory in cuda_config.config, directory suffix,
    #  version, static).
    lib_specs = [
        ("cuda", "cuda_library_dir", stub_dir, None, False),
        ("cudart", "cuda_library_dir", "", cuda_config.cuda_version, False),
        ("cudart_static", "cuda_library_dir", "", cuda_config.cuda_version, True),
        ("cublas", "cublas_library_dir", "", cuda_config.cublas_version, False),
        ("cusolver", "cusolver_library_dir", "", cuda_config.cusolver_version, False),
        ("curand", "curand_library_dir", "", cuda_config.curand_version, False),
        ("cufft", "cufft_library_dir", "", cuda_config.cufft_version, False),
        ("cudnn", "cudnn_library_dir", "", cuda_config.cudnn_version, False),
        ("cupti", "cupti_library_dir", "", cuda_config.cuda_version, False),
        ("cusparse", "cusparse_library_dir", "", cuda_config.cusparse_version, False),
    ]

    check_cuda_libs_params = {}
    for lib, dir_key, dir_suffix, version, static in lib_specs:
        check_cuda_libs_params[lib] = _check_cuda_lib_params(
            lib,
            cpu_value,
            cuda_config.config[dir_key] + dir_suffix,
            version,
            static = static,
        )

    # Verify that the libs actually exist at their locations.
    _check_cuda_libs(repository_ctx, check_cuda_libs_script, check_cuda_libs_params.values())

    # Keep only the path (first tuple element) for each library.
    return {lib: params[0] for (lib, params) in check_cuda_libs_params.items()}
|
|
|
|
def _cudart_static_linkopt(cpu_value):
    """Returns additional platform-specific linkopts for cudart.

    Nothing is added on Darwin; everywhere else the quoted "-lrt" entry
    (with a trailing comma, ready for a list literal) is returned.
    """
    if cpu_value == "Darwin":
        return ""
    return "\"-lrt\","
|
|
|
|
def _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries):
    """Runs the compressed find_cuda_config script and returns the exec result.

    Args:
      repository_ctx: The repository context.
      script_path: Path of the zlib-compressed, base64-encoded script.
      cuda_libraries: Library names passed to the script as arguments.

    Returns:
      The result of execute() for the generated `python -c` command.
    """
    python_bin = get_python_bin(repository_ctx)

    # If used with remote execution then repository_ctx.execute() can't
    # access files from the source tree. A trick is to read the contents
    # of the file in Starlark and embed them as part of the command. In
    # this case the trick is not sufficient as the find_cuda_config.py
    # script has more than 8192 characters. 8192 is the command length
    # limit of cmd.exe on Windows. Thus we additionally need to compress
    # the contents locally and decompress them as part of the execute().
    compressed_contents = repository_ctx.read(script_path)
    statements = [
        "from zlib import decompress;",
        "from base64 import b64decode;",
        "from os import system;",
        "script = decompress(b64decode('%s'));" % compressed_contents,
        "f = open('script.py', 'wb');",
        "f.write(script);",
        "f.close();",
        "system('\"%s\" script.py %s');" % (python_bin, " ".join(cuda_libraries)),
    ]
    decompress_and_execute_cmd = "".join(statements)

    return execute(repository_ctx, [python_bin, "-c", decompress_and_execute_cmd])
|
|
|
|
# TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl,
|
|
# and nccl_configure.bzl.
|
|
def find_cuda_config(repository_ctx, script_path, cuda_libraries):
    """Returns CUDA config dictionary from running find_cuda_config.py.

    Args:
      repository_ctx: The repository context.
      script_path: Path of the compressed find_cuda_config script.
      cuda_libraries: Library names to look up, e.g. ["cuda", "cudnn"].

    Returns:
      A dict built from the "key: value" lines the script prints to stdout.
    """
    exec_result = _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries)
    if exec_result.return_code:
        auto_configure_fail("Failed to run find_cuda_config.py: %s" % err_out(exec_result))

    # Parse the dict from stdout.
    config = {}
    for line in exec_result.stdout.splitlines():
        # Blank lines can appear when splitting '\r\n' output on Windows;
        # they carry no entry.
        if not line.strip():
            continue

        # Split on the first separator only, so a value that itself contains
        # ": " stays intact.
        parts = line.split(": ", 1)
        if len(parts) != 2:
            auto_configure_fail("Unexpected output line from find_cuda_config.py: %s" % line)
        config[parts[0]] = parts[1]
    return config
|
|
|
|
def _get_cuda_config(repository_ctx, find_cuda_config_script):
    """Detects and returns information about the CUDA installation on the system.

    Args:
      repository_ctx: The repository context.
      find_cuda_config_script: Path of the script handed to find_cuda_config.

    Returns:
      A struct containing the following fields:
        cuda_toolkit_path: The CUDA toolkit installation directory.
        cuda_version: The CUDA version string used in library names.
        cublas_version, cusolver_version, curand_version, cufft_version,
          cusparse_version: The version strings of those libraries.
        cudnn_version: The cuDNN version string.
        compute_capabilities: The result of compute_capabilities().
        cpu_value: The name of the host operating system.
        config: The raw dict returned by find_cuda_config.
    """
    config = find_cuda_config(repository_ctx, find_cuda_config_script, ["cuda", "cudnn"])
    cpu_value = get_cpu_value(repository_ctx)
    toolkit_path = config["cuda_toolkit_path"]

    on_windows = cpu_value == "Windows"
    version_parts = config["cuda_version"].split(".")
    cuda_major = version_parts[0]
    cuda_minor = version_parts[1]

    # On Windows every version string carries a "64_" prefix, and the CUDA
    # version itself is written without the dot.
    lib_fmt = "64_%s" if on_windows else "%s"
    if on_windows:
        cuda_version = "64_%s%s" % (cuda_major, cuda_minor)
    else:
        cuda_version = "%s.%s" % (cuda_major, cuda_minor)
    cudnn_version = lib_fmt % config["cudnn_version"]

    math_libs = ["cublas", "cusolver", "curand", "cufft", "cusparse"]
    if int(cuda_major) >= 11:
        # Each library reports its own version; only its major part is used.
        lib_versions = {
            lib: lib_fmt % config[lib + "_version"].split(".")[0]
            for lib in math_libs
        }
    elif (int(cuda_major), int(cuda_minor)) >= (10, 1):
        # cuda_lib_version is for libraries like cuBLAS, cuFFT, cuSOLVER, etc.
        # It changed from 'x.y' to just 'x' in CUDA 10.1.
        cuda_lib_version = lib_fmt % cuda_major
        lib_versions = {lib: cuda_lib_version for lib in math_libs}
    else:
        lib_versions = {lib: cuda_version for lib in math_libs}

    return struct(
        cuda_toolkit_path = toolkit_path,
        cuda_version = cuda_version,
        cublas_version = lib_versions["cublas"],
        cusolver_version = lib_versions["cusolver"],
        curand_version = lib_versions["curand"],
        cufft_version = lib_versions["cufft"],
        cusparse_version = lib_versions["cusparse"],
        cudnn_version = cudnn_version,
        compute_capabilities = compute_capabilities(repository_ctx),
        cpu_value = cpu_value,
        config = config,
    )
|
|
|
|
def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Expands the template //third_party/gpus/<tpl>.tpl into the repository.

    Args:
      repository_ctx: The repository context.
      tpl: Template name in "package:file" form.
      substitutions: Dict of substitutions applied to the template.
      out: Output path; defaults to `tpl` with ":" replaced by "/".
    """
    destination = out if out else tpl.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % tpl)
    repository_ctx.template(destination, template_label, substitutions)
|
|
|
|
def _file(repository_ctx, label):
    """Copies //third_party/gpus/<label>.tpl into the repository unchanged.

    The output path is `label` with ":" replaced by "/"; no substitutions
    are applied.
    """
    destination = label.replace(":", "/")
    template_label = Label("//third_party/gpus/%s.tpl" % label)
    repository_ctx.template(destination, template_label, {})
|
|
|
|
# Contents of crosstool/error_gpu_disabled.bzl in the dummy repository. It
# defines a macro that fails with instructions to re-run ./configure.
# NOTE(review): the indentation inside this string was reconstructed (the
# embedded function body has to be indented to be valid Starlark) — confirm
# against the canonical file.
_DUMMY_CROSSTOOL_BZL_FILE = """
def error_gpu_disabled():
  fail("ERROR: Building with --config=cuda but TensorFlow is not configured " +
       "to build with GPU support. Please re-run ./configure and enter 'Y' " +
       "at the prompt to build with GPU support.")

  native.genrule(
      name = "error_gen_crosstool",
      outs = ["CROSSTOOL"],
      cmd = "echo 'Should not be run.' && exit 1",
  )

  native.filegroup(
      name = "crosstool",
      srcs = [":CROSSTOOL"],
      output_licenses = ["unencumbered"],
  )
"""

# Contents of crosstool/BUILD in the dummy repository: it loads the macro
# above and calls it.
_DUMMY_CROSSTOOL_BUILD_FILE = """
load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled")

error_gpu_disabled()
"""
|
|
|
|
def _create_dummy_repository(repository_ctx):
    """Creates a placeholder repository for builds without CUDA support.

    Writes the cuda/ build files with CUDA marked as not configured, empty
    stand-ins for the toolkit headers and libraries, empty config files, and
    a crosstool package whose BUILD file fails with an actionable message.

    Args:
      repository_ctx: The repository context.
    """
    cpu_value = get_cpu_value(repository_ctx)

    # Set up BUILD file for cuda/.
    _tpl(
        repository_ctx,
        "cuda:build_defs.bzl",
        {
            "%{cuda_is_configured}": "False",
            "%{cuda_extra_copts}": "[]",
            "%{cuda_gpu_architectures}": "[]",
        },
    )
    _tpl(
        repository_ctx,
        "cuda:BUILD",
        {
            "%{cuda_driver_lib}": lib_name("cuda", cpu_value),
            "%{cudart_static_lib}": lib_name(
                "cudart_static",
                cpu_value,
                static = True,
            ),
            "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value),
            "%{cudart_lib}": lib_name("cudart", cpu_value),
            "%{cublas_lib}": lib_name("cublas", cpu_value),
            "%{cusolver_lib}": lib_name("cusolver", cpu_value),
            "%{cudnn_lib}": lib_name("cudnn", cpu_value),
            "%{cufft_lib}": lib_name("cufft", cpu_value),
            "%{curand_lib}": lib_name("curand", cpu_value),
            "%{cupti_lib}": lib_name("cupti", cpu_value),
            "%{cusparse_lib}": lib_name("cusparse", cpu_value),
            "%{copy_rules}": """
filegroup(name="cuda-include")
filegroup(name="cublas-include")
filegroup(name="cusolver-include")
filegroup(name="cufft-include")
filegroup(name="cusparse-include")
filegroup(name="curand-include")
filegroup(name="cudnn-include")
""",
        },
    )

    # Create dummy files for the CUDA toolkit since they are still required by
    # tensorflow/core/platform/default/build_config:cuda.
    for header in [
        "cuda/cuda/include/cuda.h",
        "cuda/cuda/include/cublas.h",
        "cuda/cuda/include/cudnn.h",
        "cuda/cuda/extras/CUPTI/include/cupti.h",
    ]:
        repository_ctx.file(header)

    # (library, static). cudart_static is created under its static name so
    # the dummy file matches the %{cudart_static_lib} substitution above.
    for lib, static in [
        ("cuda", False),
        ("cudart", False),
        ("cudart_static", True),
        ("cublas", False),
        ("cusolver", False),
        ("cudnn", False),
        ("curand", False),
        ("cufft", False),
        ("cupti", False),
        ("cusparse", False),
    ]:
        repository_ctx.file(
            "cuda/cuda/lib/%s" % lib_name(lib, cpu_value, static = static),
        )

    # Set up cuda_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    _tpl(
        repository_ctx,
        "cuda:cuda_config.h",
        {
            "%{cuda_version}": "",
            "%{cublas_version}": "",
            "%{cusolver_version}": "",
            "%{curand_version}": "",
            "%{cufft_version}": "",
            "%{cusparse_version}": "",
            "%{cudnn_version}": "",
            "%{cuda_toolkit_path}": "",
        },
        "cuda/cuda/cuda_config.h",
    )

    # Set up cuda_config.py, which is used by gen_build_info to provide
    # static build environment info to the API
    _tpl(
        repository_ctx,
        "cuda:cuda_config.py",
        _py_tmpl_dict({}),
        "cuda/cuda/cuda_config.py",
    )

    # If cuda_configure is not configured to build with GPU support, and the user
    # attempts to build with --config=cuda, add a dummy build rule to intercept
    # this and fail with an actionable error message.
    repository_ctx.file(
        "crosstool/error_gpu_disabled.bzl",
        _DUMMY_CROSSTOOL_BZL_FILE,
    )
    repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE)
|
|
|
|
def _norm_path(path):
    """Returns a path with '/' separators and without one trailing slash.

    Args:
      path: The path string to normalize; may be empty.

    Returns:
      `path` with every backslash replaced by '/' and, if the result ends
      with '/', that single final character removed.
    """
    path = path.replace("\\", "/")

    # endswith() is safe on an empty string, unlike indexing with [-1].
    if path.endswith("/"):
        path = path[:-1]
    return path
|
|
|
|
def make_copy_files_rule(repository_ctx, name, srcs, outs):
    """Returns a rule to copy a set of files.

    Args:
      repository_ctx: The repository context (not used by this function).
      name: Name of the generated genrule.
      srcs: Source file paths.
      outs: Output file names, paired with `srcs` by position.

    Returns:
      The text of a genrule that copies each source to its output with
      `cp -f`, the copies being chained with `&&`.
    """

    # Copy files.
    copy_cmds = [
        'cp -f "%s" "$(location %s)"' % (src, out)
        for src, out in zip(srcs, outs)
    ]
    out_entries = [' "%s",' % out for out in outs]
    template = """genrule(
name = "%s",
outs = [
%s
],
cmd = \"""%s \""",
)"""
    return template % (name, "\n".join(out_entries), " && \\\n".join(copy_cmds))
|
|
|
|
def make_copy_dir_rule(repository_ctx, name, src_dir, out_dir, exceptions = None):
    """Returns a rule to recursively copy a directory.

    If exceptions is not None, it must be a list of files or directories in
    'src_dir'; these will be excluded from copying.

    Args:
      repository_ctx: The repository context.
      name: Name of the generated genrule.
      src_dir: Directory to copy from.
      out_dir: Directory, relative to the rule's output directory, to copy to.
      exceptions: Optional list of entries below `src_dir` to leave out.

    Returns:
      The text of a genrule that copies the directory with `cp -rLf` and
      then removes the excluded entries from the destination.
    """
    src_dir = _norm_path(src_dir)
    out_dir = _norm_path(out_dir)
    files = read_dir(repository_ctx, src_dir)

    if exceptions != None:
        excluded_prefixes = [src_dir + "/" + entry for entry in exceptions]
        kept = []
        for path in files:
            excluded = False
            for prefix in excluded_prefixes:
                if path.startswith(prefix):
                    excluded = True
            if not excluded:
                kept.append(path)
        files = kept

    out_entries = [(' "%s",' % path.replace(src_dir, out_dir)) for path in files]

    # '@D' already contains the relative path for a single file, see
    # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables
    if len(out_entries) > 1:
        dest_dir = "$(@D)/%s" % out_dir
    else:
        dest_dir = "$(@D)"

    # The copy is recursive, so excluded entries are deleted afterwards.
    post_cmd = ""
    if exceptions != None:
        post_cmd = "".join([
            " ; rm -fR " + dest_dir + "/" + entry
            for entry in exceptions
        ])

    template = """genrule(
name = "%s",
outs = [
%s
],
cmd = \"""cp -rLf "%s/." "%s/" %s\""",
)"""
    return template % (name, "\n".join(out_entries), src_dir, dest_dir, post_cmd)
|
|
|
|
def _flag_enabled(repository_ctx, flag_name):
    """Returns True when the host environment variable `flag_name` equals "1"."""
    flag_value = get_host_environ(repository_ctx, flag_name)
    return flag_value == "1"
|
|
|
|
def _use_cuda_clang(repository_ctx):
    """Returns True when TF_CUDA_CLANG is set to "1" in the host environment."""
    clang_requested = _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
    return clang_requested
|
|
|
|
def _tf_sysroot(repository_ctx):
    """Returns the TF_SYSROOT host environment value, or "" when it is unset."""
    sysroot = get_host_environ(repository_ctx, _TF_SYSROOT, "")
    return sysroot
|
|
|
|
def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
    """Returns the stringified list of --cuda-* compiler options.

    Every capability yields a `--cuda-gpu-arch=sm_xy` option. A `compute_xy`
    capability is first rewritten to `sm_xy` and additionally yields
    `--cuda-include-ptx=sm_xy` ahead of it.

    Args:
      repository_ctx: The repository context (not used by this function).
      compute_capabilities: Capabilities in 'compute_xy' / 'sm_xy' form.

    Returns:
      str() of the list of options.
    """
    copts = []
    for capability in compute_capabilities:
        arch = capability
        if arch.startswith("compute_"):
            arch = arch.replace("compute_", "sm_")
            copts += ["--cuda-include-ptx=%s" % arch]
        copts += ["--cuda-gpu-arch=%s" % arch]

    return str(copts)
|
|
|
|
def _tpl_path(repository_ctx, filename):
    """Returns the path of the template //third_party/gpus/<filename>.tpl."""
    template_label = Label("//third_party/gpus/%s.tpl" % filename)
    return repository_ctx.path(template_label)
|
|
|
|
def _basename(repository_ctx, path_str):
    """Returns the last component of a path given as a string.

    Unlike path.basename, this also works when the host platform differs
    from the execution platform (i.e. linux -> windows).

    Args:
      repository_ctx: the repository_ctx object, used to detect Windows.
      path_str: the path, as a string.

    Returns:
      Everything after the last "/" (or, on Windows, the last "/" or "\\");
      the whole string if it contains no separator.
    """
    last_separator = path_str.rfind("/")
    if is_windows(repository_ctx):
        # Backslashes only count as separators on Windows.
        last_separator = max(last_separator, path_str.rfind("\\"))

    # rfind yields -1 when nothing matched, which makes the slice start at 0.
    return path_str[last_separator + 1:]
|
|
|
|
def _cudnn_major_at_least(cudnn_version, minimum):
    """Returns True if the major part of `cudnn_version` is >= `minimum`.

    Args:
      cudnn_version: version string, optionally carrying a prefix separated
        by "_" (e.g. "64_8" on Windows) and optionally dotted (e.g. "8.0").
      minimum: the minimal major version, as an int.

    Returns:
      A boolean.
    """

    # The version proper is whatever follows the last "_".
    major = cudnn_version.rsplit("_", 1)[-1].split(".")[0]
    if major.isdigit():
        # Compare numerically so that e.g. "10" is not ordered before "8".
        return int(major) >= minimum

    # Not a plain number: keep the historical string comparison.
    return major >= str(minimum)

def _create_local_cuda_repository(repository_ctx):
    """Creates the repository containing files set up to build with CUDA.

    Generates the cuda/ and crosstool/ packages from the templates in
    //third_party/gpus, using the configuration returned by _get_cuda_config.

    Args:
      repository_ctx: the repository_ctx object.
    """

    # Resolve all labels before doing any real work. Resolving causes the
    # function to be restarted with all previous state being lost. This
    # can easily lead to a O(n^2) runtime in the number of labels.
    # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778
    tpl_paths = {filename: _tpl_path(repository_ctx, filename) for filename in [
        "cuda:build_defs.bzl",
        "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
        "crosstool:windows/msvc_wrapper_for_nvcc.py",
        "crosstool:BUILD",
        "crosstool:cc_toolchain_config.bzl",
        "cuda:cuda_config.h",
        "cuda:cuda_config.py",
    ]}
    tpl_paths["cuda:BUILD"] = _tpl_path(repository_ctx, "cuda:BUILD.windows" if is_windows(repository_ctx) else "cuda:BUILD")
    find_cuda_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64"))

    cuda_config = _get_cuda_config(repository_ctx, find_cuda_config_script)

    cuda_include_path = cuda_config.config["cuda_include_dir"]
    cublas_include_path = cuda_config.config["cublas_include_dir"]
    cudnn_header_dir = cuda_config.config["cudnn_include_dir"]
    cupti_header_dir = cuda_config.config["cupti_include_dir"]
    nvvm_libdevice_dir = cuda_config.config["nvvm_library_dir"]

    # Create genrule to copy files from the installed CUDA toolkit into execroot.
    copy_rules = [
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-include",
            src_dir = cuda_include_path,
            out_dir = "cuda/include",
        ),
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-nvvm",
            src_dir = nvvm_libdevice_dir,
            out_dir = "cuda/nvvm/libdevice",
        ),
        make_copy_dir_rule(
            repository_ctx,
            name = "cuda-extras",
            src_dir = cupti_header_dir,
            out_dir = "cuda/extras/CUPTI/include",
        ),
    ]

    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cublas-include",
        srcs = [
            cublas_include_path + "/cublas.h",
            cublas_include_path + "/cublas_v2.h",
            cublas_include_path + "/cublas_api.h",
        ],
        outs = [
            "cublas/include/cublas.h",
            "cublas/include/cublas_v2.h",
            "cublas/include/cublas_api.h",
        ],
    ))

    cusolver_include_path = cuda_config.config["cusolver_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cusolver-include",
        srcs = [
            cusolver_include_path + "/cusolver_common.h",
            cusolver_include_path + "/cusolverDn.h",
        ],
        outs = [
            "cusolver/include/cusolver_common.h",
            "cusolver/include/cusolverDn.h",
        ],
    ))

    cufft_include_path = cuda_config.config["cufft_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cufft-include",
        srcs = [
            cufft_include_path + "/cufft.h",
        ],
        outs = [
            "cufft/include/cufft.h",
        ],
    ))

    cusparse_include_path = cuda_config.config["cusparse_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cusparse-include",
        srcs = [
            cusparse_include_path + "/cusparse.h",
        ],
        outs = [
            "cusparse/include/cusparse.h",
        ],
    ))

    curand_include_path = cuda_config.config["curand_include_dir"]
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "curand-include",
        srcs = [
            curand_include_path + "/curand.h",
        ],
        outs = [
            "curand/include/curand.h",
        ],
    ))

    check_cuda_libs_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:check_cuda_libs.py"))
    cuda_libs = _find_libs(repository_ctx, check_cuda_libs_script, cuda_config)
    cuda_lib_srcs = []
    cuda_lib_outs = []
    for path in cuda_libs.values():
        cuda_lib_srcs.append(path)
        cuda_lib_outs.append("cuda/lib/" + _basename(repository_ctx, path))
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cuda-lib",
        srcs = cuda_lib_srcs,
        outs = cuda_lib_outs,
    ))

    # copy files mentioned in third_party/nccl/build_defs.bzl.tpl
    file_ext = ".exe" if is_windows(repository_ctx) else ""
    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cuda-bin",
        srcs = [
            cuda_config.cuda_toolkit_path + "/bin/" + "crt/link.stub",
            cuda_config.cuda_toolkit_path + "/bin/" + "nvlink" + file_ext,
            cuda_config.cuda_toolkit_path + "/bin/" + "fatbinary" + file_ext,
            cuda_config.cuda_toolkit_path + "/bin/" + "bin2c" + file_ext,
        ],
        outs = [
            "cuda/bin/" + "crt/link.stub",
            "cuda/bin/" + "nvlink" + file_ext,
            "cuda/bin/" + "fatbinary" + file_ext,
            "cuda/bin/" + "bin2c" + file_ext,
        ],
    ))

    # Select the headers based on the cuDNN version (the '64_' prefix used on
    # Windows is ignored, and the major version is compared numerically).
    cudnn_headers = ["cudnn.h"]
    if _cudnn_major_at_least(cuda_config.cudnn_version, 8):
        cudnn_headers += [
            "cudnn_backend.h",
            "cudnn_adv_infer.h",
            "cudnn_adv_train.h",
            "cudnn_cnn_infer.h",
            "cudnn_cnn_train.h",
            "cudnn_ops_infer.h",
            "cudnn_ops_train.h",
            "cudnn_version.h",
        ]

    cudnn_srcs = []
    cudnn_outs = []
    for header in cudnn_headers:
        cudnn_srcs.append(cudnn_header_dir + "/" + header)
        cudnn_outs.append("cudnn/include/" + header)

    copy_rules.append(make_copy_files_rule(
        repository_ctx,
        name = "cudnn-include",
        srcs = cudnn_srcs,
        outs = cudnn_outs,
    ))

    # Set up BUILD file for cuda/
    repository_ctx.template(
        "cuda/build_defs.bzl",
        tpl_paths["cuda:build_defs.bzl"],
        {
            "%{cuda_is_configured}": "True",
            "%{cuda_extra_copts}": _compute_cuda_extra_copts(
                repository_ctx,
                cuda_config.compute_capabilities,
            ),
            "%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities),
        },
    )

    repository_ctx.template(
        "cuda/BUILD",
        tpl_paths["cuda:BUILD"],
        {
            "%{cuda_driver_lib}": _basename(repository_ctx, cuda_libs["cuda"]),
            "%{cudart_static_lib}": _basename(repository_ctx, cuda_libs["cudart_static"]),
            "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value),
            "%{cudart_lib}": _basename(repository_ctx, cuda_libs["cudart"]),
            "%{cublas_lib}": _basename(repository_ctx, cuda_libs["cublas"]),
            "%{cusolver_lib}": _basename(repository_ctx, cuda_libs["cusolver"]),
            "%{cudnn_lib}": _basename(repository_ctx, cuda_libs["cudnn"]),
            "%{cufft_lib}": _basename(repository_ctx, cuda_libs["cufft"]),
            "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]),
            "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]),
            "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]),
            "%{copy_rules}": "\n".join(copy_rules),
        },
    )

    is_cuda_clang = _use_cuda_clang(repository_ctx)
    tf_sysroot = _tf_sysroot(repository_ctx)

    should_download_clang = is_cuda_clang and _flag_enabled(
        repository_ctx,
        _TF_DOWNLOAD_CLANG,
    )
    if should_download_clang:
        download_clang(repository_ctx, "crosstool/extra_tools")

    # Set up crosstool/
    cc = find_cc(repository_ctx)
    cc_fullpath = cc if not should_download_clang else "crosstool/" + cc

    host_compiler_includes = get_cxx_inc_directories(
        repository_ctx,
        cc_fullpath,
        tf_sysroot,
    )
    cuda_defines = {}
    cuda_defines["%{builtin_sysroot}"] = tf_sysroot
    cuda_defines["%{cuda_toolkit_path}"] = ""
    cuda_defines["%{compiler}"] = "unknown"
    if is_cuda_clang:
        cuda_defines["%{cuda_toolkit_path}"] = cuda_config.config["cuda_toolkit_path"]
        cuda_defines["%{compiler}"] = "clang"

    host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX)
    if not host_compiler_prefix:
        host_compiler_prefix = "/usr/bin"

    cuda_defines["%{host_compiler_prefix}"] = host_compiler_prefix

    # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see
    # https://github.com/bazelbuild/bazel/issues/760).
    # However, this stops our custom clang toolchain from picking the provided
    # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded
    # toolchain.
    # TODO: when bazel stops adding '-B/usr/bin' by default, remove this
    #       flag from the CROSSTOOL completely (see
    #       https://github.com/bazelbuild/bazel/issues/5634)
    if should_download_clang:
        cuda_defines["%{linker_bin_path}"] = ""
    else:
        cuda_defines["%{linker_bin_path}"] = host_compiler_prefix

    cuda_defines["%{extra_no_canonical_prefixes_flags}"] = ""
    cuda_defines["%{unfiltered_compile_flags}"] = ""
    if is_cuda_clang:
        cuda_defines["%{host_compiler_path}"] = str(cc)
        cuda_defines["%{host_compiler_warnings}"] = """
        # Some parts of the codebase set -Werror and hit this warning, so
        # switch it off for now.
        "-Wno-invalid-partial-specialization"
    """
        cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(host_compiler_includes)
        cuda_defines["%{compiler_deps}"] = ":empty"
        cuda_defines["%{win_compiler_deps}"] = ":empty"

        # The wrapper scripts are not used with clang; create them empty.
        repository_ctx.file(
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            "",
        )
        repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
    else:
        cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
        cuda_defines["%{host_compiler_warnings}"] = ""

        # nvcc has the system include paths built in and will automatically
        # search them; we cannot work around that, so we add the relevant cuda
        # system paths to the allowed compiler specific include paths.
        cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(
            host_compiler_includes + _cuda_include_path(
                repository_ctx,
                cuda_config,
            ) + [cupti_header_dir, cudnn_header_dir],
        )

        # For gcc, do not canonicalize system header paths; some versions of gcc
        # pick the shortest possible path for system includes when creating the
        # .d file - given that includes that are prefixed with "../" multiple
        # time quickly grow longer than the root of the tree, this can lead to
        # bazel's header check failing.
        cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""

        file_ext = ".exe" if is_windows(repository_ctx) else ""
        nvcc_path = "%s/nvcc%s" % (cuda_config.config["cuda_binary_dir"], file_ext)
        cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc"
        cuda_defines["%{win_compiler_deps}"] = ":windows_msvc_wrapper_files"

        wrapper_defines = {
            "%{cpu_compiler}": str(cc),
            "%{cuda_version}": cuda_config.cuda_version,
            "%{nvcc_path}": nvcc_path,
            "%{gcc_host_compiler_path}": str(cc),
            "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx),
        }
        repository_ctx.template(
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            tpl_paths["crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc"],
            wrapper_defines,
        )
        repository_ctx.template(
            "crosstool/windows/msvc_wrapper_for_nvcc.py",
            tpl_paths["crosstool:windows/msvc_wrapper_for_nvcc.py"],
            wrapper_defines,
        )

    cuda_defines.update(_get_win_cuda_defines(repository_ctx))

    verify_build_defines(cuda_defines)

    # Only expand template variables in the BUILD file
    repository_ctx.template(
        "crosstool/BUILD",
        tpl_paths["crosstool:BUILD"],
        cuda_defines,
    )

    # No templating of cc_toolchain_config - use attributes and templatize the
    # BUILD file.
    repository_ctx.template(
        "crosstool/cc_toolchain_config.bzl",
        tpl_paths["crosstool:cc_toolchain_config.bzl"],
        {},
    )

    # Set up cuda_config.h, which is used by
    # tensorflow/stream_executor/dso_loader.cc.
    repository_ctx.template(
        "cuda/cuda/cuda_config.h",
        tpl_paths["cuda:cuda_config.h"],
        {
            "%{cuda_version}": cuda_config.cuda_version,
            "%{cublas_version}": cuda_config.cublas_version,
            "%{cusolver_version}": cuda_config.cusolver_version,
            "%{curand_version}": cuda_config.curand_version,
            "%{cufft_version}": cuda_config.cufft_version,
            "%{cusparse_version}": cuda_config.cusparse_version,
            "%{cudnn_version}": cuda_config.cudnn_version,
            "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path,
        },
    )

    # Set up cuda_config.py, which is used by gen_build_info to provide
    # static build environment info to the API
    repository_ctx.template(
        "cuda/cuda/cuda_config.py",
        tpl_paths["cuda:cuda_config.py"],
        _py_tmpl_dict({
            "cuda_version": cuda_config.cuda_version,
            "cudnn_version": cuda_config.cudnn_version,
            "cuda_compute_capabilities": cuda_config.compute_capabilities,
            "cpu_compiler": str(cc),
        }),
    )
|
|
|
|
def _py_tmpl_dict(d):
    """Returns template substitutions mapping "%{cuda_config}" to `str(d)`."""
    rendered = str(d)
    return {"%{cuda_config}": rendered}
|
|
|
|
def _create_remote_cuda_repository(repository_ctx, remote_config_repo):
    """Creates pointers to a remotely configured repo set up to build with CUDA.

    Args:
      repository_ctx: the repository_ctx object.
      remote_config_repo: the remote configuration repository; passed to
        config_repo_label to locate each file.
    """

    # NOTE(review): "cuda/build_defs.bzl" is also written by the loop below;
    # presumably that later write wins - confirm this call is still needed.
    build_defs_substitutions = {
        "%{cuda_is_configured}": "True",
        "%{cuda_extra_copts}": _compute_cuda_extra_copts(
            repository_ctx,
            compute_capabilities(repository_ctx),
        ),
    }
    _tpl(repository_ctx, "cuda:build_defs.bzl", build_defs_substitutions)

    # (destination in this repository, target in the remote repository,
    # substitutions), processed in this exact order.
    forwarded_files = [
        ("cuda/BUILD", "cuda:BUILD", {}),
        ("cuda/build_defs.bzl", "cuda:build_defs.bzl", {}),
        ("cuda/cuda/cuda_config.h", "cuda:cuda/cuda_config.h", {}),
        ("cuda/cuda/cuda_config.py", "cuda:cuda/cuda_config.py", _py_tmpl_dict({})),
        ("crosstool/BUILD", "crosstool:BUILD", {}),
        ("crosstool/cc_toolchain_config.bzl", "crosstool:cc_toolchain_config.bzl", {}),
        (
            "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc",
            "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",
            {},
        ),
    ]
    for destination, remote_target, substitutions in forwarded_files:
        repository_ctx.template(
            destination,
            config_repo_label(remote_config_repo, remote_target),
            substitutions,
        )
|
|
|
|
def _cuda_autoconf_impl(repository_ctx):
    """Implementation of the cuda_autoconf repository rule.

    Creates a dummy repository when CUDA is not enabled, a remote one when
    _TF_CUDA_CONFIG_REPO is set in the host environment, and a local one
    otherwise.

    Args:
      repository_ctx: the repository_ctx object.
    """
    if not enable_cuda(repository_ctx):
        _create_dummy_repository(repository_ctx)
        return

    remote_config_repo = get_host_environ(repository_ctx, _TF_CUDA_CONFIG_REPO)
    if remote_config_repo == None:
        _create_local_cuda_repository(repository_ctx)
        return

    # A remote configuration needs both versions to be given explicitly.
    cuda_version = get_host_environ(repository_ctx, _TF_CUDA_VERSION)
    cudnn_version = get_host_environ(repository_ctx, _TF_CUDNN_VERSION)
    if cuda_version == None or cudnn_version == None:
        auto_configure_fail("%s and %s must also be set if %s is specified" %
                            (_TF_CUDA_VERSION, _TF_CUDNN_VERSION, _TF_CUDA_CONFIG_REPO))
    _create_remote_cuda_repository(repository_ctx, remote_config_repo)
|
|
|
|
# Environment variables the CUDA repository rules depend on. Bazel re-fetches
# the repository when the value of any variable listed here changes.
_ENVIRONS = [
    _GCC_HOST_COMPILER_PATH,
    _GCC_HOST_COMPILER_PREFIX,
    _CLANG_CUDA_COMPILER_PATH,
    "TF_NEED_CUDA",
    "TF_CUDA_CLANG",
    _TF_DOWNLOAD_CLANG,
    _CUDA_TOOLKIT_PATH,
    _CUDNN_INSTALL_PATH,
    _TF_CUDA_VERSION,
    _TF_CUDNN_VERSION,
    _TF_CUDA_COMPUTE_CAPABILITIES,
    "NVVMIR_LIBRARY_DIR",
    _PYTHON_BIN_PATH,
    "TMP",
    "TMPDIR",
    "TF_CUDA_PATHS",
    # Read by _tf_sysroot(); without it a changed sysroot would keep using the
    # previously generated crosstool.
    _TF_SYSROOT,
]
|
|
|
|
# Repository rule that runs _create_local_cuda_repository directly, without
# the dispatch done by _cuda_autoconf_impl.
# NOTE(review): the "environ" attribute is presumably what get_host_environ
# consults before the real environment - verify in remote_config/common.bzl.
remote_cuda_configure = repository_rule(
    attrs = {
        "environ": attr.string_dict(),
    },
    environ = _ENVIRONS,
    implementation = _create_local_cuda_repository,
    remotable = True,
)
|
|
|
|
# Repository rule implemented by _cuda_autoconf_impl. In addition to
# _ENVIRONS it depends on _TF_CUDA_CONFIG_REPO, which that implementation
# reads to pick the remote configuration.
cuda_configure = repository_rule(
    environ = _ENVIRONS + [_TF_CUDA_CONFIG_REPO],
    implementation = _cuda_autoconf_impl,
)
"""Detects and configures the local CUDA toolchain.

Add the following to your WORKSPACE FILE:

```python
cuda_configure(name = "local_config_cuda")
```

Args:
  name: A unique name for this workspace rule.
"""
|